diff --git a/preprocess/humanparsing/datasets/datasets.py b/preprocess/humanparsing/datasets/datasets.py deleted file mode 100644 index 433f15a..0000000 --- a/preprocess/humanparsing/datasets/datasets.py +++ /dev/null @@ -1,201 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : datasets.py -@Time : 8/4/19 3:35 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. -""" - -import os -import numpy as np -import random -import torch -import cv2 -from torch.utils import data -from utils.transforms import get_affine_transform - - -class LIPDataSet(data.Dataset): - def __init__(self, root, dataset, crop_size=[473, 473], scale_factor=0.25, - rotation_factor=30, ignore_label=255, transform=None): - self.root = root - self.aspect_ratio = crop_size[1] * 1.0 / crop_size[0] - self.crop_size = np.asarray(crop_size) - self.ignore_label = ignore_label - self.scale_factor = scale_factor - self.rotation_factor = rotation_factor - self.flip_prob = 0.5 - self.transform = transform - self.dataset = dataset - - list_path = os.path.join(self.root, self.dataset + '_id.txt') - train_list = [i_id.strip() for i_id in open(list_path)] - - self.train_list = train_list - self.number_samples = len(self.train_list) - - def __len__(self): - return self.number_samples - - def _box2cs(self, box): - x, y, w, h = box[:4] - return self._xywh2cs(x, y, w, h) - - def _xywh2cs(self, x, y, w, h): - center = np.zeros((2), dtype=np.float32) - center[0] = x + w * 0.5 - center[1] = y + h * 0.5 - if w > self.aspect_ratio * h: - h = w * 1.0 / self.aspect_ratio - elif w < self.aspect_ratio * h: - w = h * self.aspect_ratio - scale = np.array([w * 1.0, h * 1.0], dtype=np.float32) - return center, scale - - def __getitem__(self, index): - train_item = self.train_list[index] - - im_path = os.path.join(self.root, self.dataset + '_images', train_item + '.jpg') - parsing_anno_path = os.path.join(self.root, self.dataset + '_segmentations', train_item + '.png') - - im = cv2.imread(im_path, cv2.IMREAD_COLOR) - h, w, _ = im.shape - parsing_anno = np.zeros((h, w), dtype=np.long) - - # Get person center and scale - person_center, s = self._box2cs([0, 0, w - 1, h - 1]) - r = 0 - - if self.dataset != 'test': - # Get pose annotation - parsing_anno = cv2.imread(parsing_anno_path, cv2.IMREAD_GRAYSCALE) - if self.dataset == 'train' or self.dataset == 'trainval': - sf = self.scale_factor - rf = self.rotation_factor - s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf) - r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) if random.random() <= 0.6 else 0 - - if random.random() <= self.flip_prob: - im = im[:, ::-1, :] - parsing_anno = parsing_anno[:, ::-1] - person_center[0] = im.shape[1] - person_center[0] - 1 - right_idx = [15, 17, 19] - left_idx = [14, 16, 18] - for i in range(0, 3): - right_pos = np.where(parsing_anno == right_idx[i]) - left_pos = np.where(parsing_anno == left_idx[i]) - parsing_anno[right_pos[0], right_pos[1]] = left_idx[i] - parsing_anno[left_pos[0], left_pos[1]] = right_idx[i] - - trans = get_affine_transform(person_center, s, r, self.crop_size) - input = cv2.warpAffine( - im, - trans, - (int(self.crop_size[1]), int(self.crop_size[0])), - flags=cv2.INTER_LINEAR, - borderMode=cv2.BORDER_CONSTANT, - borderValue=(0, 0, 0)) - - if self.transform: - input = self.transform(input) - - meta = { - 'name': train_item, - 'center': person_center, - 'height': h, - 'width': w, - 
'scale': s, - 'rotation': r - } - - if self.dataset == 'val' or self.dataset == 'test': - return input, meta - else: - label_parsing = cv2.warpAffine( - parsing_anno, - trans, - (int(self.crop_size[1]), int(self.crop_size[0])), - flags=cv2.INTER_NEAREST, - borderMode=cv2.BORDER_CONSTANT, - borderValue=(255)) - - label_parsing = torch.from_numpy(label_parsing) - - return input, label_parsing, meta - - -class LIPDataValSet(data.Dataset): - def __init__(self, root, dataset='val', crop_size=[473, 473], transform=None, flip=False): - self.root = root - self.crop_size = crop_size - self.transform = transform - self.flip = flip - self.dataset = dataset - self.root = root - self.aspect_ratio = crop_size[1] * 1.0 / crop_size[0] - self.crop_size = np.asarray(crop_size) - - list_path = os.path.join(self.root, self.dataset + '_id.txt') - val_list = [i_id.strip() for i_id in open(list_path)] - - self.val_list = val_list - self.number_samples = len(self.val_list) - - def __len__(self): - return len(self.val_list) - - def _box2cs(self, box): - x, y, w, h = box[:4] - return self._xywh2cs(x, y, w, h) - - def _xywh2cs(self, x, y, w, h): - center = np.zeros((2), dtype=np.float32) - center[0] = x + w * 0.5 - center[1] = y + h * 0.5 - if w > self.aspect_ratio * h: - h = w * 1.0 / self.aspect_ratio - elif w < self.aspect_ratio * h: - w = h * self.aspect_ratio - scale = np.array([w * 1.0, h * 1.0], dtype=np.float32) - - return center, scale - - def __getitem__(self, index): - val_item = self.val_list[index] - # Load training image - im_path = os.path.join(self.root, self.dataset + '_images', val_item + '.jpg') - im = cv2.imread(im_path, cv2.IMREAD_COLOR) - h, w, _ = im.shape - # Get person center and scale - person_center, s = self._box2cs([0, 0, w - 1, h - 1]) - r = 0 - trans = get_affine_transform(person_center, s, r, self.crop_size) - input = cv2.warpAffine( - im, - trans, - (int(self.crop_size[1]), int(self.crop_size[0])), - flags=cv2.INTER_LINEAR, - borderMode=cv2.BORDER_CONSTANT, - borderValue=(0, 0, 0)) - input = self.transform(input) - flip_input = input.flip(dims=[-1]) - if self.flip: - batch_input_im = torch.stack([input, flip_input]) - else: - batch_input_im = input - - meta = { - 'name': val_item, - 'center': person_center, - 'height': h, - 'width': w, - 'scale': s, - 'rotation': r - } - - return batch_input_im, meta diff --git a/preprocess/humanparsing/datasets/target_generation.py b/preprocess/humanparsing/datasets/target_generation.py deleted file mode 100644 index 8524db4..0000000 --- a/preprocess/humanparsing/datasets/target_generation.py +++ /dev/null @@ -1,40 +0,0 @@ -import torch -from torch.nn import functional as F - - -def generate_edge_tensor(label, edge_width=3): - label = label.type(torch.cuda.FloatTensor) - if len(label.shape) == 2: - label = label.unsqueeze(0) - n, h, w = label.shape - edge = torch.zeros(label.shape, dtype=torch.float).cuda() - # right - edge_right = edge[:, 1:h, :] - edge_right[(label[:, 1:h, :] != label[:, :h - 1, :]) & (label[:, 1:h, :] != 255) - & (label[:, :h - 1, :] != 255)] = 1 - - # up - edge_up = edge[:, :, :w - 1] - edge_up[(label[:, :, :w - 1] != label[:, :, 1:w]) - & (label[:, :, :w - 1] != 255) - & (label[:, :, 1:w] != 255)] = 1 - - # upright - edge_upright = edge[:, :h - 1, :w - 1] - edge_upright[(label[:, :h - 1, :w - 1] != label[:, 1:h, 1:w]) - & (label[:, :h - 1, :w - 1] != 255) - & (label[:, 1:h, 1:w] != 255)] = 1 - - # bottomright - edge_bottomright = edge[:, :h - 1, 1:w] - edge_bottomright[(label[:, :h - 1, 1:w] != label[:, 1:h, :w - 1]) - & 
(label[:, :h - 1, 1:w] != 255) - & (label[:, 1:h, :w - 1] != 255)] = 1 - - kernel = torch.ones((1, 1, edge_width, edge_width), dtype=torch.float).cuda() - with torch.no_grad(): - edge = edge.unsqueeze(1) - edge = F.conv2d(edge, kernel, stride=1, padding=1) - edge[edge!=0] = 1 - edge = edge.squeeze() - return edge diff --git a/preprocess/humanparsing/mhp_extension/coco_style_annotation_creator/human_to_coco.py b/preprocess/humanparsing/mhp_extension/coco_style_annotation_creator/human_to_coco.py deleted file mode 100644 index 8eccb3a..0000000 --- a/preprocess/humanparsing/mhp_extension/coco_style_annotation_creator/human_to_coco.py +++ /dev/null @@ -1,166 +0,0 @@ -import argparse -import datetime -import json -import os -from PIL import Image -import numpy as np - -import pycococreatortools - - -def get_arguments(): - parser = argparse.ArgumentParser(description="transform mask annotation to coco annotation") - parser.add_argument("--dataset", type=str, default='CIHP', help="name of dataset (CIHP, MHPv2 or VIP)") - parser.add_argument("--json_save_dir", type=str, default='../data/msrcnn_finetune_annotations', - help="path to save coco-style annotation json file") - parser.add_argument("--use_val", type=bool, default=False, - help="use train+val set for finetuning or not") - parser.add_argument("--train_img_dir", type=str, default='../data/instance-level_human_parsing/Training/Images', - help="train image path") - parser.add_argument("--train_anno_dir", type=str, - default='../data/instance-level_human_parsing/Training/Human_ids', - help="train human mask path") - parser.add_argument("--val_img_dir", type=str, default='../data/instance-level_human_parsing/Validation/Images', - help="val image path") - parser.add_argument("--val_anno_dir", type=str, - default='../data/instance-level_human_parsing/Validation/Human_ids', - help="val human mask path") - return parser.parse_args() - - -def main(args): - INFO = { - "description": args.split_name + " Dataset", - "url": "", - "version": "", - "year": 2019, - "contributor": "xyq", - "date_created": datetime.datetime.utcnow().isoformat(' ') - } - - LICENSES = [ - { - "id": 1, - "name": "", - "url": "" - } - ] - - CATEGORIES = [ - { - 'id': 1, - 'name': 'person', - 'supercategory': 'person', - }, - ] - - coco_output = { - "info": INFO, - "licenses": LICENSES, - "categories": CATEGORIES, - "images": [], - "annotations": [] - } - - image_id = 1 - segmentation_id = 1 - - for image_name in os.listdir(args.train_img_dir): - image = Image.open(os.path.join(args.train_img_dir, image_name)) - image_info = pycococreatortools.create_image_info( - image_id, image_name, image.size - ) - coco_output["images"].append(image_info) - - human_mask_name = os.path.splitext(image_name)[0] + '.png' - human_mask = np.asarray(Image.open(os.path.join(args.train_anno_dir, human_mask_name))) - human_gt_labels = np.unique(human_mask) - - for i in range(1, len(human_gt_labels)): - category_info = {'id': 1, 'is_crowd': 0} - binary_mask = np.uint8(human_mask == i) - annotation_info = pycococreatortools.create_annotation_info( - segmentation_id, image_id, category_info, binary_mask, - image.size, tolerance=10 - ) - if annotation_info is not None: - coco_output["annotations"].append(annotation_info) - - segmentation_id += 1 - image_id += 1 - - if not os.path.exists(args.json_save_dir): - os.makedirs(args.json_save_dir) - if not args.use_val: - with open('{}/{}_train.json'.format(args.json_save_dir, args.split_name), 'w') as output_json_file: - json.dump(coco_output, output_json_file) 
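A quick way to sanity-check the COCO-style JSON written by `human_to_coco.py` is to load it back with `pycocotools`. The sketch below is illustrative only: it assumes `pycocotools` is installed, and the annotation path is hypothetical (note that `main()` reads `args.split_name` while `get_arguments()` only defines `--dataset`, so the real filename depends on how that attribute is resolved).

```python
# Minimal sanity check of the generated COCO-style annotations (sketch).
# Assumes pycocotools is installed; the JSON path below is hypothetical.
from pycocotools.coco import COCO

ann_file = "../data/msrcnn_finetune_annotations/CIHP_train.json"  # hypothetical path
coco = COCO(ann_file)

img_ids = coco.getImgIds()
print(f"{len(img_ids)} images, {len(coco.getAnnIds())} person instances")

# Inspect the first image and its per-person annotations.
first = coco.loadImgs(img_ids[0])[0]
anns = coco.loadAnns(coco.getAnnIds(imgIds=first["id"], catIds=[1]))
for ann in anns:
    print(first["file_name"], "bbox:", ann["bbox"], "area:", ann["area"])
```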
- else: - for image_name in os.listdir(args.val_img_dir): - image = Image.open(os.path.join(args.val_img_dir, image_name)) - image_info = pycococreatortools.create_image_info( - image_id, image_name, image.size - ) - coco_output["images"].append(image_info) - - human_mask_name = os.path.splitext(image_name)[0] + '.png' - human_mask = np.asarray(Image.open(os.path.join(args.val_anno_dir, human_mask_name))) - human_gt_labels = np.unique(human_mask) - - for i in range(1, len(human_gt_labels)): - category_info = {'id': 1, 'is_crowd': 0} - binary_mask = np.uint8(human_mask == i) - annotation_info = pycococreatortools.create_annotation_info( - segmentation_id, image_id, category_info, binary_mask, - image.size, tolerance=10 - ) - if annotation_info is not None: - coco_output["annotations"].append(annotation_info) - - segmentation_id += 1 - image_id += 1 - - with open('{}/{}_trainval.json'.format(args.json_save_dir, args.split_name), 'w') as output_json_file: - json.dump(coco_output, output_json_file) - - coco_output_val = { - "info": INFO, - "licenses": LICENSES, - "categories": CATEGORIES, - "images": [], - "annotations": [] - } - - image_id_val = 1 - segmentation_id_val = 1 - - for image_name in os.listdir(args.val_img_dir): - image = Image.open(os.path.join(args.val_img_dir, image_name)) - image_info = pycococreatortools.create_image_info( - image_id_val, image_name, image.size - ) - coco_output_val["images"].append(image_info) - - human_mask_name = os.path.splitext(image_name)[0] + '.png' - human_mask = np.asarray(Image.open(os.path.join(args.val_anno_dir, human_mask_name))) - human_gt_labels = np.unique(human_mask) - - for i in range(1, len(human_gt_labels)): - category_info = {'id': 1, 'is_crowd': 0} - binary_mask = np.uint8(human_mask == i) - annotation_info = pycococreatortools.create_annotation_info( - segmentation_id_val, image_id_val, category_info, binary_mask, - image.size, tolerance=10 - ) - if annotation_info is not None: - coco_output_val["annotations"].append(annotation_info) - - segmentation_id_val += 1 - image_id_val += 1 - - with open('{}/{}_val.json'.format(args.json_save_dir, args.split_name), 'w') as output_json_file_val: - json.dump(coco_output_val, output_json_file_val) - - -if __name__ == "__main__": - args = get_arguments() - main(args) diff --git a/preprocess/humanparsing/mhp_extension/coco_style_annotation_creator/pycococreatortools.py b/preprocess/humanparsing/mhp_extension/coco_style_annotation_creator/pycococreatortools.py deleted file mode 100644 index 3f3d833..0000000 --- a/preprocess/humanparsing/mhp_extension/coco_style_annotation_creator/pycococreatortools.py +++ /dev/null @@ -1,114 +0,0 @@ -import re -import datetime -import numpy as np -from itertools import groupby -from skimage import measure -from PIL import Image -from pycocotools import mask - -convert = lambda text: int(text) if text.isdigit() else text.lower() -natrual_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)] - - -def resize_binary_mask(array, new_size): - image = Image.fromarray(array.astype(np.uint8) * 255) - image = image.resize(new_size) - return np.asarray(image).astype(np.bool_) - - -def close_contour(contour): - if not np.array_equal(contour[0], contour[-1]): - contour = np.vstack((contour, contour[0])) - return contour - - -def binary_mask_to_rle(binary_mask): - rle = {'counts': [], 'size': list(binary_mask.shape)} - counts = rle.get('counts') - for i, (value, elements) in enumerate(groupby(binary_mask.ravel(order='F'))): - if i == 0 and value == 1: - counts.append(0) 
- counts.append(len(list(elements))) - - return rle - - -def binary_mask_to_polygon(binary_mask, tolerance=0): - """Converts a binary mask to COCO polygon representation - Args: - binary_mask: a 2D binary numpy array where '1's represent the object - tolerance: Maximum distance from original points of polygon to approximated - polygonal chain. If tolerance is 0, the original coordinate array is returned. - """ - polygons = [] - # pad mask to close contours of shapes which start and end at an edge - padded_binary_mask = np.pad(binary_mask, pad_width=1, mode='constant', constant_values=0) - contours = measure.find_contours(padded_binary_mask, 0.5) - contours = np.subtract(contours, 1) - for contour in contours: - contour = close_contour(contour) - contour = measure.approximate_polygon(contour, tolerance) - if len(contour) < 3: - continue - contour = np.flip(contour, axis=1) - segmentation = contour.ravel().tolist() - # after padding and subtracting 1 we may get -0.5 points in our segmentation - segmentation = [0 if i < 0 else i for i in segmentation] - polygons.append(segmentation) - - return polygons - - -def create_image_info(image_id, file_name, image_size, - date_captured=datetime.datetime.utcnow().isoformat(' '), - license_id=1, coco_url="", flickr_url=""): - image_info = { - "id": image_id, - "file_name": file_name, - "width": image_size[0], - "height": image_size[1], - "date_captured": date_captured, - "license": license_id, - "coco_url": coco_url, - "flickr_url": flickr_url - } - - return image_info - - -def create_annotation_info(annotation_id, image_id, category_info, binary_mask, - image_size=None, tolerance=2, bounding_box=None): - if image_size is not None: - binary_mask = resize_binary_mask(binary_mask, image_size) - - binary_mask_encoded = mask.encode(np.asfortranarray(binary_mask.astype(np.uint8))) - - area = mask.area(binary_mask_encoded) - if area < 1: - return None - - if bounding_box is None: - bounding_box = mask.toBbox(binary_mask_encoded) - - if category_info["is_crowd"]: - is_crowd = 1 - segmentation = binary_mask_to_rle(binary_mask) - else: - is_crowd = 0 - segmentation = binary_mask_to_polygon(binary_mask, tolerance) - if not segmentation: - return None - - annotation_info = { - "id": annotation_id, - "image_id": image_id, - "category_id": category_info["id"], - "iscrowd": is_crowd, - "area": area.tolist(), - "bbox": bounding_box.tolist(), - "segmentation": segmentation, - "width": binary_mask.shape[1], - "height": binary_mask.shape[0], - } - - return annotation_info diff --git a/preprocess/humanparsing/mhp_extension/coco_style_annotation_creator/test_human2coco_format.py b/preprocess/humanparsing/mhp_extension/coco_style_annotation_creator/test_human2coco_format.py deleted file mode 100644 index 1733918..0000000 --- a/preprocess/humanparsing/mhp_extension/coco_style_annotation_creator/test_human2coco_format.py +++ /dev/null @@ -1,74 +0,0 @@ -import argparse -import datetime -import json -import os -from PIL import Image - -import pycococreatortools - - -def get_arguments(): - parser = argparse.ArgumentParser(description="transform mask annotation to coco annotation") - parser.add_argument("--dataset", type=str, default='CIHP', help="name of dataset (CIHP, MHPv2 or VIP)") - parser.add_argument("--json_save_dir", type=str, default='../data/CIHP/annotations', - help="path to save coco-style annotation json file") - parser.add_argument("--test_img_dir", type=str, default='../data/CIHP/Testing/Images', - help="test image path") - return parser.parse_args() - -args = 
get_arguments() - -INFO = { - "description": args.dataset + "Dataset", - "url": "", - "version": "", - "year": 2020, - "contributor": "yunqiuxu", - "date_created": datetime.datetime.utcnow().isoformat(' ') -} - -LICENSES = [ - { - "id": 1, - "name": "", - "url": "" - } -] - -CATEGORIES = [ - { - 'id': 1, - 'name': 'person', - 'supercategory': 'person', - }, -] - - -def main(args): - coco_output = { - "info": INFO, - "licenses": LICENSES, - "categories": CATEGORIES, - "images": [], - "annotations": [] - } - - image_id = 1 - - for image_name in os.listdir(args.test_img_dir): - image = Image.open(os.path.join(args.test_img_dir, image_name)) - image_info = pycococreatortools.create_image_info( - image_id, image_name, image.size - ) - coco_output["images"].append(image_info) - image_id += 1 - - if not os.path.exists(os.path.join(args.json_save_dir)): - os.mkdir(os.path.join(args.json_save_dir)) - - with open('{}/{}.json'.format(args.json_save_dir, args.dataset), 'w') as output_json_file: - json.dump(coco_output, output_json_file) - - -if __name__ == "__main__": - main(args) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.circleci/config.yml b/preprocess/humanparsing/mhp_extension/detectron2/.circleci/config.yml deleted file mode 100644 index 6c60588..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.circleci/config.yml +++ /dev/null @@ -1,179 +0,0 @@ -# Python CircleCI 2.0 configuration file -# -# Check https://circleci.com/docs/2.0/language-python/ for more details -# -version: 2 - -# ------------------------------------------------------------------------------------- -# Environments to run the jobs in -# ------------------------------------------------------------------------------------- -cpu: &cpu - docker: - - image: circleci/python:3.6.8-stretch - resource_class: medium - -gpu: &gpu - machine: - image: ubuntu-1604:201903-01 - docker_layer_caching: true - resource_class: gpu.small - -# ------------------------------------------------------------------------------------- -# Re-usable commands -# ------------------------------------------------------------------------------------- -install_python: &install_python - - run: - name: Install Python - working_directory: ~/ - command: | - pyenv install 3.6.1 - pyenv global 3.6.1 - -setup_venv: &setup_venv - - run: - name: Setup Virtual Env - working_directory: ~/ - command: | - python -m venv ~/venv - echo ". ~/venv/bin/activate" >> $BASH_ENV - . 
~/venv/bin/activate - python --version - which python - which pip - pip install --upgrade pip - -install_dep: &install_dep - - run: - name: Install Dependencies - command: | - pip install --progress-bar off -U 'git+https://github.com/facebookresearch/fvcore' - pip install --progress-bar off cython opencv-python - pip install --progress-bar off 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI' - pip install --progress-bar off torch torchvision - -install_detectron2: &install_detectron2 - - run: - name: Install Detectron2 - command: | - gcc --version - pip install -U --progress-bar off -e .[dev] - python -m detectron2.utils.collect_env - -install_nvidia_driver: &install_nvidia_driver - - run: - name: Install nvidia driver - working_directory: ~/ - command: | - wget -q 'https://s3.amazonaws.com/ossci-linux/nvidia_driver/NVIDIA-Linux-x86_64-430.40.run' - sudo /bin/bash ./NVIDIA-Linux-x86_64-430.40.run -s --no-drm - nvidia-smi - -run_unittests: &run_unittests - - run: - name: Run Unit Tests - command: | - python -m unittest discover -v -s tests - -# ------------------------------------------------------------------------------------- -# Jobs to run -# ------------------------------------------------------------------------------------- -jobs: - cpu_tests: - <<: *cpu - - working_directory: ~/detectron2 - - steps: - - checkout - - <<: *setup_venv - - # Cache the venv directory that contains dependencies - - restore_cache: - keys: - - cache-key-{{ .Branch }}-ID-20200425 - - - <<: *install_dep - - - save_cache: - paths: - - ~/venv - key: cache-key-{{ .Branch }}-ID-20200425 - - - <<: *install_detectron2 - - - run: - name: isort - command: | - isort -c -sp . - - run: - name: black - command: | - black --check -l 100 . - - run: - name: flake8 - command: | - flake8 . - - - <<: *run_unittests - - gpu_tests: - <<: *gpu - - working_directory: ~/detectron2 - - steps: - - checkout - - <<: *install_nvidia_driver - - - run: - name: Install nvidia-docker - working_directory: ~/ - command: | - curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - - distribution=$(. /etc/os-release;echo $ID$VERSION_ID) - curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | \ - sudo tee /etc/apt/sources.list.d/nvidia-docker.list - sudo apt-get update && sudo apt-get install -y nvidia-docker2 - # reload the docker daemon configuration - sudo pkill -SIGHUP dockerd - - - run: - name: Launch docker - working_directory: ~/detectron2/docker - command: | - nvidia-docker build -t detectron2:v0 -f Dockerfile-circleci . 
- nvidia-docker run -itd --name d2 detectron2:v0 - docker exec -it d2 nvidia-smi - - - run: - name: Build Detectron2 - command: | - docker exec -it d2 pip install 'git+https://github.com/facebookresearch/fvcore' - docker cp ~/detectron2 d2:/detectron2 - # This will build d2 for the target GPU arch only - docker exec -it d2 pip install -e /detectron2 - docker exec -it d2 python3 -m detectron2.utils.collect_env - docker exec -it d2 python3 -c 'import torch; assert(torch.cuda.is_available())' - - - run: - name: Run Unit Tests - command: | - docker exec -e CIRCLECI=true -it d2 python3 -m unittest discover -v -s /detectron2/tests - -workflows: - version: 2 - regular_test: - jobs: - - cpu_tests - - gpu_tests - - #nightly_test: - #jobs: - #- gpu_tests - #triggers: - #- schedule: - #cron: "0 0 * * *" - #filters: - #branches: - #only: - #- master diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.clang-format b/preprocess/humanparsing/mhp_extension/detectron2/.clang-format deleted file mode 100644 index a757d4f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.clang-format +++ /dev/null @@ -1,85 +0,0 @@ -AccessModifierOffset: -1 -AlignAfterOpenBracket: AlwaysBreak -AlignConsecutiveAssignments: false -AlignConsecutiveDeclarations: false -AlignEscapedNewlinesLeft: true -AlignOperands: false -AlignTrailingComments: false -AllowAllParametersOfDeclarationOnNextLine: false -AllowShortBlocksOnASingleLine: false -AllowShortCaseLabelsOnASingleLine: false -AllowShortFunctionsOnASingleLine: Empty -AllowShortIfStatementsOnASingleLine: false -AllowShortLoopsOnASingleLine: false -AlwaysBreakAfterReturnType: None -AlwaysBreakBeforeMultilineStrings: true -AlwaysBreakTemplateDeclarations: true -BinPackArguments: false -BinPackParameters: false -BraceWrapping: - AfterClass: false - AfterControlStatement: false - AfterEnum: false - AfterFunction: false - AfterNamespace: false - AfterObjCDeclaration: false - AfterStruct: false - AfterUnion: false - BeforeCatch: false - BeforeElse: false - IndentBraces: false -BreakBeforeBinaryOperators: None -BreakBeforeBraces: Attach -BreakBeforeTernaryOperators: true -BreakConstructorInitializersBeforeComma: false -BreakAfterJavaFieldAnnotations: false -BreakStringLiterals: false -ColumnLimit: 80 -CommentPragmas: '^ IWYU pragma:' -ConstructorInitializerAllOnOneLineOrOnePerLine: true -ConstructorInitializerIndentWidth: 4 -ContinuationIndentWidth: 4 -Cpp11BracedListStyle: true -DerivePointerAlignment: false -DisableFormat: false -ForEachMacros: [ FOR_EACH, FOR_EACH_ENUMERATE, FOR_EACH_KV, FOR_EACH_R, FOR_EACH_RANGE, ] -IncludeCategories: - - Regex: '^<.*\.h(pp)?>' - Priority: 1 - - Regex: '^<.*' - Priority: 2 - - Regex: '.*' - Priority: 3 -IndentCaseLabels: true -IndentWidth: 2 -IndentWrappedFunctionNames: false -KeepEmptyLinesAtTheStartOfBlocks: false -MacroBlockBegin: '' -MacroBlockEnd: '' -MaxEmptyLinesToKeep: 1 -NamespaceIndentation: None -ObjCBlockIndentWidth: 2 -ObjCSpaceAfterProperty: false -ObjCSpaceBeforeProtocolList: false -PenaltyBreakBeforeFirstCallParameter: 1 -PenaltyBreakComment: 300 -PenaltyBreakFirstLessLess: 120 -PenaltyBreakString: 1000 -PenaltyExcessCharacter: 1000000 -PenaltyReturnTypeOnItsOwnLine: 200 -PointerAlignment: Left -ReflowComments: true -SortIncludes: true -SpaceAfterCStyleCast: false -SpaceBeforeAssignmentOperators: true -SpaceBeforeParens: ControlStatements -SpaceInEmptyParentheses: false -SpacesBeforeTrailingComments: 1 -SpacesInAngles: false -SpacesInContainerLiterals: true -SpacesInCStyleCastParentheses: false 
-SpacesInParentheses: false -SpacesInSquareBrackets: false -Standard: Cpp11 -TabWidth: 8 -UseTab: Never diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.flake8 b/preprocess/humanparsing/mhp_extension/detectron2/.flake8 deleted file mode 100644 index 0cc61b7..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.flake8 +++ /dev/null @@ -1,9 +0,0 @@ -# This is an example .flake8 config, used when developing *Black* itself. -# Keep in sync with setup.cfg which is used for source packages. - -[flake8] -ignore = W503, E203, E221, C901, C408, E741 -max-line-length = 100 -max-complexity = 18 -select = B,C,E,F,W,T4,B9 -exclude = build,__init__.py diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.github/CODE_OF_CONDUCT.md b/preprocess/humanparsing/mhp_extension/detectron2/.github/CODE_OF_CONDUCT.md deleted file mode 100644 index 0f7ad8b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.github/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,5 +0,0 @@ -# Code of Conduct - -Facebook has adopted a Code of Conduct that we expect project participants to adhere to. -Please read the [full text](https://code.fb.com/codeofconduct/) -so that you can understand what actions will and will not be tolerated. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.github/CONTRIBUTING.md b/preprocess/humanparsing/mhp_extension/detectron2/.github/CONTRIBUTING.md deleted file mode 100644 index 81936df..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.github/CONTRIBUTING.md +++ /dev/null @@ -1,49 +0,0 @@ -# Contributing to detectron2 - -## Issues -We use GitHub issues to track public bugs and questions. -Please make sure to follow one of the -[issue templates](https://github.com/facebookresearch/detectron2/issues/new/choose) -when reporting any issues. - -Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe -disclosure of security bugs. In those cases, please go through the process -outlined on that page and do not file a public issue. - -## Pull Requests -We actively welcome your pull requests. - -However, if you're adding any significant features (e.g. > 50 lines), please -make sure to have a corresponding issue to discuss your motivation and proposals, -before sending a PR. We do not always accept new features, and we take the following -factors into consideration: - -1. Whether the same feature can be achieved without modifying detectron2. -Detectron2 is designed so that you can implement many extensions from the outside, e.g. -those in [projects](https://github.com/facebookresearch/detectron2/tree/master/projects). -If some part is not as extensible, you can also bring up the issue to make it more extensible. -2. Whether the feature is potentially useful to a large audience, or only to a small portion of users. -3. Whether the proposed solution has a good design / interface. -4. Whether the proposed solution adds extra mental/practical overhead to users who don't - need such feature. -5. Whether the proposed solution breaks existing APIs. - -When sending a PR, please do: - -1. If a PR contains multiple orthogonal changes, split it to several PRs. -2. If you've added code that should be tested, add tests. -3. For PRs that need experiments (e.g. adding a new model or new methods), - you don't need to update model zoo, but do provide experiment results in the description of the PR. -4. If APIs are changed, update the documentation. -5. Make sure your code lints with `./dev/linter.sh`. 
- - -## Contributor License Agreement ("CLA") -In order to accept your pull request, we need you to submit a CLA. You only need -to do this once to work on any of Facebook's open source projects. - -Complete your CLA here: - -## License -By contributing to detectron2, you agree that your contributions will be licensed -under the LICENSE file in the root directory of this source tree. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.github/Detectron2-Logo-Horz.svg b/preprocess/humanparsing/mhp_extension/detectron2/.github/Detectron2-Logo-Horz.svg deleted file mode 100644 index eb2d643..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.github/Detectron2-Logo-Horz.svg +++ /dev/null @@ -1 +0,0 @@ -Detectron2-Logo-Horz \ No newline at end of file diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE.md b/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE.md deleted file mode 100644 index 5e8aaa2..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE.md +++ /dev/null @@ -1,5 +0,0 @@ - -Please select an issue template from -https://github.com/facebookresearch/detectron2/issues/new/choose . - -Otherwise your issue will be closed. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/bugs.md b/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/bugs.md deleted file mode 100644 index 52d2998..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/bugs.md +++ /dev/null @@ -1,36 +0,0 @@ ---- -name: "🐛 Bugs" -about: Report bugs in detectron2 -title: Please read & provide the following - ---- - -## Instructions To Reproduce the 🐛 Bug: - -1. what changes you made (`git diff`) or what code you wrote -``` - -``` -2. what exact command you run: -3. what you observed (including __full logs__): -``` - -``` -4. please simplify the steps as much as possible so they do not require additional resources to - run, such as a private dataset. - -## Expected behavior: - -If there are no obvious error in "what you observed" provided above, -please tell us the expected behavior. - -## Environment: - -Provide your environment information using the following command: -``` -wget -nc -q https://github.com/facebookresearch/detectron2/raw/master/detectron2/utils/collect_env.py && python collect_env.py -``` - -If your issue looks like an installation issue / environment issue, -please first try to solve it yourself with the instructions in -https://detectron2.readthedocs.io/tutorials/install.html#common-installation-issues diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/config.yml b/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/config.yml deleted file mode 100644 index c19e249..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/config.yml +++ /dev/null @@ -1,9 +0,0 @@ -# require an issue template to be chosen -blank_issues_enabled: false - -# Unexpected behaviors & bugs are split to two templates. -# When they are one template, users think "it's not a bug" and don't choose the template. -# -# But the file name is still "unexpected-problems-bugs.md" so that old references -# to this issue template still works. 
-# It's ok since this template should be a superset of "bugs.md" (unexpected behaviors is a superset of bugs) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/feature-request.md b/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/feature-request.md deleted file mode 100644 index dd69a33..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/feature-request.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -name: "\U0001F680Feature Request" -about: Submit a proposal/request for a new detectron2 feature - ---- - -## 🚀 Feature -A clear and concise description of the feature proposal. - - -## Motivation & Examples - -Tell us why the feature is useful. - -Describe what the feature would look like, if it is implemented. -Best demonstrated using **code examples** in addition to words. - -## Note - -We only consider adding new features if they are relevant to many users. - -If you request implementation of research papers -- -we only consider papers that have enough significance and prevalance in the object detection field. - -We do not take requests for most projects in the `projects/` directory, -because they are research code release that is mainly for other researchers to reproduce results. - -Instead of adding features inside detectron2, -you can implement many features by [extending detectron2](https://detectron2.readthedocs.io/tutorials/extend.html). -The [projects/](https://github.com/facebookresearch/detectron2/tree/master/projects/) directory contains many of such examples. - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/questions-help-support.md b/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/questions-help-support.md deleted file mode 100644 index 0811561..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/questions-help-support.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -name: "❓How to do something?" -about: How to do something using detectron2? What does an API do? - ---- - -## ❓ How to do something using detectron2 - -Describe what you want to do, including: -1. what inputs you will provide, if any: -2. what outputs you are expecting: - -## ❓ What does an API do and how to use it? -Please link to which API or documentation you're asking about from -https://detectron2.readthedocs.io/ - - -NOTE: - -1. Only general answers are provided. - If you want to ask about "why X did not work", please use the - [Unexpected behaviors](https://github.com/facebookresearch/detectron2/issues/new/choose) issue template. - -2. About how to implement new models / new dataloader / new training logic, etc., check documentation first. - -3. We do not answer general machine learning / computer vision questions that are not specific to detectron2, such as how a model works, how to improve your training/make it converge, or what algorithm/methods can be used to achieve X. 
diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/unexpected-problems-bugs.md b/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/unexpected-problems-bugs.md deleted file mode 100644 index bafee7a..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/unexpected-problems-bugs.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -name: "Unexpected behaviors" -about: Run into unexpected behaviors when using detectron2 -title: Please read & provide the following - ---- - -If you do not know the root cause of the problem, and wish someone to help you, please -post according to this template: - -## Instructions To Reproduce the Issue: - -1. what changes you made (`git diff`) or what code you wrote -``` - -``` -2. what exact command you run: -3. what you observed (including __full logs__): -``` - -``` -4. please simplify the steps as much as possible so they do not require additional resources to - run, such as a private dataset. - -## Expected behavior: - -If there are no obvious error in "what you observed" provided above, -please tell us the expected behavior. - -If you expect the model to converge / work better, note that we do not give suggestions -on how to train a new model. -Only in one of the two conditions we will help with it: -(1) You're unable to reproduce the results in detectron2 model zoo. -(2) It indicates a detectron2 bug. - -## Environment: - -Provide your environment information using the following command: -``` -wget -nc -q https://github.com/facebookresearch/detectron2/raw/master/detectron2/utils/collect_env.py && python collect_env.py -``` - -If your issue looks like an installation issue / environment issue, -please first try to solve it yourself with the instructions in -https://detectron2.readthedocs.io/tutorials/install.html#common-installation-issues diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.github/pull_request_template.md b/preprocess/humanparsing/mhp_extension/detectron2/.github/pull_request_template.md deleted file mode 100644 index 4ff5ea5..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.github/pull_request_template.md +++ /dev/null @@ -1,9 +0,0 @@ -Thanks for your contribution! - -If you're sending a large PR (e.g., >50 lines), -please open an issue first about the feature / bug, and indicate how you want to contribute. - -Before submitting a PR, please run `dev/linter.sh` to lint the code. - -See https://detectron2.readthedocs.io/notes/contributing.html#pull-requests -about how we handle PRs. 
diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.gitignore b/preprocess/humanparsing/mhp_extension/detectron2/.gitignore deleted file mode 100644 index e85df4c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.gitignore +++ /dev/null @@ -1,46 +0,0 @@ -# output dir -output -instant_test_output -inference_test_output - - -*.jpg -*.png -*.txt -*.json -*.diff - -# compilation and distribution -__pycache__ -_ext -*.pyc -*.so -detectron2.egg-info/ -build/ -dist/ -wheels/ - -# pytorch/python/numpy formats -*.pth -*.pkl -*.npy - -# ipython/jupyter notebooks -*.ipynb -**/.ipynb_checkpoints/ - -# Editor temporaries -*.swn -*.swo -*.swp -*~ - -# editor settings -.idea -.vscode - -# project dirs -/detectron2/model_zoo/configs -/datasets -/projects/*/datasets -/models diff --git a/preprocess/humanparsing/mhp_extension/detectron2/GETTING_STARTED.md b/preprocess/humanparsing/mhp_extension/detectron2/GETTING_STARTED.md deleted file mode 100644 index acaf13f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/GETTING_STARTED.md +++ /dev/null @@ -1,79 +0,0 @@ -## Getting Started with Detectron2 - -This document provides a brief intro of the usage of builtin command-line tools in detectron2. - -For a tutorial that involves actual coding with the API, -see our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) -which covers how to run inference with an -existing model, and how to train a builtin model on a custom dataset. - -For more advanced tutorials, refer to our [documentation](https://detectron2.readthedocs.io/tutorials/extend.html). - - -### Inference Demo with Pre-trained Models - -1. Pick a model and its config file from - [model zoo](MODEL_ZOO.md), - for example, `mask_rcnn_R_50_FPN_3x.yaml`. -2. We provide `demo.py` that is able to run builtin standard models. Run it with: -``` -cd demo/ -python demo.py --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \ - --input input1.jpg input2.jpg \ - [--other-options] - --opts MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl -``` -The configs are made for training, therefore we need to specify `MODEL.WEIGHTS` to a model from model zoo for evaluation. -This command will run the inference and show visualizations in an OpenCV window. - -For details of the command line arguments, see `demo.py -h` or look at its source code -to understand its behavior. Some common arguments are: -* To run __on your webcam__, replace `--input files` with `--webcam`. -* To run __on a video__, replace `--input files` with `--video-input video.mp4`. -* To run __on cpu__, add `MODEL.DEVICE cpu` after `--opts`. -* To save outputs to a directory (for images) or a file (for webcam or video), use `--output`. - - -### Training & Evaluation in Command Line - -We provide a script in "tools/{,plain_}train_net.py", that is made to train -all the configs provided in detectron2. -You may want to use it as a reference to write your own training script. - -To train a model with "train_net.py", first -setup the corresponding datasets following -[datasets/README.md](./datasets/README.md), -then run: -``` -cd tools/ -./train_net.py --num-gpus 8 \ - --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml -``` - -The configs are made for 8-GPU training. 
-To train on 1 GPU, you may need to [change some parameters](https://arxiv.org/abs/1706.02677), e.g.: -``` -./train_net.py \ - --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \ - --num-gpus 1 SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025 -``` - -For most models, CPU training is not supported. - -To evaluate a model's performance, use -``` -./train_net.py \ - --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \ - --eval-only MODEL.WEIGHTS /path/to/checkpoint_file -``` -For more options, see `./train_net.py -h`. - -### Use Detectron2 APIs in Your Code - -See our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) -to learn how to use detectron2 APIs to: -1. run inference with an existing model -2. train a builtin model on a custom dataset - -See [detectron2/projects](https://github.com/facebookresearch/detectron2/tree/master/projects) -for more ways to build your project on detectron2. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/INSTALL.md b/preprocess/humanparsing/mhp_extension/detectron2/INSTALL.md deleted file mode 100644 index 3985f8a..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/INSTALL.md +++ /dev/null @@ -1,184 +0,0 @@ -## Installation - -Our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) -has step-by-step instructions that install detectron2. -The [Dockerfile](docker) -also installs detectron2 with a few simple commands. - -### Requirements -- Linux or macOS with Python ≥ 3.6 -- PyTorch ≥ 1.4 -- [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation. - You can install them together at [pytorch.org](https://pytorch.org) to make sure of this. -- OpenCV, optional, needed by demo and visualization -- pycocotools: `pip install cython; pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'` - - -### Build Detectron2 from Source - -gcc & g++ ≥ 5 are required. [ninja](https://ninja-build.org/) is recommended for faster build. -After having them, run: -``` -python -m pip install 'git+https://github.com/facebookresearch/detectron2.git' -# (add --user if you don't have permission) - -# Or, to install it from a local clone: -git clone https://github.com/facebookresearch/detectron2.git -python -m pip install -e detectron2 - -# Or if you are on macOS -# CC=clang CXX=clang++ python -m pip install -e . -``` - -To __rebuild__ detectron2 that's built from a local clone, use `rm -rf build/ **/*.so` to clean the -old build first. You often need to rebuild detectron2 after reinstalling PyTorch. - -### Install Pre-Built Detectron2 (Linux only) -``` -# for CUDA 10.1: -python -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/index.html -``` -You can replace cu101 with "cu{100,92}" or "cpu". - -Note that: -1. Such installation has to be used with certain version of official PyTorch release. - See [releases](https://github.com/facebookresearch/detectron2/releases) for requirements. - It will not work with a different version of PyTorch or a non-official build of PyTorch. -2. Such installation is out-of-date w.r.t. master branch of detectron2. It may not be - compatible with the master branch of a research project that uses detectron2 (e.g. those in - [projects](projects) or [meshrcnn](https://github.com/facebookresearch/meshrcnn/)). 
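Whichever install route is used (building from source or the pre-built wheels above), it is worth confirming that detectron2, PyTorch and CUDA agree before debugging anything else. A minimal check, assuming detectron2 is already importable and that the `collect_env` helper referenced throughout this document is available:

```python
# Post-install sanity check (sketch); assumes detectron2 is importable.
import torch
import detectron2
from detectron2.utils.collect_env import collect_env_info

print("detectron2:", detectron2.__version__)
print("torch:", torch.__version__, "| built with CUDA:", torch.version.cuda)
print("CUDA available at runtime:", torch.cuda.is_available())
print(collect_env_info())  # the same report the issue templates ask for
```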
- -### Common Installation Issues - -If you meet issues using the pre-built detectron2, please uninstall it and try building it from source. - -Click each issue for its solutions: - -
- -Undefined torch/aten/caffe2 symbols, or segmentation fault immediately when running the library. - -
- -This usually happens when detectron2 or torchvision is not -compiled with the version of PyTorch you're running. - -Pre-built torchvision or detectron2 has to work with the corresponding official release of pytorch. -If the error comes from a pre-built torchvision, uninstall torchvision and pytorch and reinstall them -following [pytorch.org](http://pytorch.org). So the versions will match. - -If the error comes from a pre-built detectron2, check [release notes](https://github.com/facebookresearch/detectron2/releases) -to see the corresponding pytorch version required for each pre-built detectron2. - -If the error comes from detectron2 or torchvision that you built manually from source, -remove files you built (`build/`, `**/*.so`) and rebuild it so it can pick up the version of pytorch currently in your environment. - -If you cannot resolve this problem, please include the output of `gdb -ex "r" -ex "bt" -ex "quit" --args python -m detectron2.utils.collect_env` -in your issue. -
- -Undefined C++ symbols (e.g. `GLIBCXX`) or C++ symbols not found. - -
-Usually it's because the library is compiled with a newer C++ compiler but run with an old C++ runtime. - -This often happens with old anaconda. -Try `conda update libgcc`. Then rebuild detectron2. - -The fundamental solution is to run the code with proper C++ runtime. -One way is to use `LD_PRELOAD=/path/to/libstdc++.so`. - -
- -"Not compiled with GPU support" or "Detectron2 CUDA Compiler: not available". - -
-CUDA is not found when building detectron2. -You should make sure - -``` -python -c 'import torch; from torch.utils.cpp_extension import CUDA_HOME; print(torch.cuda.is_available(), CUDA_HOME)' -``` - -print valid outputs at the time you build detectron2. - -Most models can run inference (but not training) without GPU support. To use CPUs, set `MODEL.DEVICE='cpu'` in the config. -
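To illustrate the CPU fallback mentioned above, here is a minimal inference sketch. It assumes detectron2 is installed together with its model zoo configs; the checkpoint is downloaded on first use and the input is a dummy image.

```python
# CPU-only inference sketch; no GPU build of detectron2 is required.
import numpy as np
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.DEVICE = "cpu"  # the setting this entry describes

predictor = DefaultPredictor(cfg)
outputs = predictor(np.zeros((480, 640, 3), dtype=np.uint8))  # dummy BGR image
print(outputs["instances"].pred_classes)
```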
- -"invalid device function" or "no kernel image is available for execution". - -
-Two possibilities: - -* You build detectron2 with one version of CUDA but run it with a different version. - - To check whether it is the case, - use `python -m detectron2.utils.collect_env` to find out inconsistent CUDA versions. - In the output of this command, you should expect "Detectron2 CUDA Compiler", "CUDA_HOME", "PyTorch built with - CUDA" - to contain cuda libraries of the same version. - - When they are inconsistent, - you need to either install a different build of PyTorch (or build by yourself) - to match your local CUDA installation, or install a different version of CUDA to match PyTorch. - -* Detectron2 or PyTorch/torchvision is not built for the correct GPU architecture (compute compatibility). - - The GPU architecture for PyTorch/detectron2/torchvision is available in the "architecture flags" in - `python -m detectron2.utils.collect_env`. - - The GPU architecture flags of detectron2/torchvision by default matches the GPU model detected - during compilation. This means the compiled code may not work on a different GPU model. - To overwrite the GPU architecture for detectron2/torchvision, use `TORCH_CUDA_ARCH_LIST` environment variable during compilation. - - For example, `export TORCH_CUDA_ARCH_LIST=6.0,7.0` makes it compile for both P100s and V100s. - Visit [developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus) to find out - the correct compute compatibility number for your device. - -
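A small sketch of how to read the compute capability that the advice above refers to; it assumes a CUDA-enabled PyTorch and at least one visible GPU.

```python
# Report the local GPU's compute capability so TORCH_CUDA_ARCH_LIST can be
# set accordingly before rebuilding detectron2 (sketch; needs a visible GPU).
import torch

if torch.cuda.is_available():
    major, minor = torch.cuda.get_device_capability(0)
    arch = f"{major}.{minor}"
    print("GPU:", torch.cuda.get_device_name(0), "| compute capability:", arch)
    print(f'Suggested flag for rebuilding: TORCH_CUDA_ARCH_LIST="{arch}"')
else:
    print("No CUDA device visible; architecture flags only matter for GPU builds.")
```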
- -Undefined CUDA symbols; cannot open libcudart.so; other nvcc failures. - -
-The version of NVCC you use to build detectron2 or torchvision does -not match the version of CUDA you are running with. -This often happens when using anaconda's CUDA runtime. - -Use `python -m detectron2.utils.collect_env` to find out inconsistent CUDA versions. -In the output of this command, you should expect "Detectron2 CUDA Compiler", "CUDA_HOME", "PyTorch built with - CUDA" -to contain cuda libraries of the same version. - -When they are inconsistent, -you need to either install a different build of PyTorch (or build by yourself) -to match your local CUDA installation, or install a different version of CUDA to match PyTorch. -
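The mismatch described above can be surfaced directly by comparing the CUDA version PyTorch was built with against the `nvcc` found through `CUDA_HOME`. A sketch, assuming `nvcc` lives under `CUDA_HOME/bin` when CUDA is installed:

```python
# Compare PyTorch's CUDA build version with the nvcc that would compile detectron2.
import os
import subprocess

import torch
from torch.utils.cpp_extension import CUDA_HOME

print("PyTorch built with CUDA:", torch.version.cuda)
print("CUDA_HOME:", CUDA_HOME)

nvcc = os.path.join(CUDA_HOME, "bin", "nvcc") if CUDA_HOME else None
if nvcc and os.path.exists(nvcc):
    # The release printed here should match torch.version.cuda.
    print(subprocess.run([nvcc, "--version"], capture_output=True, text=True).stdout)
else:
    print("nvcc not found; a CUDA build of detectron2 is not possible in this environment.")
```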
- -"ImportError: cannot import name '_C'". - -
-Please build and install detectron2 following the instructions above. - -If you are running code from detectron2's root directory, `cd` to a different one. -Otherwise you may not import the code that you installed. -
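The shadowing problem described here is easy to diagnose by checking which copy of detectron2 the interpreter actually picked up. A minimal sketch:

```python
# Check whether `import detectron2` resolves to the source tree in the current
# directory instead of the installed package (the situation described above).
import os
import detectron2

print("imported from:", os.path.abspath(detectron2.__file__))
print("current directory:", os.getcwd())
if os.path.abspath(detectron2.__file__).startswith(os.getcwd() + os.sep):
    print("detectron2 is being imported from the current directory; "
          "`cd` elsewhere or install it so the built copy is used.")
```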
- -ONNX conversion segfault after some "TraceWarning". - -
-The ONNX package is compiled with a compiler that is too old. - -Please build and install ONNX from its source code using a compiler -whose version is closer to what's used by PyTorch (available in `torch.__config__.show()`). -
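To locate the compiler string mentioned above without reading the full configuration dump, something like the following can be used; the filter is a heuristic, since the exact lines in `torch.__config__.show()` vary between builds.

```python
# Print the compiler information PyTorch was built with, as a reference point
# for building ONNX from source (the matching lines vary between builds).
import torch

for line in torch.__config__.show().splitlines():
    if "GCC" in line or "compiler" in line.lower():
        print(line.strip())
```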
diff --git a/preprocess/humanparsing/mhp_extension/detectron2/LICENSE b/preprocess/humanparsing/mhp_extension/detectron2/LICENSE deleted file mode 100644 index d483689..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ -Apache License -Version 2.0, January 2004 -http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - -"License" shall mean the terms and conditions for use, reproduction, -and distribution as defined by Sections 1 through 9 of this document. - -"Licensor" shall mean the copyright owner or entity authorized by -the copyright owner that is granting the License. - -"Legal Entity" shall mean the union of the acting entity and all -other entities that control, are controlled by, or are under common -control with that entity. For the purposes of this definition, -"control" means (i) the power, direct or indirect, to cause the -direction or management of such entity, whether by contract or -otherwise, or (ii) ownership of fifty percent (50%) or more of the -outstanding shares, or (iii) beneficial ownership of such entity. - -"You" (or "Your") shall mean an individual or Legal Entity -exercising permissions granted by this License. - -"Source" form shall mean the preferred form for making modifications, -including but not limited to software source code, documentation -source, and configuration files. - -"Object" form shall mean any form resulting from mechanical -transformation or translation of a Source form, including but -not limited to compiled object code, generated documentation, -and conversions to other media types. - -"Work" shall mean the work of authorship, whether in Source or -Object form, made available under the License, as indicated by a -copyright notice that is included in or attached to the work -(an example is provided in the Appendix below). - -"Derivative Works" shall mean any work, whether in Source or Object -form, that is based on (or derived from) the Work and for which the -editorial revisions, annotations, elaborations, or other modifications -represent, as a whole, an original work of authorship. For the purposes -of this License, Derivative Works shall not include works that remain -separable from, or merely link (or bind by name) to the interfaces of, -the Work and Derivative Works thereof. - -"Contribution" shall mean any work of authorship, including -the original version of the Work and any modifications or additions -to that Work or Derivative Works thereof, that is intentionally -submitted to Licensor for inclusion in the Work by the copyright owner -or by an individual or Legal Entity authorized to submit on behalf of -the copyright owner. For the purposes of this definition, "submitted" -means any form of electronic, verbal, or written communication sent -to the Licensor or its representatives, including but not limited to -communication on electronic mailing lists, source code control systems, -and issue tracking systems that are managed by, or on behalf of, the -Licensor for the purpose of discussing and improving the Work, but -excluding communication that is conspicuously marked or otherwise -designated in writing by the copyright owner as "Not a Contribution." - -"Contributor" shall mean Licensor and any individual or Legal Entity -on behalf of whom a Contribution has been received by Licensor and -subsequently incorporated within the Work. - -2. Grant of Copyright License. 
Subject to the terms and conditions of -this License, each Contributor hereby grants to You a perpetual, -worldwide, non-exclusive, no-charge, royalty-free, irrevocable -copyright license to reproduce, prepare Derivative Works of, -publicly display, publicly perform, sublicense, and distribute the -Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. Subject to the terms and conditions of -this License, each Contributor hereby grants to You a perpetual, -worldwide, non-exclusive, no-charge, royalty-free, irrevocable -(except as stated in this section) patent license to make, have made, -use, offer to sell, sell, import, and otherwise transfer the Work, -where such license applies only to those patent claims licensable -by such Contributor that are necessarily infringed by their -Contribution(s) alone or by combination of their Contribution(s) -with the Work to which such Contribution(s) was submitted. If You -institute patent litigation against any entity (including a -cross-claim or counterclaim in a lawsuit) alleging that the Work -or a Contribution incorporated within the Work constitutes direct -or contributory patent infringement, then any patent licenses -granted to You under this License for that Work shall terminate -as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the -Work or Derivative Works thereof in any medium, with or without -modifications, and in Source or Object form, provided that You -meet the following conditions: - -(a) You must give any other recipients of the Work or -Derivative Works a copy of this License; and - -(b) You must cause any modified files to carry prominent notices -stating that You changed the files; and - -(c) You must retain, in the Source form of any Derivative Works -that You distribute, all copyright, patent, trademark, and -attribution notices from the Source form of the Work, -excluding those notices that do not pertain to any part of -the Derivative Works; and - -(d) If the Work includes a "NOTICE" text file as part of its -distribution, then any Derivative Works that You distribute must -include a readable copy of the attribution notices contained -within such NOTICE file, excluding those notices that do not -pertain to any part of the Derivative Works, in at least one -of the following places: within a NOTICE text file distributed -as part of the Derivative Works; within the Source form or -documentation, if provided along with the Derivative Works; or, -within a display generated by the Derivative Works, if and -wherever such third-party notices normally appear. The contents -of the NOTICE file are for informational purposes only and -do not modify the License. You may add Your own attribution -notices within Derivative Works that You distribute, alongside -or as an addendum to the NOTICE text from the Work, provided -that such additional attribution notices cannot be construed -as modifying the License. - -You may add Your own copyright statement to Your modifications and -may provide additional or different license terms and conditions -for use, reproduction, or distribution of Your modifications, or -for any such Derivative Works as a whole, provided Your use, -reproduction, and distribution of the Work otherwise complies with -the conditions stated in this License. - -5. Submission of Contributions. 
Unless You explicitly state otherwise, -any Contribution intentionally submitted for inclusion in the Work -by You to the Licensor shall be under the terms and conditions of -this License, without any additional terms or conditions. -Notwithstanding the above, nothing herein shall supersede or modify -the terms of any separate license agreement you may have executed -with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade -names, trademarks, service marks, or product names of the Licensor, -except as required for reasonable and customary use in describing the -origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. Unless required by applicable law or -agreed to in writing, Licensor provides the Work (and each -Contributor provides its Contributions) on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -implied, including, without limitation, any warranties or conditions -of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A -PARTICULAR PURPOSE. You are solely responsible for determining the -appropriateness of using or redistributing the Work and assume any -risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. In no event and under no legal theory, -whether in tort (including negligence), contract, or otherwise, -unless required by applicable law (such as deliberate and grossly -negligent acts) or agreed to in writing, shall any Contributor be -liable to You for damages, including any direct, indirect, special, -incidental, or consequential damages of any character arising as a -result of this License or out of the use or inability to use the -Work (including but not limited to damages for loss of goodwill, -work stoppage, computer failure or malfunction, or any and all -other commercial damages or losses), even if such Contributor -has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing -the Work or Derivative Works thereof, You may choose to offer, -and charge a fee for, acceptance of support, warranty, indemnity, -or other liability obligations and/or rights consistent with this -License. However, in accepting such obligations, You may act only -on Your own behalf and on Your sole responsibility, not on behalf -of any other Contributor, and only if You agree to indemnify, -defend, and hold each Contributor harmless for any liability -incurred by, or claims asserted against, such Contributor by reason -of your accepting any such warranty or additional liability. - -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - -To apply the Apache License to your work, attach the following -boilerplate notice, with the fields enclosed by brackets "[]" -replaced with your own identifying information. (Don't include -the brackets!) The text should be enclosed in the appropriate -comment syntax for the file format. We also recommend that a -file or class name and description of purpose be included on the -same "printed page" as the copyright notice for easier -identification within third-party archives. - -Copyright 2019 - present, Facebook, Inc - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/MODEL_ZOO.md b/preprocess/humanparsing/mhp_extension/detectron2/MODEL_ZOO.md deleted file mode 100644 index 07b81ff..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/MODEL_ZOO.md +++ /dev/null @@ -1,903 +0,0 @@ -# Detectron2 Model Zoo and Baselines - -## Introduction - -This file documents a large collection of baselines trained -with detectron2 in Sep-Oct, 2019. -All numbers were obtained on [Big Basin](https://engineering.fb.com/data-center-engineering/introducing-big-basin-our-next-generation-ai-hardware/) -servers with 8 NVIDIA V100 GPUs & NVLink. The software in use were PyTorch 1.3, CUDA 9.2, cuDNN 7.4.2 or 7.6.3. -You can access these models from code using [detectron2.model_zoo](https://detectron2.readthedocs.io/modules/model_zoo.html) APIs. - -In addition to these official baseline models, you can find more models in [projects/](projects/). - -#### How to Read the Tables -* The "Name" column contains a link to the config file. Running `tools/train_net.py` with this config file - and 8 GPUs will reproduce the model. -* Training speed is averaged across the entire training. - We keep updating the speed with latest version of detectron2/pytorch/etc., - so they might be different from the `metrics` file. - Training speed for multi-machine jobs is not provided. -* Inference speed is measured by `tools/train_net.py --eval-only`, or [inference_on_dataset()](https://detectron2.readthedocs.io/modules/evaluation.html#detectron2.evaluation.inference_on_dataset), - with batch size 1 in detectron2 directly. - Measuring it with your own code will likely introduce other overhead. - Actual deployment in production should in general be faster than the given inference - speed due to more optimizations. -* The *model id* column is provided for ease of reference. - To check downloaded file integrity, any model on this page contains its md5 prefix in its file name. -* Training curves and other statistics can be found in `metrics` for each model. - -#### Common Settings for COCO Models -* All COCO models were trained on `train2017` and evaluated on `val2017`. -* The default settings are __not directly comparable__ with Detectron's standard settings. - For example, our default training data augmentation uses scale jittering in addition to horizontal flipping. - - To make fair comparisons with Detectron's settings, see - [Detectron1-Comparisons](configs/Detectron1-Comparisons/) for accuracy comparison, - and [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html) - for speed comparison. -* For Faster/Mask R-CNN, we provide baselines based on __3 different backbone combinations__: - * __FPN__: Use a ResNet+FPN backbone with standard conv and FC heads for mask and box prediction, - respectively. It obtains the best - speed/accuracy tradeoff, but the other two are still useful for research. - * __C4__: Use a ResNet conv4 backbone with conv5 head. The original baseline in the Faster R-CNN paper. 
- * __DC5__ (Dilated-C5): Use a ResNet conv5 backbone with dilations in conv5, and standard conv and FC heads - for mask and box prediction, respectively. - This is used by the Deformable ConvNet paper. -* Most models are trained with the 3x schedule (~37 COCO epochs). - Although 1x models are heavily under-trained, we provide some ResNet-50 models with the 1x (~12 COCO epochs) - training schedule for comparison when doing quick research iteration. - -#### ImageNet Pretrained Models - -We provide backbone models pretrained on ImageNet-1k dataset. -These models have __different__ format from those provided in Detectron: we do not fuse BatchNorm into an affine layer. -* [R-50.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-50.pkl): converted copy of [MSRA's original ResNet-50](https://github.com/KaimingHe/deep-residual-networks) model. -* [R-101.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-101.pkl): converted copy of [MSRA's original ResNet-101](https://github.com/KaimingHe/deep-residual-networks) model. -* [X-101-32x8d.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/FAIR/X-101-32x8d.pkl): ResNeXt-101-32x8d model trained with Caffe2 at FB. - -Pretrained models in Detectron's format can still be used. For example: -* [X-152-32x8d-IN5k.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl): - ResNeXt-152-32x8d model trained on ImageNet-5k with Caffe2 at FB (see ResNeXt paper for details on ImageNet-5k). -* [R-50-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl): - ResNet-50 with Group Normalization. -* [R-101-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47592356/R-101-GN.pkl): - ResNet-101 with Group Normalization. - -Torchvision's ResNet models can be used after converted by [this script](tools/convert-torchvision-to-d2.py). - -#### License - -All models available for download through this document are licensed under the -[Creative Commons Attribution-ShareAlike 3.0 license](https://creativecommons.org/licenses/by-sa/3.0/). - -### COCO Object Detection Baselines - -#### Faster R-CNN: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- |
| R50-C4 | 1x | 0.551 | 0.102 | 4.8 | 35.7 | 137257644 | model \| metrics |
| R50-DC5 | 1x | 0.380 | 0.068 | 5.0 | 37.3 | 137847829 | model \| metrics |
| R50-FPN | 1x | 0.210 | 0.038 | 3.0 | 37.9 | 137257794 | model \| metrics |
| R50-C4 | 3x | 0.543 | 0.104 | 4.8 | 38.4 | 137849393 | model \| metrics |
| R50-DC5 | 3x | 0.378 | 0.070 | 5.0 | 39.0 | 137849425 | model \| metrics |
| R50-FPN | 3x | 0.209 | 0.038 | 3.0 | 40.2 | 137849458 | model \| metrics |
| R101-C4 | 3x | 0.619 | 0.139 | 5.9 | 41.1 | 138204752 | model \| metrics |
| R101-DC5 | 3x | 0.452 | 0.086 | 6.1 | 40.6 | 138204841 | model \| metrics |
| R101-FPN | 3x | 0.286 | 0.051 | 4.1 | 42.0 | 137851257 | model \| metrics |
| X101-FPN | 3x | 0.638 | 0.098 | 6.7 | 43.0 | 139173657 | model \| metrics |
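The notes above say these baselines can be loaded programmatically through the `detectron2.model_zoo` API. As a hedged illustration (not part of the original file), the sketch below builds the R50-FPN 3x detector from its config name and runs it on one image; the score threshold and `input.jpg` path are placeholder choices.

```python
# Minimal sketch: build a trained Faster R-CNN baseline from the model zoo
# and run single-image inference. "input.jpg" and the 0.5 threshold are
# placeholders, not values taken from this table.
import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # keep only confident detections

predictor = DefaultPredictor(cfg)             # handles resizing/normalization internally
outputs = predictor(cv2.imread("input.jpg"))  # BGR image in, dict of predictions out
print(outputs["instances"].pred_boxes, outputs["instances"].scores)
```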
#### RetinaNet:

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- |
| R50 | 1x | 0.200 | 0.055 | 3.9 | 36.5 | 137593951 | model \| metrics |
| R50 | 3x | 0.201 | 0.055 | 3.9 | 37.9 | 137849486 | model \| metrics |
| R101 | 3x | 0.280 | 0.068 | 5.1 | 39.9 | 138363263 | model \| metrics |
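For quick experiments, the model zoo also exposes a one-call helper that builds a model and optionally loads its released weights. A minimal sketch, assuming the RetinaNet R50 3x config path used in this model zoo:

```python
# Sketch: one-call construction of a RetinaNet baseline; trained=True loads
# the released checkpoint corresponding to the row above.
from detectron2 import model_zoo

model = model_zoo.get("COCO-Detection/retinanet_R_50_FPN_3x.yaml", trained=True)
model.eval()  # the returned object is a plain torch.nn.Module
```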
#### RPN & Fast R-CNN:

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | prop. AR | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| RPN R50-C4 | 1x | 0.130 | 0.034 | 1.5 | | 51.6 | 137258005 | model \| metrics |
| RPN R50-FPN | 1x | 0.186 | 0.032 | 2.7 | | 58.0 | 137258492 | model \| metrics |
| Fast R-CNN R50-FPN | 1x | 0.140 | 0.029 | 2.6 | 37.8 | | 137635226 | model \| metrics |
### COCO Instance Segmentation Baselines with Mask R-CNN

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| R50-C4 | 1x | 0.584 | 0.110 | 5.2 | 36.8 | 32.2 | 137259246 | model \| metrics |
| R50-DC5 | 1x | 0.471 | 0.076 | 6.5 | 38.3 | 34.2 | 137260150 | model \| metrics |
| R50-FPN | 1x | 0.261 | 0.043 | 3.4 | 38.6 | 35.2 | 137260431 | model \| metrics |
| R50-C4 | 3x | 0.575 | 0.111 | 5.2 | 39.8 | 34.4 | 137849525 | model \| metrics |
| R50-DC5 | 3x | 0.470 | 0.076 | 6.5 | 40.0 | 35.9 | 137849551 | model \| metrics |
| R50-FPN | 3x | 0.261 | 0.043 | 3.4 | 41.0 | 37.2 | 137849600 | model \| metrics |
| R101-C4 | 3x | 0.652 | 0.145 | 6.3 | 42.6 | 36.7 | 138363239 | model \| metrics |
| R101-DC5 | 3x | 0.545 | 0.092 | 7.6 | 41.9 | 37.3 | 138363294 | model \| metrics |
| R101-FPN | 3x | 0.340 | 0.056 | 4.6 | 42.9 | 38.6 | 138205316 | model \| metrics |
| X101-FPN | 3x | 0.690 | 0.103 | 7.2 | 44.3 | 39.5 | 139653917 | model \| metrics |
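Per the notes at the top, the AP and inference-time columns come from `inference_on_dataset()` with batch size 1. The following is a hedged sketch of that evaluation loop for the R50-FPN 3x Mask R-CNN baseline; it assumes `coco_2017_val` is registered under detectron2's default name, the output directory is a placeholder, and the evaluator's exact constructor arguments have shifted slightly across detectron2 releases.

```python
# Sketch: evaluate a Mask R-CNN baseline on COCO val2017 via
# inference_on_dataset(), mirroring how the table numbers are described.
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.data import build_detection_test_loader
from detectron2.engine import DefaultPredictor
from detectron2.evaluation import COCOEvaluator, inference_on_dataset

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")

predictor = DefaultPredictor(cfg)
evaluator = COCOEvaluator("coco_2017_val", output_dir="./eval_output")  # placeholder output dir
loader = build_detection_test_loader(cfg, "coco_2017_val")
print(inference_on_dataset(predictor.model, loader, evaluator))  # reports box AP and mask AP
```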
### COCO Person Keypoint Detection Baselines with Keypoint R-CNN

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | kp. AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| R50-FPN | 1x | 0.315 | 0.072 | 5.0 | 53.6 | 64.0 | 137261548 | model \| metrics |
| R50-FPN | 3x | 0.316 | 0.066 | 5.0 | 55.4 | 65.5 | 137849621 | model \| metrics |
| R101-FPN | 3x | 0.390 | 0.076 | 6.1 | 56.4 | 66.1 | 138363331 | model \| metrics |
| X101-FPN | 3x | 0.738 | 0.121 | 8.7 | 57.3 | 66.0 | 139686956 | model \| metrics |
### COCO Panoptic Segmentation Baselines with Panoptic FPN

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | PQ | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| R50-FPN | 1x | 0.304 | 0.053 | 4.8 | 37.6 | 34.7 | 39.4 | 139514544 | model \| metrics |
| R50-FPN | 3x | 0.302 | 0.053 | 4.8 | 40.0 | 36.5 | 41.5 | 139514569 | model \| metrics |
| R101-FPN | 3x | 0.392 | 0.066 | 6.0 | 42.4 | 38.5 | 43.0 | 139514519 | model \| metrics |
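Panoptic FPN models return a panoptic prediction alongside the usual instances; when run through `DefaultPredictor`, it is assumed here to arrive under the `"panoptic_seg"` key as a per-pixel id map plus per-segment metadata. A short sketch (the image path is a placeholder):

```python
# Sketch: read the panoptic prediction of a Panoptic FPN baseline.
import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml")

outputs = DefaultPredictor(cfg)(cv2.imread("input.jpg"))  # placeholder image path
panoptic_seg, segments_info = outputs["panoptic_seg"]     # per-pixel segment ids + metadata
print(panoptic_seg.shape, len(segments_info))
```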
### LVIS Instance Segmentation Baselines with Mask R-CNN

Mask R-CNN baselines on the [LVIS dataset](https://lvisdataset.org), v0.5.
These baselines are described in Table 3(c) of the [LVIS paper](https://arxiv.org/abs/1908.03195).

NOTE: the 1x schedule here has the same number of __iterations__ as the COCO 1x baselines,
which corresponds to roughly 24 epochs of LVISv0.5 data.
The final results of these configs have large variance across different runs.

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| R50-FPN | 1x | 0.292 | 0.107 | 7.1 | 23.6 | 24.4 | 144219072 | model \| metrics |
| R101-FPN | 1x | 0.371 | 0.114 | 7.8 | 25.6 | 25.9 | 144219035 | model \| metrics |
| X101-FPN | 1x | 0.712 | 0.151 | 10.2 | 26.7 | 27.1 | 144219108 | model \| metrics |
### Cityscapes & Pascal VOC Baselines

Simple baselines for
* Mask R-CNN on Cityscapes instance segmentation (initialized from COCO pre-training, then trained on Cityscapes fine annotations only)
* Faster R-CNN on PASCAL VOC object detection (trained on VOC 2007 train+val + VOC 2012 train+val, tested on VOC 2007 using 11-point interpolated AP)

| Name | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | box AP50 | mask AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| R50-FPN, Cityscapes | 0.240 | 0.078 | 4.4 | | | 36.5 | 142423278 | model \| metrics |
| R50-C4, VOC | 0.537 | 0.081 | 4.8 | 51.9 | 80.3 | | 142202221 | model \| metrics |
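The Cityscapes baseline above is described as COCO pre-training followed by fine-tuning on the fine annotations only, and the Cityscapes config later in this diff does exactly that by swapping in COCO weights and setting 8 classes. A hedged sketch of the same recipe applied to an arbitrary registered dataset; `my_dataset_train` and the solver values below are placeholders, not the repo's settings.

```python
# Sketch: COCO-pretrained Mask R-CNN fine-tuned on another instance
# segmentation dataset, following the Cityscapes-style recipe.
# "my_dataset_train" must already be registered with detectron2.
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultTrainer

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.DATASETS.TRAIN = ("my_dataset_train",)  # placeholder dataset name
cfg.DATASETS.TEST = ()
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 8         # e.g. the 8 Cityscapes "thing" classes
cfg.SOLVER.IMS_PER_BATCH = 8
cfg.SOLVER.BASE_LR = 0.01

trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
```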
### Other Settings

Ablations for Deformable Conv and Cascade R-CNN:

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| Baseline R50-FPN | 1x | 0.261 | 0.043 | 3.4 | 38.6 | 35.2 | 137260431 | model \| metrics |
| Deformable Conv | 1x | 0.342 | 0.048 | 3.5 | 41.5 | 37.5 | 138602867 | model \| metrics |
| Cascade R-CNN | 1x | 0.317 | 0.052 | 4.0 | 42.1 | 36.4 | 138602847 | model \| metrics |
| Baseline R50-FPN | 3x | 0.261 | 0.043 | 3.4 | 41.0 | 37.2 | 137849600 | model \| metrics |
| Deformable Conv | 3x | 0.349 | 0.047 | 3.5 | 42.7 | 38.5 | 144998336 | model \| metrics |
| Cascade R-CNN | 3x | 0.328 | 0.053 | 4.0 | 44.3 | 38.5 | 144998488 | model \| metrics |
Ablations for normalization methods, and a few models trained from scratch following [Rethinking ImageNet Pre-training](https://arxiv.org/abs/1811.08883).
(Note: the baseline uses a `2fc` head while the others use a [`4conv1fc` head](https://arxiv.org/abs/1803.08494).)

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| Baseline R50-FPN | 3x | 0.261 | 0.043 | 3.4 | 41.0 | 37.2 | 137849600 | model \| metrics |
| GN | 3x | 0.356 | 0.069 | 7.3 | 42.6 | 38.6 | 138602888 | model \| metrics |
| SyncBN | 3x | 0.371 | 0.053 | 5.5 | 41.9 | 37.8 | 169527823 | model \| metrics |
| GN (from scratch) | 3x | 0.400 | 0.069 | 9.8 | 39.9 | 36.6 | 138602908 | model \| metrics |
| GN (from scratch) | 9x | N/A | 0.070 | 9.8 | 43.7 | 39.6 | 183808979 | model \| metrics |
| SyncBN (from scratch) | 9x | N/A | 0.055 | 7.2 | 43.6 | 39.3 | 184226666 | model \| metrics |
A few very large models trained for a long time, for demo purposes. They are trained using multiple machines:

| Name | inference time (s/im) | train mem (GB) | box AP | mask AP | PQ | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- |
| Panoptic FPN R101 | 0.107 | 11.4 | 47.4 | 41.3 | 46.1 | 139797668 | model \| metrics |
| Mask R-CNN X152 | 0.242 | 15.1 | 50.2 | 44.0 | | 18131413 | model \| metrics |
| above + test-time aug. | | | 51.9 | 45.9 | | | |
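The last row adds test-time augmentation on top of the X152 model; in detectron2 this is exposed as a wrapper module driven by the `TEST.AUG` options (the demo config later in this diff enables it). A minimal sketch, assuming the model-zoo helper used earlier and default TTA settings:

```python
# Sketch: wrap a trained Mask R-CNN with test-time augmentation, which is
# what the "+ test-time aug." row refers to; flips/scales come from TEST.AUG.
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.modeling import GeneralizedRCNNWithTTA

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
model = model_zoo.get("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml", trained=True)
tta_model = GeneralizedRCNNWithTTA(cfg, model)  # averages predictions over augmented views
```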
diff --git a/preprocess/humanparsing/mhp_extension/detectron2/README.md b/preprocess/humanparsing/mhp_extension/detectron2/README.md deleted file mode 100644 index 1fbb95b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/README.md +++ /dev/null @@ -1,56 +0,0 @@ - - -Detectron2 is Facebook AI Research's next generation software system -that implements state-of-the-art object detection algorithms. -It is a ground-up rewrite of the previous version, -[Detectron](https://github.com/facebookresearch/Detectron/), -and it originates from [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark/). - -
- -### What's New -* It is powered by the [PyTorch](https://pytorch.org) deep learning framework. -* Includes more features such as panoptic segmentation, densepose, Cascade R-CNN, rotated bounding boxes, etc. -* Can be used as a library to support [different projects](projects/) on top of it. - We'll open source more research projects in this way. -* It [trains much faster](https://detectron2.readthedocs.io/notes/benchmarks.html). - -See our [blog post](https://ai.facebook.com/blog/-detectron2-a-pytorch-based-modular-object-detection-library-/) -to see more demos and learn about detectron2. - -## Installation - -See [INSTALL.md](INSTALL.md). - -## Quick Start - -See [GETTING_STARTED.md](GETTING_STARTED.md), -or the [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5). - -Learn more at our [documentation](https://detectron2.readthedocs.org). -And see [projects/](projects/) for some projects that are built on top of detectron2. - -## Model Zoo and Baselines - -We provide a large set of baseline results and trained models available for download in the [Detectron2 Model Zoo](MODEL_ZOO.md). - - -## License - -Detectron2 is released under the [Apache 2.0 license](LICENSE). - -## Citing Detectron2 - -If you use Detectron2 in your research or wish to refer to the baseline results published in the [Model Zoo](MODEL_ZOO.md), please use the following BibTeX entry. - -```BibTeX -@misc{wu2019detectron2, - author = {Yuxin Wu and Alexander Kirillov and Francisco Massa and - Wan-Yen Lo and Ross Girshick}, - title = {Detectron2}, - howpublished = {\url{https://github.com/facebookresearch/detectron2}}, - year = {2019} -} -``` diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RCNN-C4.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RCNN-C4.yaml deleted file mode 100644 index fbf34a0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RCNN-C4.yaml +++ /dev/null @@ -1,18 +0,0 @@ -MODEL: - META_ARCHITECTURE: "GeneralizedRCNN" - RPN: - PRE_NMS_TOPK_TEST: 6000 - POST_NMS_TOPK_TEST: 1000 - ROI_HEADS: - NAME: "Res5ROIHeads" -DATASETS: - TRAIN: ("coco_2017_train",) - TEST: ("coco_2017_val",) -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.02 - STEPS: (60000, 80000) - MAX_ITER: 90000 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -VERSION: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RCNN-DilatedC5.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RCNN-DilatedC5.yaml deleted file mode 100644 index c0d6d16..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RCNN-DilatedC5.yaml +++ /dev/null @@ -1,31 +0,0 @@ -MODEL: - META_ARCHITECTURE: "GeneralizedRCNN" - RESNETS: - OUT_FEATURES: ["res5"] - RES5_DILATION: 2 - RPN: - IN_FEATURES: ["res5"] - PRE_NMS_TOPK_TEST: 6000 - POST_NMS_TOPK_TEST: 1000 - ROI_HEADS: - NAME: "StandardROIHeads" - IN_FEATURES: ["res5"] - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_FC: 2 - POOLER_RESOLUTION: 7 - ROI_MASK_HEAD: - NAME: "MaskRCNNConvUpsampleHead" - NUM_CONV: 4 - POOLER_RESOLUTION: 14 -DATASETS: - TRAIN: ("coco_2017_train",) - TEST: ("coco_2017_val",) -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.02 - STEPS: (60000, 80000) - MAX_ITER: 90000 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -VERSION: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RCNN-FPN.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RCNN-FPN.yaml deleted file mode 100644 
index 3e020f2..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RCNN-FPN.yaml +++ /dev/null @@ -1,42 +0,0 @@ -MODEL: - META_ARCHITECTURE: "GeneralizedRCNN" - BACKBONE: - NAME: "build_resnet_fpn_backbone" - RESNETS: - OUT_FEATURES: ["res2", "res3", "res4", "res5"] - FPN: - IN_FEATURES: ["res2", "res3", "res4", "res5"] - ANCHOR_GENERATOR: - SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map - ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) - RPN: - IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] - PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level - PRE_NMS_TOPK_TEST: 1000 # Per FPN level - # Detectron1 uses 2000 proposals per-batch, - # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) - # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. - POST_NMS_TOPK_TRAIN: 1000 - POST_NMS_TOPK_TEST: 1000 - ROI_HEADS: - NAME: "StandardROIHeads" - IN_FEATURES: ["p2", "p3", "p4", "p5"] - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_FC: 2 - POOLER_RESOLUTION: 7 - ROI_MASK_HEAD: - NAME: "MaskRCNNConvUpsampleHead" - NUM_CONV: 4 - POOLER_RESOLUTION: 14 -DATASETS: - TRAIN: ("coco_2017_train",) - TEST: ("coco_2017_val",) -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.02 - STEPS: (60000, 80000) - MAX_ITER: 90000 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -VERSION: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RetinaNet.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RetinaNet.yaml deleted file mode 100644 index 12ec9d2..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RetinaNet.yaml +++ /dev/null @@ -1,24 +0,0 @@ -MODEL: - META_ARCHITECTURE: "RetinaNet" - BACKBONE: - NAME: "build_retinanet_resnet_fpn_backbone" - RESNETS: - OUT_FEATURES: ["res3", "res4", "res5"] - ANCHOR_GENERATOR: - SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"] - FPN: - IN_FEATURES: ["res3", "res4", "res5"] - RETINANET: - IOU_THRESHOLDS: [0.4, 0.5] - IOU_LABELS: [0, -1, 1] -DATASETS: - TRAIN: ("coco_2017_train",) - TEST: ("coco_2017_val",) -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate - STEPS: (60000, 80000) - MAX_ITER: 90000 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -VERSION: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml deleted file mode 100644 index 773ac10..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml +++ /dev/null @@ -1,17 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - LOAD_PROPOSALS: True - RESNETS: - DEPTH: 50 - PROPOSAL_GENERATOR: - NAME: "PrecomputedProposals" -DATASETS: - TRAIN: ("coco_2017_train",) - PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_train_box_proposals_21bc3a.pkl", ) - TEST: ("coco_2017_val",) - PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) -DATALOADER: - # proposals are part of the dataset_dicts, and take a lot of RAM - NUM_WORKERS: 2 diff --git 
a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml deleted file mode 100644 index db142cd..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - MASK_ON: False - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml deleted file mode 100644 index bceb6b3..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-DilatedC5.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - MASK_ON: False - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml deleted file mode 100644 index 57a098f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - MASK_ON: False - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml deleted file mode 100644 index f961301..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml +++ /dev/null @@ -1,6 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml deleted file mode 100644 index bc51bce..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml deleted file mode 100644 index 0fe96f5..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml +++ /dev/null @@ -1,6 +0,0 @@ -_BASE_: "../Base-RCNN-DilatedC5.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 diff --git 
a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml deleted file mode 100644 index 33fadeb..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-DilatedC5.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml deleted file mode 100644 index 3262019..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml +++ /dev/null @@ -1,6 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml deleted file mode 100644 index 4139518..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml deleted file mode 100644 index 9c9b5ab..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml +++ /dev/null @@ -1,13 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - MASK_ON: False - WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" - PIXEL_STD: [57.375, 57.120, 58.395] - RESNETS: - STRIDE_IN_1X1: False # this is a C2 model - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml deleted file mode 100644 index 4abb1b9..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "../Base-RetinaNet.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml deleted file mode 100644 index 4a24ce3..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml +++ /dev/null @@ -1,5 +0,0 @@ -_BASE_: "../Base-RetinaNet.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - 
RESNETS: - DEPTH: 50 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml deleted file mode 100644 index 3b5412d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "../Base-RetinaNet.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/rpn_R_50_C4_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/rpn_R_50_C4_1x.yaml deleted file mode 100644 index e048211..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/rpn_R_50_C4_1x.yaml +++ /dev/null @@ -1,10 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - META_ARCHITECTURE: "ProposalNetwork" - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 - RPN: - PRE_NMS_TOPK_TEST: 12000 - POST_NMS_TOPK_TEST: 2000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml deleted file mode 100644 index dc9c952..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "ProposalNetwork" - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 - RPN: - POST_NMS_TOPK_TEST: 2000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml deleted file mode 100644 index 1a94cc4..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - MASK_ON: True - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml deleted file mode 100644 index 67b70cf..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-DilatedC5.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - MASK_ON: True - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml deleted file mode 100644 index 1935a30..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: 
"detectron2://ImageNetPretrained/MSRA/R-101.pkl" - MASK_ON: True - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml deleted file mode 100644 index a9aeb4e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml +++ /dev/null @@ -1,6 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml deleted file mode 100644 index 38ed867..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml deleted file mode 100644 index b13eefa..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml +++ /dev/null @@ -1,6 +0,0 @@ -_BASE_: "../Base-RCNN-DilatedC5.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml deleted file mode 100644 index d401016..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-DilatedC5.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml deleted file mode 100644 index d50fb86..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml +++ /dev/null @@ -1,6 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml deleted file mode 100644 index be7d06b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: 
"../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml deleted file mode 100644 index d14c63f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml +++ /dev/null @@ -1,13 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - MASK_ON: True - WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" - PIXEL_STD: [57.375, 57.120, 58.395] - RESNETS: - STRIDE_IN_1X1: False # this is a C2 model - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml deleted file mode 100644 index 4e03944..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml +++ /dev/null @@ -1,15 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - KEYPOINT_ON: True - ROI_HEADS: - NUM_CLASSES: 1 - ROI_BOX_HEAD: - SMOOTH_L1_BETA: 0.5 # Keypoint AP degrades (though box AP improves) when using plain L1 loss - RPN: - # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2. - # 1000 proposals per-image is found to hurt box AP. - # Therefore we increase it to 1500 per-image. - POST_NMS_TOPK_TRAIN: 1500 -DATASETS: - TRAIN: ("keypoints_coco_2017_train",) - TEST: ("keypoints_coco_2017_val",) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml deleted file mode 100644 index 9309535..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "Base-Keypoint-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml deleted file mode 100644 index 7bf85cf..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml +++ /dev/null @@ -1,5 +0,0 @@ -_BASE_: "Base-Keypoint-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml deleted file mode 100644 index a07f243..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "Base-Keypoint-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 -SOLVER: - 
STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml deleted file mode 100644 index d4bfa20..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml +++ /dev/null @@ -1,12 +0,0 @@ -_BASE_: "Base-Keypoint-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" - PIXEL_STD: [57.375, 57.120, 58.395] - RESNETS: - STRIDE_IN_1X1: False # this is a C2 model - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml deleted file mode 100644 index 755c120..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "PanopticFPN" - MASK_ON: True - SEM_SEG_HEAD: - LOSS_WEIGHT: 0.5 -DATASETS: - TRAIN: ("coco_2017_train_panoptic_separated",) - TEST: ("coco_2017_val_panoptic_separated",) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml deleted file mode 100644 index 0e01f6f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "Base-Panoptic-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml deleted file mode 100644 index 6afa2c1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml +++ /dev/null @@ -1,5 +0,0 @@ -_BASE_: "Base-Panoptic-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml deleted file mode 100644 index b956b3f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "Base-Panoptic-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml deleted file mode 100644 index 1a7aaeb..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml +++ 
/dev/null @@ -1,27 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - # For better, more stable performance initialize from COCO - WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" - MASK_ON: True - ROI_HEADS: - NUM_CLASSES: 8 -# This is similar to the setting used in Mask R-CNN paper, Appendix A -# But there are some differences, e.g., we did not initialize the output -# layer using the corresponding classes from COCO -INPUT: - MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024) - MIN_SIZE_TRAIN_SAMPLING: "choice" - MIN_SIZE_TEST: 1024 - MAX_SIZE_TRAIN: 2048 - MAX_SIZE_TEST: 2048 -DATASETS: - TRAIN: ("cityscapes_fine_instance_seg_train",) - TEST: ("cityscapes_fine_instance_seg_val",) -SOLVER: - BASE_LR: 0.01 - STEPS: (18000,) - MAX_ITER: 24000 - IMS_PER_BATCH: 8 -TEST: - EVAL_PERIOD: 8000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Detectron1-Comparisons/README.md b/preprocess/humanparsing/mhp_extension/detectron2/configs/Detectron1-Comparisons/README.md deleted file mode 100644 index a90ed9e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Detectron1-Comparisons/README.md +++ /dev/null @@ -1,83 +0,0 @@ - -Detectron2 model zoo's experimental settings and a few implementation details are different from Detectron. - -The differences in implementation details are shared in -[Compatibility with Other Libraries](../../docs/notes/compatibility.md). - -The differences in model zoo's experimental settings include: -* Use scale augmentation during training. This improves AP with lower training cost. -* Use L1 loss instead of smooth L1 loss for simplicity. This sometimes improves box AP but may - affect other AP. -* Use `POOLER_SAMPLING_RATIO=0` instead of 2. This does not significantly affect AP. -* Use `ROIAlignV2`. This does not significantly affect AP. - -In this directory, we provide a few configs that __do not__ have the above changes. -They mimic Detectron's behavior as close as possible, -and provide a fair comparison of accuracy and speed against Detectron. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | kp. AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| Faster R-CNN | 1x | 0.219 | 0.038 | 3.1 | 36.9 | | | 137781054 | model \| metrics |
| Keypoint R-CNN | 1x | 0.313 | 0.071 | 5.0 | 53.1 | | 64.2 | 137781195 | model \| metrics |
| Mask R-CNN | 1x | 0.273 | 0.043 | 3.4 | 37.8 | 34.9 | | 137781281 | model \| metrics |
- -## Comparisons: - -* Faster R-CNN: Detectron's AP is 36.7, similar to ours. -* Keypoint R-CNN: Detectron's AP is box 53.6, keypoint 64.2. Fixing a Detectron's - [bug](https://github.com/facebookresearch/Detectron/issues/459) lead to a drop in box AP, and can be - compensated back by some parameter tuning. -* Mask R-CNN: Detectron's AP is box 37.7, mask 33.9. We're 1 AP better in mask AP, due to more correct implementation. - -For speed comparison, see [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html). diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml deleted file mode 100644 index 6ce77f1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml +++ /dev/null @@ -1,17 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 - # Detectron1 uses smooth L1 loss with some magic beta values. - # The defaults are changed to L1 loss in Detectron2. - RPN: - SMOOTH_L1_BETA: 0.1111 - ROI_BOX_HEAD: - SMOOTH_L1_BETA: 1.0 - POOLER_SAMPLING_RATIO: 2 - POOLER_TYPE: "ROIAlign" -INPUT: - # no scale augmentation - MIN_SIZE_TRAIN: (800, ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml deleted file mode 100644 index aacf868..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml +++ /dev/null @@ -1,27 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - KEYPOINT_ON: True - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 1 - ROI_KEYPOINT_HEAD: - POOLER_RESOLUTION: 14 - POOLER_SAMPLING_RATIO: 2 - POOLER_TYPE: "ROIAlign" - # Detectron1 uses smooth L1 loss with some magic beta values. - # The defaults are changed to L1 loss in Detectron2. - ROI_BOX_HEAD: - SMOOTH_L1_BETA: 1.0 - POOLER_SAMPLING_RATIO: 2 - POOLER_TYPE: "ROIAlign" - RPN: - SMOOTH_L1_BETA: 0.1111 - # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2 - # 1000 proposals per-image is found to hurt box AP. - # Therefore we increase it to 1500 per-image. - POST_NMS_TOPK_TRAIN: 1500 -DATASETS: - TRAIN: ("keypoints_coco_2017_train",) - TEST: ("keypoints_coco_2017_val",) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml deleted file mode 100644 index 4ea86a8..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml +++ /dev/null @@ -1,20 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - # Detectron1 uses smooth L1 loss with some magic beta values. - # The defaults are changed to L1 loss in Detectron2. 
- RPN: - SMOOTH_L1_BETA: 0.1111 - ROI_BOX_HEAD: - SMOOTH_L1_BETA: 1.0 - POOLER_SAMPLING_RATIO: 2 - POOLER_TYPE: "ROIAlign" - ROI_MASK_HEAD: - POOLER_SAMPLING_RATIO: 2 - POOLER_TYPE: "ROIAlign" -INPUT: - # no scale augmentation - MIN_SIZE_TRAIN: (800, ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/LVIS-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/LVIS-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml deleted file mode 100644 index f0c3a1b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/LVIS-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml +++ /dev/null @@ -1,19 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - MASK_ON: True - RESNETS: - DEPTH: 101 - ROI_HEADS: - NUM_CLASSES: 1230 - SCORE_THRESH_TEST: 0.0001 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -DATASETS: - TRAIN: ("lvis_v0.5_train",) - TEST: ("lvis_v0.5_val",) -TEST: - DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 -DATALOADER: - SAMPLER_TRAIN: "RepeatFactorTrainingSampler" - REPEAT_THRESHOLD: 0.001 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/LVIS-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/LVIS-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml deleted file mode 100644 index 64b4caa..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/LVIS-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml +++ /dev/null @@ -1,19 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 1230 - SCORE_THRESH_TEST: 0.0001 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -DATASETS: - TRAIN: ("lvis_v0.5_train",) - TEST: ("lvis_v0.5_val",) -TEST: - DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 -DATALOADER: - SAMPLER_TRAIN: "RepeatFactorTrainingSampler" - REPEAT_THRESHOLD: 0.001 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/LVIS-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/LVIS-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml deleted file mode 100644 index c8b822c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/LVIS-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml +++ /dev/null @@ -1,23 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" - PIXEL_STD: [57.375, 57.120, 58.395] - MASK_ON: True - RESNETS: - STRIDE_IN_1X1: False # this is a C2 model - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 - DEPTH: 101 - ROI_HEADS: - NUM_CLASSES: 1230 - SCORE_THRESH_TEST: 0.0001 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -DATASETS: - TRAIN: ("lvis_v0.5_train",) - TEST: ("lvis_v0.5_val",) -TEST: - DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 -DATALOADER: - SAMPLER_TRAIN: "RepeatFactorTrainingSampler" - REPEAT_THRESHOLD: 0.001 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml deleted file mode 100644 index abb33b6..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml +++ /dev/null @@ -1,12 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: 
"detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - ROI_HEADS: - NAME: CascadeROIHeads - ROI_BOX_HEAD: - CLS_AGNOSTIC_BBOX_REG: True - RPN: - POST_NMS_TOPK_TRAIN: 2000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml deleted file mode 100644 index e2201ad..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml +++ /dev/null @@ -1,15 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - ROI_HEADS: - NAME: CascadeROIHeads - ROI_BOX_HEAD: - CLS_AGNOSTIC_BBOX_REG: True - RPN: - POST_NMS_TOPK_TRAIN: 2000 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml deleted file mode 100644 index fc117f6..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml +++ /dev/null @@ -1,36 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - MASK_ON: True - WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k" - RESNETS: - STRIDE_IN_1X1: False # this is a C2 model - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 - DEPTH: 152 - DEFORM_ON_PER_STAGE: [False, True, True, True] - ROI_HEADS: - NAME: "CascadeROIHeads" - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_CONV: 4 - NUM_FC: 1 - NORM: "GN" - CLS_AGNOSTIC_BBOX_REG: True - ROI_MASK_HEAD: - NUM_CONV: 8 - NORM: "GN" - RPN: - POST_NMS_TOPK_TRAIN: 2000 -SOLVER: - IMS_PER_BATCH: 128 - STEPS: (35000, 45000) - MAX_ITER: 50000 - BASE_LR: 0.16 -INPUT: - MIN_SIZE_TRAIN: (640, 864) - MIN_SIZE_TRAIN_SAMPLING: "range" - MAX_SIZE_TRAIN: 1440 - CROP: - ENABLED: True -TEST: - EVAL_PERIOD: 2500 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv_parsing.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv_parsing.yaml deleted file mode 100644 index 544f58f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv_parsing.yaml +++ /dev/null @@ -1,42 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - MASK_ON: True -# WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k" - WEIGHTS: "model_0039999_e76410.pkl" - RESNETS: - STRIDE_IN_1X1: False # this is a C2 model - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 - DEPTH: 152 - DEFORM_ON_PER_STAGE: [False, True, True, True] - ROI_HEADS: - NAME: "CascadeROIHeads" - NUM_CLASSES: 1 - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_CONV: 4 - NUM_FC: 1 - NORM: "GN" - CLS_AGNOSTIC_BBOX_REG: True - ROI_MASK_HEAD: - NUM_CONV: 8 - NORM: "GN" - RPN: - POST_NMS_TOPK_TRAIN: 2000 -SOLVER: -# IMS_PER_BATCH: 128 - IMS_PER_BATCH: 1 - STEPS: (35000, 45000) - MAX_ITER: 50000 - BASE_LR: 0.16 -INPUT: - MIN_SIZE_TRAIN: (640, 864) - MIN_SIZE_TRAIN_SAMPLING: "range" - MAX_SIZE_TRAIN: 1440 - CROP: - ENABLED: True -TEST: - EVAL_PERIOD: 2500 -DATASETS: - TRAIN: ("CIHP_train","VIP_trainval") - TEST: ("CIHP_val",) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/demo.yaml 
b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/demo.yaml deleted file mode 100644 index bbf9685..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/demo.yaml +++ /dev/null @@ -1,25 +0,0 @@ -_BASE_: "cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml" -MODEL: - MASK_ON: True - ROI_HEADS: - NMS_THRESH_TEST: 0.95 - SCORE_THRESH_TEST: 0.5 - NUM_CLASSES: 1 -SOLVER: - IMS_PER_BATCH: 1 - STEPS: (30000, 45000) - MAX_ITER: 50000 - BASE_LR: 0.02 -INPUT: - MIN_SIZE_TRAIN: (640, 864) - MIN_SIZE_TRAIN_SAMPLING: "range" - MAX_SIZE_TRAIN: 1440 - CROP: - ENABLED: True -TEST: - AUG: - ENABLED: True -DATASETS: - TRAIN: ("demo_train",) - TEST: ("demo_val",) -OUTPUT_DIR: "../../data/DemoDataset/detectron2_prediction" diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml deleted file mode 100644 index 4c3b767..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml +++ /dev/null @@ -1,10 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - ROI_BOX_HEAD: - CLS_AGNOSTIC_BBOX_REG: True - ROI_MASK_HEAD: - CLS_AGNOSTIC_MASK: True diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml deleted file mode 100644 index 04ff988..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 - DEFORM_MODULATED: False diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml deleted file mode 100644 index 68c0ca5..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml +++ /dev/null @@ -1,11 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 - DEFORM_MODULATED: False -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml deleted file mode 100644 index 74d274e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml +++ /dev/null @@ -1,21 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN" - MASK_ON: True - RESNETS: - DEPTH: 50 - NORM: "GN" - STRIDE_IN_1X1: False - FPN: - NORM: "GN" - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_CONV: 4 - NUM_FC: 1 - NORM: "GN" - ROI_MASK_HEAD: - NORM: "GN" -SOLVER: - # 3x schedule - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git 
a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml deleted file mode 100644 index 11ebb07..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml +++ /dev/null @@ -1,24 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - NORM: "SyncBN" - STRIDE_IN_1X1: True - FPN: - NORM: "SyncBN" - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_CONV: 4 - NUM_FC: 1 - NORM: "SyncBN" - ROI_MASK_HEAD: - NORM: "SyncBN" -SOLVER: - # 3x schedule - STEPS: (210000, 250000) - MAX_ITER: 270000 -TEST: - PRECISE_BN: - ENABLED: True diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml deleted file mode 100644 index 34016ce..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# A large PanopticFPN for demo purposes. -# Use GN on backbone to support semantic seg. -# Use Cascade + Deform Conv to improve localization. -_BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml" -MODEL: - WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN" - RESNETS: - DEPTH: 101 - NORM: "GN" - DEFORM_ON_PER_STAGE: [False, True, True, True] - STRIDE_IN_1X1: False - FPN: - NORM: "GN" - ROI_HEADS: - NAME: CascadeROIHeads - ROI_BOX_HEAD: - CLS_AGNOSTIC_BBOX_REG: True - ROI_MASK_HEAD: - NORM: "GN" - RPN: - POST_NMS_TOPK_TRAIN: 2000 -SOLVER: - STEPS: (105000, 125000) - MAX_ITER: 135000 - IMS_PER_BATCH: 32 - BASE_LR: 0.04 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/parsing_finetune_cihp.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/parsing_finetune_cihp.yaml deleted file mode 100644 index 766f46a..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/parsing_finetune_cihp.yaml +++ /dev/null @@ -1,24 +0,0 @@ -_BASE_: "cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml" -MODEL: - MASK_ON: True - WEIGHTS: "model_0039999_e76410.pkl" - ROI_HEADS: - NUM_CLASSES: 1 -SOLVER: - IMS_PER_BATCH: 16 - STEPS: (140000, 180000) - MAX_ITER: 200000 - BASE_LR: 0.02 -INPUT: - MIN_SIZE_TRAIN: (640, 864) - MIN_SIZE_TRAIN_SAMPLING: "range" - MAX_SIZE_TRAIN: 1440 - CROP: - ENABLED: True -TEST: - EVAL_PERIOD: 0 -DATASETS: - TRAIN: ("CIHP_train") - TEST: ("CIHP_val",) -OUTPUT_DIR: "./finetune_output" - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/parsing_inference.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/parsing_inference.yaml deleted file mode 100644 index d6a529b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/parsing_inference.yaml +++ /dev/null @@ -1,26 +0,0 @@ -_BASE_: "cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml" -MODEL: - MASK_ON: True - WEIGHTS: "./finetune_ouput/model_final.pth" - ROI_HEADS: - NMS_THRESH_TEST: 0.95 - SCORE_THRESH_TEST: 0.5 - NUM_CLASSES: 1 -SOLVER: - IMS_PER_BATCH: 1 - STEPS: (30000, 45000) - MAX_ITER: 50000 - BASE_LR: 0.02 -INPUT: - MIN_SIZE_TRAIN: (640, 864) - MIN_SIZE_TRAIN_SAMPLING: "range" - MAX_SIZE_TRAIN: 1440 - CROP: - ENABLED: True -TEST: - AUG: - ENABLED: True -DATASETS: - TRAIN: ("CIHP_trainval",) - 
TEST: ("CIHP_test",) -OUTPUT_DIR: "./inference_output" diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml deleted file mode 100644 index f340028..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml +++ /dev/null @@ -1,13 +0,0 @@ -_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" -MODEL: - # Train from random initialization. - WEIGHTS: "" - # It makes sense to divide by STD when training from scratch - # But it seems to make no difference on the results and C2's models didn't do this. - # So we keep things consistent with C2. - # PIXEL_STD: [57.375, 57.12, 58.395] - MASK_ON: True - BACKBONE: - FREEZE_AT: 0 -# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 -# to learn what you need for training from scratch. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml deleted file mode 100644 index d90c9ff..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml +++ /dev/null @@ -1,19 +0,0 @@ -_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" -MODEL: - PIXEL_STD: [57.375, 57.12, 58.395] - WEIGHTS: "" - MASK_ON: True - RESNETS: - STRIDE_IN_1X1: False - BACKBONE: - FREEZE_AT: 0 -SOLVER: - # 9x schedule - IMS_PER_BATCH: 64 # 4x the standard - STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k - MAX_ITER: 202500 # 90k * 9 / 4 - BASE_LR: 0.08 -TEST: - EVAL_PERIOD: 2500 -# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 -# to learn what you need for training from scratch. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml deleted file mode 100644 index 60d4e42..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml +++ /dev/null @@ -1,19 +0,0 @@ -_BASE_: "mask_rcnn_R_50_FPN_3x_syncbn.yaml" -MODEL: - PIXEL_STD: [57.375, 57.12, 58.395] - WEIGHTS: "" - MASK_ON: True - RESNETS: - STRIDE_IN_1X1: False - BACKBONE: - FREEZE_AT: 0 -SOLVER: - # 9x schedule - IMS_PER_BATCH: 64 # 4x the standard - STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k - MAX_ITER: 202500 # 90k * 9 / 4 - BASE_LR: 0.08 -TEST: - EVAL_PERIOD: 2500 -# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 -# to learn what you need for training from scratch. 
diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/semantic_R_50_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/semantic_R_50_FPN_1x.yaml deleted file mode 100644 index ac256e1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/semantic_R_50_FPN_1x.yaml +++ /dev/null @@ -1,11 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "SemanticSegmentor" - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 -DATASETS: - TRAIN: ("coco_2017_train_panoptic_stuffonly",) - TEST: ("coco_2017_val_panoptic_stuffonly",) -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml deleted file mode 100644 index ea2a6ba..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml +++ /dev/null @@ -1,18 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 20 -INPUT: - MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) - MIN_SIZE_TEST: 800 -DATASETS: - TRAIN: ('voc_2007_trainval', 'voc_2012_trainval') - TEST: ('voc_2007_test',) -SOLVER: - STEPS: (12000, 16000) - MAX_ITER: 18000 # 17.4 epochs - WARMUP_ITERS: 100 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml deleted file mode 100644 index e554cab..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml +++ /dev/null @@ -1,18 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 20 -INPUT: - MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) - MIN_SIZE_TEST: 800 -DATASETS: - TRAIN: ('voc_2007_trainval', 'voc_2012_trainval') - TEST: ('voc_2007_test',) -SOLVER: - STEPS: (12000, 16000) - MAX_ITER: 18000 # 17.4 epochs - WARMUP_ITERS: 100 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/my_Base-RCNN-FPN.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/my_Base-RCNN-FPN.yaml deleted file mode 100644 index d649eed..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/my_Base-RCNN-FPN.yaml +++ /dev/null @@ -1,42 +0,0 @@ -MODEL: - META_ARCHITECTURE: "GeneralizedRCNN" - BACKBONE: - NAME: "build_resnet_fpn_backbone" - RESNETS: - OUT_FEATURES: ["res2", "res3", "res4", "res5"] - FPN: - IN_FEATURES: ["res2", "res3", "res4", "res5"] - ANCHOR_GENERATOR: - SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map - ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) - RPN: - IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] - PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level - PRE_NMS_TOPK_TEST: 1000 # Per FPN level - # Detectron1 uses 2000 proposals per-batch, - # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) - # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 
- POST_NMS_TOPK_TRAIN: 1000 - POST_NMS_TOPK_TEST: 1000 - ROI_HEADS: - NAME: "StandardROIHeads" - IN_FEATURES: ["p2", "p3", "p4", "p5"] - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_FC: 2 - POOLER_RESOLUTION: 7 - ROI_MASK_HEAD: - NAME: "MaskRCNNConvUpsampleHead" - NUM_CONV: 4 - POOLER_RESOLUTION: 14 -DATASETS: - TRAIN: ("coco_2017_train",) - TEST: ("coco_2017_val",) -SOLVER: - IMS_PER_BATCH: 2 - BASE_LR: 0.02 - STEPS: (60000, 80000) - MAX_ITER: 90000 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -VERSION: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/README.md b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/README.md deleted file mode 100644 index a278199..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/README.md +++ /dev/null @@ -1 +0,0 @@ -These are quick configs for performance or accuracy regression tracking purposes. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml deleted file mode 100644 index fc5a411..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml +++ /dev/null @@ -1,7 +0,0 @@ -_BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml" -MODEL: - WEIGHTS: "detectron2://Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/model_final_480dd8.pkl" -DATASETS: - TEST: ("coco_2017_val_100",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 50.18, 0.02], ["segm", "AP", 43.87, 0.02]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml deleted file mode 100644 index e41a0fe..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml +++ /dev/null @@ -1,11 +0,0 @@ -_BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml" -DATASETS: - TRAIN: ("coco_2017_val_100",) - TEST: ("coco_2017_val_100",) -SOLVER: - BASE_LR: 0.005 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml deleted file mode 100644 index a2f37e5..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml +++ /dev/null @@ -1,7 +0,0 @@ -_BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" -MODEL: - WEIGHTS: "detectron2://COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/model_final_e5f7ce.pkl" -DATASETS: - TEST: ("coco_2017_val_100",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 45.70, 0.02]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml deleted file mode 100644 index 52fc0ec..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml +++ /dev/null @@ -1,15 +0,0 @@ -_BASE_: 
"../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" -DATASETS: - TRAIN: ("coco_2017_val_100",) - PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) - TEST: ("coco_2017_val_100",) - PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) -SOLVER: - BASE_LR: 0.005 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml deleted file mode 100644 index 14cf2aa..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml +++ /dev/null @@ -1,7 +0,0 @@ -_BASE_: "../COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml" -MODEL: - WEIGHTS: "detectron2://COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/model_final_a6e10b.pkl" -DATASETS: - TEST: ("keypoints_coco_2017_val_100",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 52.47, 0.02], ["keypoints", "AP", 67.36, 0.02]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml deleted file mode 100644 index dc09034..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml +++ /dev/null @@ -1,14 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - KEYPOINT_ON: True -DATASETS: - TRAIN: ("keypoints_coco_2017_val_100",) - TEST: ("keypoints_coco_2017_val_100",) -SOLVER: - BASE_LR: 0.005 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml deleted file mode 100644 index 4b92392..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml +++ /dev/null @@ -1,30 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - KEYPOINT_ON: True - RESNETS: - DEPTH: 50 - ROI_HEADS: - BATCH_SIZE_PER_IMAGE: 256 - NUM_CLASSES: 1 - ROI_KEYPOINT_HEAD: - POOLER_RESOLUTION: 14 - POOLER_SAMPLING_RATIO: 2 - NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: False - LOSS_WEIGHT: 4.0 - ROI_BOX_HEAD: - SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss - RPN: - SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss -DATASETS: - TRAIN: ("keypoints_coco_2017_val",) - TEST: ("keypoints_coco_2017_val",) -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -SOLVER: - WARMUP_FACTOR: 0.33333333 - WARMUP_ITERS: 100 - STEPS: (5500, 5800) - MAX_ITER: 6000 -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 55.35, 1.0], ["keypoints", "AP", 76.91, 1.0]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml 
b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml deleted file mode 100644 index 9bd9628..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml +++ /dev/null @@ -1,28 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - KEYPOINT_ON: True - RESNETS: - DEPTH: 50 - ROI_HEADS: - BATCH_SIZE_PER_IMAGE: 256 - NUM_CLASSES: 1 - ROI_KEYPOINT_HEAD: - POOLER_RESOLUTION: 14 - POOLER_SAMPLING_RATIO: 2 - ROI_BOX_HEAD: - SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss - RPN: - SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss -DATASETS: - TRAIN: ("keypoints_coco_2017_val",) - TEST: ("keypoints_coco_2017_val",) -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -SOLVER: - WARMUP_FACTOR: 0.33333333 - WARMUP_ITERS: 100 - STEPS: (5500, 5800) - MAX_ITER: 6000 -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 53.5, 1.0], ["keypoints", "AP", 72.4, 1.0]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml deleted file mode 100644 index ab6e698..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml +++ /dev/null @@ -1,18 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True -DATASETS: - TRAIN: ("coco_2017_val_100",) - TEST: ("coco_2017_val_100",) -SOLVER: - BASE_LR: 0.001 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 - CLIP_GRADIENTS: - ENABLED: True - CLIP_TYPE: "value" - CLIP_VALUE: 1.0 -DATALOADER: - NUM_WORKERS: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml deleted file mode 100644 index b2d5b7f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml +++ /dev/null @@ -1,7 +0,0 @@ -_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml" -MODEL: - WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/model_final_4ce675.pkl" -DATASETS: - TEST: ("coco_2017_val_100",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 47.37, 0.02], ["segm", "AP", 40.99, 0.02]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml deleted file mode 100644 index 6c4f121..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml +++ /dev/null @@ -1,14 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True -DATASETS: - TRAIN: ("coco_2017_val_100",) - TEST: ("coco_2017_val_100",) -SOLVER: - BASE_LR: 0.001 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml 
b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml deleted file mode 100644 index f68dd8f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml +++ /dev/null @@ -1,22 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - ROI_HEADS: - BATCH_SIZE_PER_IMAGE: 256 - MASK_ON: True -DATASETS: - TRAIN: ("coco_2017_val",) - TEST: ("coco_2017_val",) -INPUT: - MIN_SIZE_TRAIN: (600,) - MAX_SIZE_TRAIN: 1000 - MIN_SIZE_TEST: 800 - MAX_SIZE_TEST: 1000 -SOLVER: - IMS_PER_BATCH: 8 # base uses 16 - WARMUP_FACTOR: 0.33333 - WARMUP_ITERS: 100 - STEPS: (11000, 11600) - MAX_ITER: 12000 -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 41.88, 0.7], ["segm", "AP", 33.79, 0.5]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml deleted file mode 100644 index e3ce6cf..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml +++ /dev/null @@ -1,7 +0,0 @@ -_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml" -MODEL: - WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/model_final_84107b.pkl" -DATASETS: - TEST: ("coco_2017_val_100",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 47.44, 0.02], ["segm", "AP", 42.94, 0.02]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml deleted file mode 100644 index e5454bf..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml +++ /dev/null @@ -1,10 +0,0 @@ -_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml" -MODEL: - WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" -DATASETS: - TEST: ("coco_2017_val_100",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 47.34, 0.02], ["segm", "AP", 42.67, 0.02], ["bbox_TTA", "AP", 49.11, 0.02], ["segm_TTA", "AP", 45.04, 0.02]] - AUG: - ENABLED: True - MIN_SIZES: (700, 800) # to save some time diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml deleted file mode 100644 index 6dbfcde..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml +++ /dev/null @@ -1,14 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True -DATASETS: - TRAIN: ("coco_2017_val_100",) - TEST: ("coco_2017_val_100",) -SOLVER: - BASE_LR: 0.005 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml deleted file mode 100644 index ffca550..0000000 --- 
a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml +++ /dev/null @@ -1,21 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - ROI_HEADS: - BATCH_SIZE_PER_IMAGE: 256 - MASK_ON: True -DATASETS: - TRAIN: ("coco_2017_val",) - TEST: ("coco_2017_val",) -INPUT: - MIN_SIZE_TRAIN: (600,) - MAX_SIZE_TRAIN: 1000 - MIN_SIZE_TEST: 800 - MAX_SIZE_TEST: 1000 -SOLVER: - WARMUP_FACTOR: 0.3333333 - WARMUP_ITERS: 100 - STEPS: (5500, 5800) - MAX_ITER: 6000 -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 42.0, 1.6], ["segm", "AP", 35.4, 1.25]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml deleted file mode 100644 index 70874e3..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml +++ /dev/null @@ -1,7 +0,0 @@ -_BASE_: "../COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml" -MODEL: - WEIGHTS: "detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl" -DATASETS: - TEST: ("coco_2017_val_100_panoptic_separated",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 46.47, 0.02], ["segm", "AP", 43.39, 0.02], ["sem_seg", "mIoU", 42.55, 0.02], ["panoptic_seg", "PQ", 38.99, 0.02]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml deleted file mode 100644 index 7cdee7b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml +++ /dev/null @@ -1,19 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "PanopticFPN" - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - SEM_SEG_HEAD: - LOSS_WEIGHT: 0.5 -DATASETS: - TRAIN: ("coco_2017_val_100_panoptic_separated",) - TEST: ("coco_2017_val_100_panoptic_separated",) -SOLVER: - BASE_LR: 0.005 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 1 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml deleted file mode 100644 index 0581631..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml +++ /dev/null @@ -1,20 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "PanopticFPN" - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - SEM_SEG_HEAD: - LOSS_WEIGHT: 0.5 -DATASETS: - TRAIN: ("coco_2017_val_panoptic_separated",) - TEST: ("coco_2017_val_panoptic_separated",) -SOLVER: - BASE_LR: 0.01 - WARMUP_FACTOR: 0.001 - WARMUP_ITERS: 500 - STEPS: (5500,) - MAX_ITER: 7000 -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 46.70, 1.1], ["segm", "AP", 38.73, 0.7], ["sem_seg", "mIoU", 64.73, 1.2], ["panoptic_seg", "PQ", 48.13, 0.8]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml 
b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml deleted file mode 100644 index 36b9988..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml +++ /dev/null @@ -1,7 +0,0 @@ -_BASE_: "../COCO-Detection/retinanet_R_50_FPN_3x.yaml" -MODEL: - WEIGHTS: "detectron2://COCO-Detection/retinanet_R_50_FPN_3x/137849486/model_final_4cafe0.pkl" -DATASETS: - TEST: ("coco_2017_val_100",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 44.36, 0.02]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml deleted file mode 100644 index 8d95c1f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml +++ /dev/null @@ -1,13 +0,0 @@ -_BASE_: "../COCO-Detection/retinanet_R_50_FPN_1x.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" -DATASETS: - TRAIN: ("coco_2017_val_100",) - TEST: ("coco_2017_val_100",) -SOLVER: - BASE_LR: 0.005 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml deleted file mode 100644 index c7c3f90..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml +++ /dev/null @@ -1,7 +0,0 @@ -_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" -MODEL: - WEIGHTS: "detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl" -DATASETS: - TEST: ("coco_2017_val_100",) -TEST: - EXPECTED_RESULTS: [["box_proposals", "AR@1000", 58.16, 0.02]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml deleted file mode 100644 index 402d432..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml +++ /dev/null @@ -1,13 +0,0 @@ -_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" -DATASETS: - TRAIN: ("coco_2017_val_100",) - TEST: ("coco_2017_val_100",) -SOLVER: - STEPS: (30,) - MAX_ITER: 40 - BASE_LR: 0.005 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml deleted file mode 100644 index bca7498..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml +++ /dev/null @@ -1,10 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "SemanticSegmentor" - WEIGHTS: "detectron2://semantic_R_50_FPN_1x/111802073/model_final_c18079783c55a94968edc28b7101c5f0.pkl" - RESNETS: - DEPTH: 50 -DATASETS: - TEST: ("coco_2017_val_100_panoptic_stuffonly",) -TEST: - EXPECTED_RESULTS: [["sem_seg", "mIoU", 39.53, 0.02], ["sem_seg", "mACC", 51.50, 0.02]] diff --git 
a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml deleted file mode 100644 index 14ab606..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml +++ /dev/null @@ -1,18 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "SemanticSegmentor" - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 -DATASETS: - TRAIN: ("coco_2017_val_100_panoptic_stuffonly",) - TEST: ("coco_2017_val_100_panoptic_stuffonly",) -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -SOLVER: - BASE_LR: 0.005 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml deleted file mode 100644 index 1f78d77..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml +++ /dev/null @@ -1,20 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "SemanticSegmentor" - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 -DATASETS: - TRAIN: ("coco_2017_val_panoptic_stuffonly",) - TEST: ("coco_2017_val_panoptic_stuffonly",) -SOLVER: - BASE_LR: 0.01 - WARMUP_FACTOR: 0.001 - WARMUP_ITERS: 300 - STEPS: (5500,) - MAX_ITER: 7000 -TEST: - EXPECTED_RESULTS: [["sem_seg", "mIoU", 76.51, 1.0], ["sem_seg", "mACC", 83.25, 1.0]] -INPUT: - # no scale augmentation - MIN_SIZE_TRAIN: (800, ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/demo/README.md b/preprocess/humanparsing/mhp_extension/detectron2/demo/README.md deleted file mode 100644 index caa755f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/demo/README.md +++ /dev/null @@ -1,8 +0,0 @@ - -## Detectron2 Demo - -We provide a command line tool to run a simple demo of builtin models. -The usage is explained in [GETTING_STARTED.md](../GETTING_STARTED.md). - -See our [blog post](https://ai.facebook.com/blog/-detectron2-a-pytorch-based-modular-object-detection-library-) -for a high-quality demo generated with this tool. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/demo/demo.py b/preprocess/humanparsing/mhp_extension/detectron2/demo/demo.py deleted file mode 100644 index 1fd8df8..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/demo/demo.py +++ /dev/null @@ -1,161 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import argparse -import glob -import multiprocessing as mp -import os -import time -import cv2 -import tqdm - -from detectron2.config import get_cfg -from detectron2.data.detection_utils import read_image -from detectron2.utils.logger import setup_logger - -from predictor import VisualizationDemo - -# constants -WINDOW_NAME = "COCO detections" - - -def setup_cfg(args): - # load config from file and command-line arguments - cfg = get_cfg() - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - # Set score_threshold for builtin models - cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.confidence_threshold - cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold - cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = args.confidence_threshold - cfg.freeze() - return cfg - - -def get_parser(): - parser = argparse.ArgumentParser(description="Detectron2 demo for builtin models") - parser.add_argument( - "--config-file", - default="configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml", - metavar="FILE", - help="path to config file", - ) - parser.add_argument("--webcam", action="store_true", help="Take inputs from webcam.") - parser.add_argument("--video-input", help="Path to video file.") - parser.add_argument( - "--input", - nargs="+", - help="A list of space separated input images; " - "or a single glob pattern such as 'directory/*.jpg'", - ) - parser.add_argument( - "--output", - help="A file or directory to save output visualizations. " - "If not given, will show output in an OpenCV window.", - ) - - parser.add_argument( - "--confidence-threshold", - type=float, - default=0.5, - help="Minimum score for instance predictions to be shown", - ) - parser.add_argument( - "--opts", - help="Modify config options using the command-line 'KEY VALUE' pairs", - default=[], - nargs=argparse.REMAINDER, - ) - return parser - - -if __name__ == "__main__": - mp.set_start_method("spawn", force=True) - args = get_parser().parse_args() - setup_logger(name="fvcore") - logger = setup_logger() - logger.info("Arguments: " + str(args)) - - cfg = setup_cfg(args) - - demo = VisualizationDemo(cfg) - - if args.input: - if len(args.input) == 1: - args.input = glob.glob(os.path.expanduser(args.input[0])) - assert args.input, "The input path(s) was not found" - for path in tqdm.tqdm(args.input, disable=not args.output): - # use PIL, to be consistent with evaluation - img = read_image(path, format="BGR") - start_time = time.time() - predictions, visualized_output = demo.run_on_image(img) - logger.info( - "{}: {} in {:.2f}s".format( - path, - "detected {} instances".format(len(predictions["instances"])) - if "instances" in predictions - else "finished", - time.time() - start_time, - ) - ) - - if args.output: - if os.path.isdir(args.output): - assert os.path.isdir(args.output), args.output - out_filename = os.path.join(args.output, os.path.basename(path)) - else: - assert len(args.input) == 1, "Please specify a directory with args.output" - out_filename = args.output - visualized_output.save(out_filename) - else: - cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL) - cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1]) - if cv2.waitKey(0) == 27: - break # esc to quit - elif args.webcam: - assert args.input is None, "Cannot have both --input and --webcam!" - assert args.output is None, "output not yet supported with --webcam!" 
- cam = cv2.VideoCapture(0) - for vis in tqdm.tqdm(demo.run_on_video(cam)): - cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL) - cv2.imshow(WINDOW_NAME, vis) - if cv2.waitKey(1) == 27: - break # esc to quit - cam.release() - cv2.destroyAllWindows() - elif args.video_input: - video = cv2.VideoCapture(args.video_input) - width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) - height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) - frames_per_second = video.get(cv2.CAP_PROP_FPS) - num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) - basename = os.path.basename(args.video_input) - - if args.output: - if os.path.isdir(args.output): - output_fname = os.path.join(args.output, basename) - output_fname = os.path.splitext(output_fname)[0] + ".mkv" - else: - output_fname = args.output - assert not os.path.isfile(output_fname), output_fname - output_file = cv2.VideoWriter( - filename=output_fname, - # some installation of opencv may not support x264 (due to its license), - # you can try other format (e.g. MPEG) - fourcc=cv2.VideoWriter_fourcc(*"x264"), - fps=float(frames_per_second), - frameSize=(width, height), - isColor=True, - ) - assert os.path.isfile(args.video_input) - for vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames): - if args.output: - output_file.write(vis_frame) - else: - cv2.namedWindow(basename, cv2.WINDOW_NORMAL) - cv2.imshow(basename, vis_frame) - if cv2.waitKey(1) == 27: - break # esc to quit - video.release() - if args.output: - output_file.release() - else: - cv2.destroyAllWindows() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/demo/predictor.py b/preprocess/humanparsing/mhp_extension/detectron2/demo/predictor.py deleted file mode 100644 index 689fa85..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/demo/predictor.py +++ /dev/null @@ -1,220 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import atexit -import bisect -import multiprocessing as mp -from collections import deque -import cv2 -import torch - -from detectron2.data import MetadataCatalog -from detectron2.engine.defaults import DefaultPredictor -from detectron2.utils.video_visualizer import VideoVisualizer -from detectron2.utils.visualizer import ColorMode, Visualizer - - -class VisualizationDemo(object): - def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False): - """ - Args: - cfg (CfgNode): - instance_mode (ColorMode): - parallel (bool): whether to run the model in different processes from visualization. - Useful since the visualization logic can be slow. - """ - self.metadata = MetadataCatalog.get( - cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused" - ) - self.cpu_device = torch.device("cpu") - self.instance_mode = instance_mode - - self.parallel = parallel - if parallel: - num_gpu = torch.cuda.device_count() - self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu) - else: - self.predictor = DefaultPredictor(cfg) - - def run_on_image(self, image): - """ - Args: - image (np.ndarray): an image of shape (H, W, C) (in BGR order). - This is the format used by OpenCV. - - Returns: - predictions (dict): the output of the model. - vis_output (VisImage): the visualized image output. - """ - vis_output = None - predictions = self.predictor(image) - # Convert image from OpenCV BGR format to Matplotlib RGB format. 
- image = image[:, :, ::-1] - visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode) - if "panoptic_seg" in predictions: - panoptic_seg, segments_info = predictions["panoptic_seg"] - vis_output = visualizer.draw_panoptic_seg_predictions( - panoptic_seg.to(self.cpu_device), segments_info - ) - else: - if "sem_seg" in predictions: - vis_output = visualizer.draw_sem_seg( - predictions["sem_seg"].argmax(dim=0).to(self.cpu_device) - ) - if "instances" in predictions: - instances = predictions["instances"].to(self.cpu_device) - vis_output = visualizer.draw_instance_predictions(predictions=instances) - - return predictions, vis_output - - def _frame_from_video(self, video): - while video.isOpened(): - success, frame = video.read() - if success: - yield frame - else: - break - - def run_on_video(self, video): - """ - Visualizes predictions on frames of the input video. - - Args: - video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be - either a webcam or a video file. - - Yields: - ndarray: BGR visualizations of each video frame. - """ - video_visualizer = VideoVisualizer(self.metadata, self.instance_mode) - - def process_predictions(frame, predictions): - frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) - if "panoptic_seg" in predictions: - panoptic_seg, segments_info = predictions["panoptic_seg"] - vis_frame = video_visualizer.draw_panoptic_seg_predictions( - frame, panoptic_seg.to(self.cpu_device), segments_info - ) - elif "instances" in predictions: - predictions = predictions["instances"].to(self.cpu_device) - vis_frame = video_visualizer.draw_instance_predictions(frame, predictions) - elif "sem_seg" in predictions: - vis_frame = video_visualizer.draw_sem_seg( - frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device) - ) - - # Converts Matplotlib RGB format to OpenCV BGR format - vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR) - return vis_frame - - frame_gen = self._frame_from_video(video) - if self.parallel: - buffer_size = self.predictor.default_buffer_size - - frame_data = deque() - - for cnt, frame in enumerate(frame_gen): - frame_data.append(frame) - self.predictor.put(frame) - - if cnt >= buffer_size: - frame = frame_data.popleft() - predictions = self.predictor.get() - yield process_predictions(frame, predictions) - - while len(frame_data): - frame = frame_data.popleft() - predictions = self.predictor.get() - yield process_predictions(frame, predictions) - else: - for frame in frame_gen: - yield process_predictions(frame, self.predictor(frame)) - - -class AsyncPredictor: - """ - A predictor that runs the model asynchronously, possibly on >1 GPUs. - Because rendering the visualization takes considerably amount of time, - this helps improve throughput when rendering videos. 
- """ - - class _StopToken: - pass - - class _PredictWorker(mp.Process): - def __init__(self, cfg, task_queue, result_queue): - self.cfg = cfg - self.task_queue = task_queue - self.result_queue = result_queue - super().__init__() - - def run(self): - predictor = DefaultPredictor(self.cfg) - - while True: - task = self.task_queue.get() - if isinstance(task, AsyncPredictor._StopToken): - break - idx, data = task - result = predictor(data) - self.result_queue.put((idx, result)) - - def __init__(self, cfg, num_gpus: int = 1): - """ - Args: - cfg (CfgNode): - num_gpus (int): if 0, will run on CPU - """ - num_workers = max(num_gpus, 1) - self.task_queue = mp.Queue(maxsize=num_workers * 3) - self.result_queue = mp.Queue(maxsize=num_workers * 3) - self.procs = [] - for gpuid in range(max(num_gpus, 1)): - cfg = cfg.clone() - cfg.defrost() - cfg.MODEL.DEVICE = "cuda:{}".format(gpuid) if num_gpus > 0 else "cpu" - self.procs.append( - AsyncPredictor._PredictWorker(cfg, self.task_queue, self.result_queue) - ) - - self.put_idx = 0 - self.get_idx = 0 - self.result_rank = [] - self.result_data = [] - - for p in self.procs: - p.start() - atexit.register(self.shutdown) - - def put(self, image): - self.put_idx += 1 - self.task_queue.put((self.put_idx, image)) - - def get(self): - self.get_idx += 1 # the index needed for this request - if len(self.result_rank) and self.result_rank[0] == self.get_idx: - res = self.result_data[0] - del self.result_data[0], self.result_rank[0] - return res - - while True: - # make sure the results are returned in the correct order - idx, res = self.result_queue.get() - if idx == self.get_idx: - return res - insert = bisect.bisect(self.result_rank, idx) - self.result_rank.insert(insert, idx) - self.result_data.insert(insert, res) - - def __len__(self): - return self.put_idx - self.get_idx - - def __call__(self, image): - self.put(image) - return self.get() - - def shutdown(self): - for _ in self.procs: - self.task_queue.put(AsyncPredictor._StopToken()) - - @property - def default_buffer_size(self): - return len(self.procs) * 5 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/__init__.py deleted file mode 100644 index 41816af..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -from .utils.env import setup_environment - -setup_environment() - - -# This line will be programatically read/write by setup.py. -# Leave them at the bottom of this file and don't touch them. -__version__ = "0.1.3" diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/checkpoint/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/checkpoint/__init__.py deleted file mode 100644 index e17a9df..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/checkpoint/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -# File: - - -from . 
import catalog as _UNUSED # register the handler -from .detection_checkpoint import DetectionCheckpointer -from fvcore.common.checkpoint import Checkpointer, PeriodicCheckpointer - -__all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/checkpoint/c2_model_loading.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/checkpoint/c2_model_loading.py deleted file mode 100644 index e27ba84..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/checkpoint/c2_model_loading.py +++ /dev/null @@ -1,313 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import copy -import logging -import re -import torch -from fvcore.common.checkpoint import ( - get_missing_parameters_message, - get_unexpected_parameters_message, -) - - -def convert_basic_c2_names(original_keys): - """ - Apply some basic name conversion to names in C2 weights. - It only deals with typical backbone models. - - Args: - original_keys (list[str]): - Returns: - list[str]: The same number of strings matching those in original_keys. - """ - layer_keys = copy.deepcopy(original_keys) - layer_keys = [ - {"pred_b": "linear_b", "pred_w": "linear_w"}.get(k, k) for k in layer_keys - ] # some hard-coded mappings - - layer_keys = [k.replace("_", ".") for k in layer_keys] - layer_keys = [re.sub("\\.b$", ".bias", k) for k in layer_keys] - layer_keys = [re.sub("\\.w$", ".weight", k) for k in layer_keys] - # Uniform both bn and gn names to "norm" - layer_keys = [re.sub("bn\\.s$", "norm.weight", k) for k in layer_keys] - layer_keys = [re.sub("bn\\.bias$", "norm.bias", k) for k in layer_keys] - layer_keys = [re.sub("bn\\.rm", "norm.running_mean", k) for k in layer_keys] - layer_keys = [re.sub("bn\\.running.mean$", "norm.running_mean", k) for k in layer_keys] - layer_keys = [re.sub("bn\\.riv$", "norm.running_var", k) for k in layer_keys] - layer_keys = [re.sub("bn\\.running.var$", "norm.running_var", k) for k in layer_keys] - layer_keys = [re.sub("bn\\.gamma$", "norm.weight", k) for k in layer_keys] - layer_keys = [re.sub("bn\\.beta$", "norm.bias", k) for k in layer_keys] - layer_keys = [re.sub("gn\\.s$", "norm.weight", k) for k in layer_keys] - layer_keys = [re.sub("gn\\.bias$", "norm.bias", k) for k in layer_keys] - - # stem - layer_keys = [re.sub("^res\\.conv1\\.norm\\.", "conv1.norm.", k) for k in layer_keys] - # to avoid mis-matching with "conv1" in other components (e.g. 
detection head) - layer_keys = [re.sub("^conv1\\.", "stem.conv1.", k) for k in layer_keys] - - # layer1-4 is used by torchvision, however we follow the C2 naming strategy (res2-5) - # layer_keys = [re.sub("^res2.", "layer1.", k) for k in layer_keys] - # layer_keys = [re.sub("^res3.", "layer2.", k) for k in layer_keys] - # layer_keys = [re.sub("^res4.", "layer3.", k) for k in layer_keys] - # layer_keys = [re.sub("^res5.", "layer4.", k) for k in layer_keys] - - # blocks - layer_keys = [k.replace(".branch1.", ".shortcut.") for k in layer_keys] - layer_keys = [k.replace(".branch2a.", ".conv1.") for k in layer_keys] - layer_keys = [k.replace(".branch2b.", ".conv2.") for k in layer_keys] - layer_keys = [k.replace(".branch2c.", ".conv3.") for k in layer_keys] - - # DensePose substitutions - layer_keys = [re.sub("^body.conv.fcn", "body_conv_fcn", k) for k in layer_keys] - layer_keys = [k.replace("AnnIndex.lowres", "ann_index_lowres") for k in layer_keys] - layer_keys = [k.replace("Index.UV.lowres", "index_uv_lowres") for k in layer_keys] - layer_keys = [k.replace("U.lowres", "u_lowres") for k in layer_keys] - layer_keys = [k.replace("V.lowres", "v_lowres") for k in layer_keys] - return layer_keys - - -def convert_c2_detectron_names(weights): - """ - Map Caffe2 Detectron weight names to Detectron2 names. - - Args: - weights (dict): name -> tensor - - Returns: - dict: detectron2 names -> tensor - dict: detectron2 names -> C2 names - """ - logger = logging.getLogger(__name__) - logger.info("Remapping C2 weights ......") - original_keys = sorted(weights.keys()) - layer_keys = copy.deepcopy(original_keys) - - layer_keys = convert_basic_c2_names(layer_keys) - - # -------------------------------------------------------------------------- - # RPN hidden representation conv - # -------------------------------------------------------------------------- - # FPN case - # In the C2 model, the RPN hidden layer conv is defined for FPN level 2 and then - # shared for all other levels, hence the appearance of "fpn2" - layer_keys = [ - k.replace("conv.rpn.fpn2", "proposal_generator.rpn_head.conv") for k in layer_keys - ] - # Non-FPN case - layer_keys = [k.replace("conv.rpn", "proposal_generator.rpn_head.conv") for k in layer_keys] - - # -------------------------------------------------------------------------- - # RPN box transformation conv - # -------------------------------------------------------------------------- - # FPN case (see note above about "fpn2") - layer_keys = [ - k.replace("rpn.bbox.pred.fpn2", "proposal_generator.rpn_head.anchor_deltas") - for k in layer_keys - ] - layer_keys = [ - k.replace("rpn.cls.logits.fpn2", "proposal_generator.rpn_head.objectness_logits") - for k in layer_keys - ] - # Non-FPN case - layer_keys = [ - k.replace("rpn.bbox.pred", "proposal_generator.rpn_head.anchor_deltas") for k in layer_keys - ] - layer_keys = [ - k.replace("rpn.cls.logits", "proposal_generator.rpn_head.objectness_logits") - for k in layer_keys - ] - - # -------------------------------------------------------------------------- - # Fast R-CNN box head - # -------------------------------------------------------------------------- - layer_keys = [re.sub("^bbox\\.pred", "bbox_pred", k) for k in layer_keys] - layer_keys = [re.sub("^cls\\.score", "cls_score", k) for k in layer_keys] - layer_keys = [re.sub("^fc6\\.", "box_head.fc1.", k) for k in layer_keys] - layer_keys = [re.sub("^fc7\\.", "box_head.fc2.", k) for k in layer_keys] - # 4conv1fc head tensor names: head_conv1_w, head_conv1_gn_s - layer_keys = 
[re.sub("^head\\.conv", "box_head.conv", k) for k in layer_keys] - - # -------------------------------------------------------------------------- - # FPN lateral and output convolutions - # -------------------------------------------------------------------------- - def fpn_map(name): - """ - Look for keys with the following patterns: - 1) Starts with "fpn.inner." - Example: "fpn.inner.res2.2.sum.lateral.weight" - Meaning: These are lateral pathway convolutions - 2) Starts with "fpn.res" - Example: "fpn.res2.2.sum.weight" - Meaning: These are FPN output convolutions - """ - splits = name.split(".") - norm = ".norm" if "norm" in splits else "" - if name.startswith("fpn.inner."): - # splits example: ['fpn', 'inner', 'res2', '2', 'sum', 'lateral', 'weight'] - stage = int(splits[2][len("res") :]) - return "fpn_lateral{}{}.{}".format(stage, norm, splits[-1]) - elif name.startswith("fpn.res"): - # splits example: ['fpn', 'res2', '2', 'sum', 'weight'] - stage = int(splits[1][len("res") :]) - return "fpn_output{}{}.{}".format(stage, norm, splits[-1]) - return name - - layer_keys = [fpn_map(k) for k in layer_keys] - - # -------------------------------------------------------------------------- - # Mask R-CNN mask head - # -------------------------------------------------------------------------- - # roi_heads.StandardROIHeads case - layer_keys = [k.replace(".[mask].fcn", "mask_head.mask_fcn") for k in layer_keys] - layer_keys = [re.sub("^\\.mask\\.fcn", "mask_head.mask_fcn", k) for k in layer_keys] - layer_keys = [k.replace("mask.fcn.logits", "mask_head.predictor") for k in layer_keys] - # roi_heads.Res5ROIHeads case - layer_keys = [k.replace("conv5.mask", "mask_head.deconv") for k in layer_keys] - - # -------------------------------------------------------------------------- - # Keypoint R-CNN head - # -------------------------------------------------------------------------- - # interestingly, the keypoint head convs have blob names that are simply "conv_fcnX" - layer_keys = [k.replace("conv.fcn", "roi_heads.keypoint_head.conv_fcn") for k in layer_keys] - layer_keys = [ - k.replace("kps.score.lowres", "roi_heads.keypoint_head.score_lowres") for k in layer_keys - ] - layer_keys = [k.replace("kps.score.", "roi_heads.keypoint_head.score.") for k in layer_keys] - - # -------------------------------------------------------------------------- - # Done with replacements - # -------------------------------------------------------------------------- - assert len(set(layer_keys)) == len(layer_keys) - assert len(original_keys) == len(layer_keys) - - new_weights = {} - new_keys_to_original_keys = {} - for orig, renamed in zip(original_keys, layer_keys): - new_keys_to_original_keys[renamed] = orig - if renamed.startswith("bbox_pred.") or renamed.startswith("mask_head.predictor."): - # remove the meaningless prediction weight for background class - new_start_idx = 4 if renamed.startswith("bbox_pred.") else 1 - new_weights[renamed] = weights[orig][new_start_idx:] - logger.info( - "Remove prediction weight for background class in {}. 
The shape changes from " - "{} to {}.".format( - renamed, tuple(weights[orig].shape), tuple(new_weights[renamed].shape) - ) - ) - elif renamed.startswith("cls_score."): - # move weights of bg class from original index 0 to last index - logger.info( - "Move classification weights for background class in {} from index 0 to " - "index {}.".format(renamed, weights[orig].shape[0] - 1) - ) - new_weights[renamed] = torch.cat([weights[orig][1:], weights[orig][:1]]) - else: - new_weights[renamed] = weights[orig] - - return new_weights, new_keys_to_original_keys - - -# Note the current matching is not symmetric. -# it assumes model_state_dict will have longer names. -def align_and_update_state_dicts(model_state_dict, ckpt_state_dict, c2_conversion=True): - """ - Match names between the two state-dict, and update the values of model_state_dict in-place with - copies of the matched tensor in ckpt_state_dict. - If `c2_conversion==True`, `ckpt_state_dict` is assumed to be a Caffe2 - model and will be renamed at first. - - Strategy: suppose that the models that we will create will have prefixes appended - to each of its keys, for example due to an extra level of nesting that the original - pre-trained weights from ImageNet won't contain. For example, model.state_dict() - might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains - res2.conv1.weight. We thus want to match both parameters together. - For that, we look for each model weight, look among all loaded keys if there is one - that is a suffix of the current weight name, and use it if that's the case. - If multiple matches exist, take the one with longest size - of the corresponding name. For example, for the same model as before, the pretrained - weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case, - we want to match backbone[0].body.conv1.weight to conv1.weight, and - backbone[0].body.res2.conv1.weight to res2.conv1.weight. - """ - model_keys = sorted(model_state_dict.keys()) - if c2_conversion: - ckpt_state_dict, original_keys = convert_c2_detectron_names(ckpt_state_dict) - # original_keys: the name in the original dict (before renaming) - else: - original_keys = {x: x for x in ckpt_state_dict.keys()} - ckpt_keys = sorted(ckpt_state_dict.keys()) - - def match(a, b): - # Matched ckpt_key should be a complete (starts with '.') suffix. - # For example, roi_heads.mesh_head.whatever_conv1 does not match conv1, - # but matches whatever_conv1 or mesh_head.whatever_conv1. - return a == b or a.endswith("." 
+ b) - - # get a matrix of string matches, where each (i, j) entry correspond to the size of the - # ckpt_key string, if it matches - match_matrix = [len(j) if match(i, j) else 0 for i in model_keys for j in ckpt_keys] - match_matrix = torch.as_tensor(match_matrix).view(len(model_keys), len(ckpt_keys)) - # use the matched one with longest size in case of multiple matches - max_match_size, idxs = match_matrix.max(1) - # remove indices that correspond to no-match - idxs[max_match_size == 0] = -1 - - # used for logging - max_len_model = max(len(key) for key in model_keys) if model_keys else 1 - max_len_ckpt = max(len(key) for key in ckpt_keys) if ckpt_keys else 1 - log_str_template = "{: <{}} loaded from {: <{}} of shape {}" - logger = logging.getLogger(__name__) - # matched_pairs (matched checkpoint key --> matched model key) - matched_keys = {} - for idx_model, idx_ckpt in enumerate(idxs.tolist()): - if idx_ckpt == -1: - continue - key_model = model_keys[idx_model] - key_ckpt = ckpt_keys[idx_ckpt] - value_ckpt = ckpt_state_dict[key_ckpt] - shape_in_model = model_state_dict[key_model].shape - - if shape_in_model != value_ckpt.shape: - logger.warning( - "Shape of {} in checkpoint is {}, while shape of {} in model is {}.".format( - key_ckpt, value_ckpt.shape, key_model, shape_in_model - ) - ) - logger.warning( - "{} will not be loaded. Please double check and see if this is desired.".format( - key_ckpt - ) - ) - continue - - model_state_dict[key_model] = value_ckpt.clone() - if key_ckpt in matched_keys: # already added to matched_keys - logger.error( - "Ambiguity found for {} in checkpoint!" - "It matches at least two keys in the model ({} and {}).".format( - key_ckpt, key_model, matched_keys[key_ckpt] - ) - ) - raise ValueError("Cannot match one checkpoint key to multiple keys in the model.") - - matched_keys[key_ckpt] = key_model - logger.info( - log_str_template.format( - key_model, - max_len_model, - original_keys[key_ckpt], - max_len_ckpt, - tuple(shape_in_model), - ) - ) - matched_model_keys = matched_keys.values() - matched_ckpt_keys = matched_keys.keys() - # print warnings about unmatched keys on both side - unmatched_model_keys = [k for k in model_keys if k not in matched_model_keys] - if len(unmatched_model_keys): - logger.info(get_missing_parameters_message(unmatched_model_keys)) - - unmatched_ckpt_keys = [k for k in ckpt_keys if k not in matched_ckpt_keys] - if len(unmatched_ckpt_keys): - logger.info( - get_unexpected_parameters_message(original_keys[x] for x in unmatched_ckpt_keys) - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/checkpoint/catalog.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/checkpoint/catalog.py deleted file mode 100644 index 62f81f3..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/checkpoint/catalog.py +++ /dev/null @@ -1,134 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -from fvcore.common.file_io import PathHandler, PathManager - - -class ModelCatalog(object): - """ - Store mappings from names to third-party models. - """ - - S3_C2_DETECTRON_PREFIX = "https://dl.fbaipublicfiles.com/detectron" - - # MSRA models have STRIDE_IN_1X1=True. False otherwise. - # NOTE: all BN models here have fused BN into an affine layer. - # As a result, you should only load them to a model with "FrozenBN". - # Loading them to a model with regular BN or SyncBN is wrong. 
- # Even when loaded to FrozenBN, it is still different from affine by an epsilon, - # which should be negligible for training. - # NOTE: all models here uses PIXEL_STD=[1,1,1] - # NOTE: Most of the BN models here are no longer used. We use the - # re-converted pre-trained models under detectron2 model zoo instead. - C2_IMAGENET_MODELS = { - "MSRA/R-50": "ImageNetPretrained/MSRA/R-50.pkl", - "MSRA/R-101": "ImageNetPretrained/MSRA/R-101.pkl", - "FAIR/R-50-GN": "ImageNetPretrained/47261647/R-50-GN.pkl", - "FAIR/R-101-GN": "ImageNetPretrained/47592356/R-101-GN.pkl", - "FAIR/X-101-32x8d": "ImageNetPretrained/20171220/X-101-32x8d.pkl", - "FAIR/X-101-64x4d": "ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl", - "FAIR/X-152-32x8d-IN5k": "ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl", - } - - C2_DETECTRON_PATH_FORMAT = ( - "{prefix}/{url}/output/train/{dataset}/{type}/model_final.pkl" # noqa B950 - ) - - C2_DATASET_COCO = "coco_2014_train%3Acoco_2014_valminusminival" - C2_DATASET_COCO_KEYPOINTS = "keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival" - - # format: {model_name} -> part of the url - C2_DETECTRON_MODELS = { - "35857197/e2e_faster_rcnn_R-50-C4_1x": "35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW", # noqa B950 - "35857345/e2e_faster_rcnn_R-50-FPN_1x": "35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I", # noqa B950 - "35857890/e2e_faster_rcnn_R-101-FPN_1x": "35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7", # noqa B950 - "36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": "36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ", # noqa B950 - "35858791/e2e_mask_rcnn_R-50-C4_1x": "35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB", # noqa B950 - "35858933/e2e_mask_rcnn_R-50-FPN_1x": "35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC", # noqa B950 - "35861795/e2e_mask_rcnn_R-101-FPN_1x": "35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT", # noqa B950 - "36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x": "36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI", # noqa B950 - "48616381/e2e_mask_rcnn_R-50-FPN_2x_gn": "GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q", # noqa B950 - "37697547/e2e_keypoint_rcnn_R-50-FPN_1x": "37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao", # noqa B950 - "35998355/rpn_R-50-C4_1x": "35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L", # noqa B950 - "35998814/rpn_R-50-FPN_1x": "35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179", # noqa B950 - "36225147/fast_R-50-FPN_1x": "36225147/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml.08_39_09.L3obSdQ2", # noqa B950 - } - - @staticmethod - def get(name): - if name.startswith("Caffe2Detectron/COCO"): - return ModelCatalog._get_c2_detectron_baseline(name) - if name.startswith("ImageNetPretrained/"): - return ModelCatalog._get_c2_imagenet_pretrained(name) - raise RuntimeError("model not present in the catalog: {}".format(name)) - - @staticmethod - def _get_c2_imagenet_pretrained(name): - prefix = ModelCatalog.S3_C2_DETECTRON_PREFIX - name = name[len("ImageNetPretrained/") :] - name = ModelCatalog.C2_IMAGENET_MODELS[name] - url = "/".join([prefix, name]) - return url - - @staticmethod - def _get_c2_detectron_baseline(name): - name = name[len("Caffe2Detectron/COCO/") :] - url = 
ModelCatalog.C2_DETECTRON_MODELS[name] - if "keypoint_rcnn" in name: - dataset = ModelCatalog.C2_DATASET_COCO_KEYPOINTS - else: - dataset = ModelCatalog.C2_DATASET_COCO - - if "35998355/rpn_R-50-C4_1x" in name: - # this one model is somehow different from others .. - type = "rpn" - else: - type = "generalized_rcnn" - - # Detectron C2 models are stored in the structure defined in `C2_DETECTRON_PATH_FORMAT`. - url = ModelCatalog.C2_DETECTRON_PATH_FORMAT.format( - prefix=ModelCatalog.S3_C2_DETECTRON_PREFIX, url=url, type=type, dataset=dataset - ) - return url - - -class ModelCatalogHandler(PathHandler): - """ - Resolve URL like catalog://. - """ - - PREFIX = "catalog://" - - def _get_supported_prefixes(self): - return [self.PREFIX] - - def _get_local_path(self, path): - logger = logging.getLogger(__name__) - catalog_path = ModelCatalog.get(path[len(self.PREFIX) :]) - logger.info("Catalog entry {} points to {}".format(path, catalog_path)) - return PathManager.get_local_path(catalog_path) - - def _open(self, path, mode="r", **kwargs): - return PathManager.open(self._get_local_path(path), mode, **kwargs) - - -class Detectron2Handler(PathHandler): - """ - Resolve anything that's in Detectron2 model zoo. - """ - - PREFIX = "detectron2://" - S3_DETECTRON2_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/" - - def _get_supported_prefixes(self): - return [self.PREFIX] - - def _get_local_path(self, path): - name = path[len(self.PREFIX) :] - return PathManager.get_local_path(self.S3_DETECTRON2_PREFIX + name) - - def _open(self, path, mode="r", **kwargs): - return PathManager.open(self._get_local_path(path), mode, **kwargs) - - -PathManager.register_handler(ModelCatalogHandler()) -PathManager.register_handler(Detectron2Handler()) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/checkpoint/detection_checkpoint.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/checkpoint/detection_checkpoint.py deleted file mode 100644 index 06e6739..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/checkpoint/detection_checkpoint.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import pickle -from fvcore.common.checkpoint import Checkpointer -from fvcore.common.file_io import PathManager - -import detectron2.utils.comm as comm - -from .c2_model_loading import align_and_update_state_dicts - - -class DetectionCheckpointer(Checkpointer): - """ - Same as :class:`Checkpointer`, but is able to handle models in detectron & detectron2 - model zoo, and apply conversions for legacy models. 
- """ - - def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables): - is_main_process = comm.is_main_process() - super().__init__( - model, - save_dir, - save_to_disk=is_main_process if save_to_disk is None else save_to_disk, - **checkpointables, - ) - - def _load_file(self, filename): - if filename.endswith(".pkl"): - with PathManager.open(filename, "rb") as f: - data = pickle.load(f, encoding="latin1") - if "model" in data and "__author__" in data: - # file is in Detectron2 model zoo format - self.logger.info("Reading a file from '{}'".format(data["__author__"])) - return data - else: - # assume file is from Caffe2 / Detectron1 model zoo - if "blobs" in data: - # Detection models have "blobs", but ImageNet models don't - data = data["blobs"] - data = {k: v for k, v in data.items() if not k.endswith("_momentum")} - return {"model": data, "__author__": "Caffe2", "matching_heuristics": True} - - loaded = super()._load_file(filename) # load native pth checkpoint - if "model" not in loaded: - loaded = {"model": loaded} - return loaded - - def _load_model(self, checkpoint): - if checkpoint.get("matching_heuristics", False): - self._convert_ndarray_to_tensor(checkpoint["model"]) - # convert weights by name-matching heuristics - model_state_dict = self.model.state_dict() - align_and_update_state_dicts( - model_state_dict, - checkpoint["model"], - c2_conversion=checkpoint.get("__author__", None) == "Caffe2", - ) - checkpoint["model"] = model_state_dict - # for non-caffe2 models, use standard ways to load it - incompatible = super()._load_model(checkpoint) - if incompatible is None: # support older versions of fvcore - return None - - model_buffers = dict(self.model.named_buffers(recurse=False)) - for k in ["pixel_mean", "pixel_std"]: - # Ignore missing key message about pixel_mean/std. - # Though they may be missing in old checkpoints, they will be correctly - # initialized from config anyway. - if k in model_buffers: - try: - incompatible.missing_keys.remove(k) - except ValueError: - pass - return incompatible diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/config/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/config/__init__.py deleted file mode 100644 index f996ecd..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/config/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .compat import downgrade_config, upgrade_config -from .config import CfgNode, get_cfg, global_cfg, set_global_cfg, configurable - -__all__ = [ - "CfgNode", - "get_cfg", - "global_cfg", - "set_global_cfg", - "downgrade_config", - "upgrade_config", - "configurable", -] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/config/compat.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/config/compat.py deleted file mode 100644 index 41fe3a0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/config/compat.py +++ /dev/null @@ -1,229 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -""" -Backward compatibility of configs. - -Instructions to bump version: -+ It's not needed to bump version if new keys are added. - It's only needed when backward-incompatible changes happen - (i.e., some existing keys disappear, or the meaning of a key changes) -+ To bump version, do the following: - 1. Increment _C.VERSION in defaults.py - 2. Add a converter in this file. 
- - Each ConverterVX has a function "upgrade" which in-place upgrades config from X-1 to X, - and a function "downgrade" which in-place downgrades config from X to X-1 - - In each function, VERSION is left unchanged. - - Each converter assumes that its input has the relevant keys - (i.e., the input is not a partial config). - 3. Run the tests (test_config.py) to make sure the upgrade & downgrade - functions are consistent. -""" - -import logging -from typing import List, Optional, Tuple - -from .config import CfgNode as CN -from .defaults import _C - -__all__ = ["upgrade_config", "downgrade_config"] - - -def upgrade_config(cfg: CN, to_version: Optional[int] = None) -> CN: - """ - Upgrade a config from its current version to a newer version. - - Args: - cfg (CfgNode): - to_version (int): defaults to the latest version. - """ - cfg = cfg.clone() - if to_version is None: - to_version = _C.VERSION - - assert cfg.VERSION <= to_version, "Cannot upgrade from v{} to v{}!".format( - cfg.VERSION, to_version - ) - for k in range(cfg.VERSION, to_version): - converter = globals()["ConverterV" + str(k + 1)] - converter.upgrade(cfg) - cfg.VERSION = k + 1 - return cfg - - -def downgrade_config(cfg: CN, to_version: int) -> CN: - """ - Downgrade a config from its current version to an older version. - - Args: - cfg (CfgNode): - to_version (int): - - Note: - A general downgrade of arbitrary configs is not always possible due to the - different functionalities in different versions. - The purpose of downgrade is only to recover the defaults in old versions, - allowing it to load an old partial yaml config. - Therefore, the implementation only needs to fill in the default values - in the old version when a general downgrade is not possible. - """ - cfg = cfg.clone() - assert cfg.VERSION >= to_version, "Cannot downgrade from v{} to v{}!".format( - cfg.VERSION, to_version - ) - for k in range(cfg.VERSION, to_version, -1): - converter = globals()["ConverterV" + str(k)] - converter.downgrade(cfg) - cfg.VERSION = k - 1 - return cfg - - -def guess_version(cfg: CN, filename: str) -> int: - """ - Guess the version of a partial config where the VERSION field is not specified. - Returns the version, or the latest if cannot make a guess. - - This makes it easier for users to migrate. - """ - logger = logging.getLogger(__name__) - - def _has(name: str) -> bool: - cur = cfg - for n in name.split("."): - if n not in cur: - return False - cur = cur[n] - return True - - # Most users' partial configs have "MODEL.WEIGHT", so guess on it - ret = None - if _has("MODEL.WEIGHT") or _has("TEST.AUG_ON"): - ret = 1 - - if ret is not None: - logger.warning("Config '{}' has no VERSION. Assuming it to be v{}.".format(filename, ret)) - else: - ret = _C.VERSION - logger.warning( - "Config '{}' has no VERSION. 
Assuming it to be compatible with latest v{}.".format( - filename, ret - ) - ) - return ret - - -def _rename(cfg: CN, old: str, new: str) -> None: - old_keys = old.split(".") - new_keys = new.split(".") - - def _set(key_seq: List[str], val: str) -> None: - cur = cfg - for k in key_seq[:-1]: - if k not in cur: - cur[k] = CN() - cur = cur[k] - cur[key_seq[-1]] = val - - def _get(key_seq: List[str]) -> CN: - cur = cfg - for k in key_seq: - cur = cur[k] - return cur - - def _del(key_seq: List[str]) -> None: - cur = cfg - for k in key_seq[:-1]: - cur = cur[k] - del cur[key_seq[-1]] - if len(cur) == 0 and len(key_seq) > 1: - _del(key_seq[:-1]) - - _set(new_keys, _get(old_keys)) - _del(old_keys) - - -class _RenameConverter: - """ - A converter that handles simple rename. - """ - - RENAME: List[Tuple[str, str]] = [] # list of tuples of (old name, new name) - - @classmethod - def upgrade(cls, cfg: CN) -> None: - for old, new in cls.RENAME: - _rename(cfg, old, new) - - @classmethod - def downgrade(cls, cfg: CN) -> None: - for old, new in cls.RENAME[::-1]: - _rename(cfg, new, old) - - -class ConverterV1(_RenameConverter): - RENAME = [("MODEL.RPN_HEAD.NAME", "MODEL.RPN.HEAD_NAME")] - - -class ConverterV2(_RenameConverter): - """ - A large bulk of rename, before public release. - """ - - RENAME = [ - ("MODEL.WEIGHT", "MODEL.WEIGHTS"), - ("MODEL.PANOPTIC_FPN.SEMANTIC_LOSS_SCALE", "MODEL.SEM_SEG_HEAD.LOSS_WEIGHT"), - ("MODEL.PANOPTIC_FPN.RPN_LOSS_SCALE", "MODEL.RPN.LOSS_WEIGHT"), - ("MODEL.PANOPTIC_FPN.INSTANCE_LOSS_SCALE", "MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT"), - ("MODEL.PANOPTIC_FPN.COMBINE_ON", "MODEL.PANOPTIC_FPN.COMBINE.ENABLED"), - ( - "MODEL.PANOPTIC_FPN.COMBINE_OVERLAP_THRESHOLD", - "MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH", - ), - ( - "MODEL.PANOPTIC_FPN.COMBINE_STUFF_AREA_LIMIT", - "MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT", - ), - ( - "MODEL.PANOPTIC_FPN.COMBINE_INSTANCES_CONFIDENCE_THRESHOLD", - "MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH", - ), - ("MODEL.ROI_HEADS.SCORE_THRESH", "MODEL.ROI_HEADS.SCORE_THRESH_TEST"), - ("MODEL.ROI_HEADS.NMS", "MODEL.ROI_HEADS.NMS_THRESH_TEST"), - ("MODEL.RETINANET.INFERENCE_SCORE_THRESHOLD", "MODEL.RETINANET.SCORE_THRESH_TEST"), - ("MODEL.RETINANET.INFERENCE_TOPK_CANDIDATES", "MODEL.RETINANET.TOPK_CANDIDATES_TEST"), - ("MODEL.RETINANET.INFERENCE_NMS_THRESHOLD", "MODEL.RETINANET.NMS_THRESH_TEST"), - ("TEST.DETECTIONS_PER_IMG", "TEST.DETECTIONS_PER_IMAGE"), - ("TEST.AUG_ON", "TEST.AUG.ENABLED"), - ("TEST.AUG_MIN_SIZES", "TEST.AUG.MIN_SIZES"), - ("TEST.AUG_MAX_SIZE", "TEST.AUG.MAX_SIZE"), - ("TEST.AUG_FLIP", "TEST.AUG.FLIP"), - ] - - @classmethod - def upgrade(cls, cfg: CN) -> None: - super().upgrade(cfg) - - if cfg.MODEL.META_ARCHITECTURE == "RetinaNet": - _rename( - cfg, "MODEL.RETINANET.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS" - ) - _rename(cfg, "MODEL.RETINANET.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES") - del cfg["MODEL"]["RPN"]["ANCHOR_SIZES"] - del cfg["MODEL"]["RPN"]["ANCHOR_ASPECT_RATIOS"] - else: - _rename(cfg, "MODEL.RPN.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS") - _rename(cfg, "MODEL.RPN.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES") - del cfg["MODEL"]["RETINANET"]["ANCHOR_SIZES"] - del cfg["MODEL"]["RETINANET"]["ANCHOR_ASPECT_RATIOS"] - del cfg["MODEL"]["RETINANET"]["ANCHOR_STRIDES"] - - @classmethod - def downgrade(cls, cfg: CN) -> None: - super().downgrade(cfg) - - _rename(cfg, "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS", "MODEL.RPN.ANCHOR_ASPECT_RATIOS") - _rename(cfg, 
"MODEL.ANCHOR_GENERATOR.SIZES", "MODEL.RPN.ANCHOR_SIZES") - cfg.MODEL.RETINANET.ANCHOR_ASPECT_RATIOS = cfg.MODEL.RPN.ANCHOR_ASPECT_RATIOS - cfg.MODEL.RETINANET.ANCHOR_SIZES = cfg.MODEL.RPN.ANCHOR_SIZES - cfg.MODEL.RETINANET.ANCHOR_STRIDES = [] # this is not used anywhere in any version diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/config/config.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/config/config.py deleted file mode 100644 index 14ad524..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/config/config.py +++ /dev/null @@ -1,202 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import functools -import inspect -import logging -from fvcore.common.config import CfgNode as _CfgNode -from fvcore.common.file_io import PathManager - - -class CfgNode(_CfgNode): - """ - The same as `fvcore.common.config.CfgNode`, but different in: - - 1. Use unsafe yaml loading by default. - Note that this may lead to arbitrary code execution: you must not - load a config file from untrusted sources before manually inspecting - the content of the file. - 2. Support config versioning. - When attempting to merge an old config, it will convert the old config automatically. - """ - - # Note that the default value of allow_unsafe is changed to True - def merge_from_file(self, cfg_filename: str, allow_unsafe: bool = True) -> None: - assert PathManager.isfile(cfg_filename), f"Config file '{cfg_filename}' does not exist!" - loaded_cfg = _CfgNode.load_yaml_with_base(cfg_filename, allow_unsafe=allow_unsafe) - loaded_cfg = type(self)(loaded_cfg) - - # defaults.py needs to import CfgNode - from .defaults import _C - - latest_ver = _C.VERSION - assert ( - latest_ver == self.VERSION - ), "CfgNode.merge_from_file is only allowed on a config object of latest version!" - - logger = logging.getLogger(__name__) - - loaded_ver = loaded_cfg.get("VERSION", None) - if loaded_ver is None: - from .compat import guess_version - - loaded_ver = guess_version(loaded_cfg, cfg_filename) - assert loaded_ver <= self.VERSION, "Cannot merge a v{} config into a v{} config.".format( - loaded_ver, self.VERSION - ) - - if loaded_ver == self.VERSION: - self.merge_from_other_cfg(loaded_cfg) - else: - # compat.py needs to import CfgNode - from .compat import upgrade_config, downgrade_config - - logger.warning( - "Loading an old v{} config file '{}' by automatically upgrading to v{}. " - "See docs/CHANGELOG.md for instructions to update your files.".format( - loaded_ver, cfg_filename, self.VERSION - ) - ) - # To convert, first obtain a full config at an old version - old_self = downgrade_config(self, to_version=loaded_ver) - old_self.merge_from_other_cfg(loaded_cfg) - new_config = upgrade_config(old_self) - self.clear() - self.update(new_config) - - def dump(self, *args, **kwargs): - """ - Returns: - str: a yaml string representation of the config - """ - # to make it show up in docs - return super().dump(*args, **kwargs) - - -global_cfg = CfgNode() - - -def get_cfg() -> CfgNode: - """ - Get a copy of the default config. - - Returns: - a detectron2 CfgNode instance. - """ - from .defaults import _C - - return _C.clone() - - -def set_global_cfg(cfg: CfgNode) -> None: - """ - Let the global config point to the given cfg. - - Assume that the given "cfg" has the key "KEY", after calling - `set_global_cfg(cfg)`, the key can be accessed by: - - .. 
code-block:: python - - from detectron2.config import global_cfg - print(global_cfg.KEY) - - By using a hacky global config, you can access these configs anywhere, - without having to pass the config object or the values deep into the code. - This is a hacky feature introduced for quick prototyping / research exploration. - """ - global global_cfg - global_cfg.clear() - global_cfg.update(cfg) - - -def configurable(init_func): - """ - Decorate a class's __init__ method so that it can be called with a CfgNode - object using the class's from_config classmethod. - - Examples: - - .. code-block:: python - - class A: - @configurable - def __init__(self, a, b=2, c=3): - pass - - @classmethod - def from_config(cls, cfg): - # Returns kwargs to be passed to __init__ - return {"a": cfg.A, "b": cfg.B} - - a1 = A(a=1, b=2) # regular construction - a2 = A(cfg) # construct with a cfg - a3 = A(cfg, b=3, c=4) # construct with extra overwrite - """ - assert init_func.__name__ == "__init__", "@configurable should only be used for __init__!" - if init_func.__module__.startswith("detectron2."): - assert ( - init_func.__doc__ is not None and "experimental" in init_func.__doc__ - ), f"configurable {init_func} should be marked experimental" - - @functools.wraps(init_func) - def wrapped(self, *args, **kwargs): - try: - from_config_func = type(self).from_config - except AttributeError: - raise AttributeError("Class with @configurable must have a 'from_config' classmethod.") - if not inspect.ismethod(from_config_func): - raise TypeError("Class with @configurable must have a 'from_config' classmethod.") - - if _called_with_cfg(*args, **kwargs): - explicit_args = _get_args_from_config(from_config_func, *args, **kwargs) - init_func(self, **explicit_args) - else: - init_func(self, *args, **kwargs) - - return wrapped - - -def _get_args_from_config(from_config_func, *args, **kwargs): - """ - Use `from_config` to obtain explicit arguments. - - Returns: - dict: arguments to be used for cls.__init__ - """ - signature = inspect.signature(from_config_func) - if list(signature.parameters.keys())[0] != "cfg": - raise TypeError( - f"{from_config_func.__self__}.from_config must take 'cfg' as the first argument!" - ) - support_var_arg = any( - param.kind in [param.VAR_POSITIONAL, param.VAR_KEYWORD] - for param in signature.parameters.values() - ) - if support_var_arg: # forward all arguments to from_config, if from_config accepts them - ret = from_config_func(*args, **kwargs) - else: - # forward supported arguments to from_config - supported_arg_names = set(signature.parameters.keys()) - extra_kwargs = {} - for name in list(kwargs.keys()): - if name not in supported_arg_names: - extra_kwargs[name] = kwargs.pop(name) - ret = from_config_func(*args, **kwargs) - # forward the other arguments to __init__ - ret.update(extra_kwargs) - return ret - - -def _called_with_cfg(*args, **kwargs): - """ - Returns: - bool: whether the arguments contain CfgNode and should be considered - forwarded to from_config. - """ - if len(args) and isinstance(args[0], _CfgNode): - return True - if isinstance(kwargs.pop("cfg", None), _CfgNode): - return True - # `from_config`'s first argument is forced to be "cfg". - # So the above check covers all cases. 
- return False diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/config/defaults.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/config/defaults.py deleted file mode 100644 index b9ad62f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/config/defaults.py +++ /dev/null @@ -1,598 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .config import CfgNode as CN - -# ----------------------------------------------------------------------------- -# Convention about Training / Test specific parameters -# ----------------------------------------------------------------------------- -# Whenever an argument can be either used for training or for testing, the -# corresponding name will be post-fixed by a _TRAIN for a training parameter, -# or _TEST for a test-specific parameter. -# For example, the number of images during training will be -# IMAGES_PER_BATCH_TRAIN, while the number of images for testing will be -# IMAGES_PER_BATCH_TEST - -# ----------------------------------------------------------------------------- -# Config definition -# ----------------------------------------------------------------------------- - -_C = CN() - -# The version number, to upgrade from old configs to new ones if any -# changes happen. It's recommended to keep a VERSION in your config file. -_C.VERSION = 2 - -_C.MODEL = CN() -_C.MODEL.LOAD_PROPOSALS = False -_C.MODEL.MASK_ON = False -_C.MODEL.KEYPOINT_ON = False -_C.MODEL.DEVICE = "cuda" -_C.MODEL.META_ARCHITECTURE = "GeneralizedRCNN" - -# Path (possibly with schema like catalog:// or detectron2://) to a checkpoint file -# to be loaded to the model. You can find available models in the model zoo. -_C.MODEL.WEIGHTS = "" - -# Values to be used for image normalization (BGR order, since INPUT.FORMAT defaults to BGR). -# To train on images of different number of channels, just set different mean & std. -# Default values are the mean pixel value from ImageNet: [103.53, 116.28, 123.675] -_C.MODEL.PIXEL_MEAN = [103.530, 116.280, 123.675] -# When using pre-trained models in Detectron1 or any MSRA models, -# std has been absorbed into its conv1 weights, so the std needs to be set 1. -# Otherwise, you can use [57.375, 57.120, 58.395] (ImageNet std) -_C.MODEL.PIXEL_STD = [1.0, 1.0, 1.0] - - -# ----------------------------------------------------------------------------- -# INPUT -# ----------------------------------------------------------------------------- -_C.INPUT = CN() -# Size of the smallest side of the image during training -_C.INPUT.MIN_SIZE_TRAIN = (800,) -# Sample size of smallest side by choice or random selection from range give by -# INPUT.MIN_SIZE_TRAIN -_C.INPUT.MIN_SIZE_TRAIN_SAMPLING = "choice" -# Maximum size of the side of the image during training -_C.INPUT.MAX_SIZE_TRAIN = 1333 -# Size of the smallest side of the image during testing. Set to zero to disable resize in testing. -_C.INPUT.MIN_SIZE_TEST = 800 -# Maximum size of the side of the image during testing -_C.INPUT.MAX_SIZE_TEST = 1333 - -# `True` if cropping is used for data augmentation during training -_C.INPUT.CROP = CN({"ENABLED": False}) -# Cropping type: -# - "relative" crop (H * CROP.SIZE[0], W * CROP.SIZE[1]) part of an input of size (H, W) -# - "relative_range" uniformly sample relative crop size from between [CROP.SIZE[0], [CROP.SIZE[1]]. -# and [1, 1] and use it as in "relative" scenario. -# - "absolute" crop part of an input with absolute size: (CROP.SIZE[0], CROP.SIZE[1]). 
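# Illustrative example of the three crop types above (a sketch, not taken from the
# config itself): with CROP.SIZE = [0.9, 0.9] on an 800x1333 input,
#   "relative"       -> a fixed crop of roughly 720x1200 (0.9*H x 0.9*W)
#   "relative_range" -> the per-dimension scale is first sampled uniformly from [0.9, 1.0]
#   "absolute"       -> CROP.SIZE is read as absolute pixel sizes, e.g. [640, 640]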
-_C.INPUT.CROP.TYPE = "relative_range" -# Size of crop in range (0, 1] if CROP.TYPE is "relative" or "relative_range" and in number of -# pixels if CROP.TYPE is "absolute" -_C.INPUT.CROP.SIZE = [0.9, 0.9] - - -# Whether the model needs RGB, YUV, HSV etc. -# Should be one of the modes defined here, as we use PIL to read the image: -# https://pillow.readthedocs.io/en/stable/handbook/concepts.html#concept-modes -# with BGR being the one exception. One can set image format to BGR, we will -# internally use RGB for conversion and flip the channels over -_C.INPUT.FORMAT = "BGR" -# The ground truth mask format that the model will use. -# Mask R-CNN supports either "polygon" or "bitmask" as ground truth. -_C.INPUT.MASK_FORMAT = "polygon" # alternative: "bitmask" - - -# ----------------------------------------------------------------------------- -# Dataset -# ----------------------------------------------------------------------------- -_C.DATASETS = CN() -# List of the dataset names for training. Must be registered in DatasetCatalog -_C.DATASETS.TRAIN = () -# List of the pre-computed proposal files for training, which must be consistent -# with data listed in DATASETS.TRAIN. -_C.DATASETS.PROPOSAL_FILES_TRAIN = () -# Number of top scoring precomputed proposals to keep for training -_C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN = 2000 -# List of the dataset names for testing. Must be registered in DatasetCatalog -_C.DATASETS.TEST = () -# List of the pre-computed proposal files for test, which must be consistent -# with data listed in DATASETS.TEST. -_C.DATASETS.PROPOSAL_FILES_TEST = () -# Number of top scoring precomputed proposals to keep for test -_C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST = 1000 - -# ----------------------------------------------------------------------------- -# DataLoader -# ----------------------------------------------------------------------------- -_C.DATALOADER = CN() -# Number of data loading threads -_C.DATALOADER.NUM_WORKERS = 4 -# If True, each batch should contain only images for which the aspect ratio -# is compatible. This groups portrait images together, and landscape images -# are not batched with portrait images. -_C.DATALOADER.ASPECT_RATIO_GROUPING = True -# Options: TrainingSampler, RepeatFactorTrainingSampler -_C.DATALOADER.SAMPLER_TRAIN = "TrainingSampler" -# Repeat threshold for RepeatFactorTrainingSampler -_C.DATALOADER.REPEAT_THRESHOLD = 0.0 -# if True, the dataloader will filter out images that have no associated -# annotations at train time. -_C.DATALOADER.FILTER_EMPTY_ANNOTATIONS = True - -# ---------------------------------------------------------------------------- # -# Backbone options -# ---------------------------------------------------------------------------- # -_C.MODEL.BACKBONE = CN() - -_C.MODEL.BACKBONE.NAME = "build_resnet_backbone" -# Freeze the first several stages so they are not trained. -# There are 5 stages in ResNet. The first is a convolution, and the following -# stages are each group of residual blocks. 
-_C.MODEL.BACKBONE.FREEZE_AT = 2 - - -# ---------------------------------------------------------------------------- # -# FPN options -# ---------------------------------------------------------------------------- # -_C.MODEL.FPN = CN() -# Names of the input feature maps to be used by FPN -# They must have contiguous power of 2 strides -# e.g., ["res2", "res3", "res4", "res5"] -_C.MODEL.FPN.IN_FEATURES = [] -_C.MODEL.FPN.OUT_CHANNELS = 256 - -# Options: "" (no norm), "GN" -_C.MODEL.FPN.NORM = "" - -# Types for fusing the FPN top-down and lateral features. Can be either "sum" or "avg" -_C.MODEL.FPN.FUSE_TYPE = "sum" - - -# ---------------------------------------------------------------------------- # -# Proposal generator options -# ---------------------------------------------------------------------------- # -_C.MODEL.PROPOSAL_GENERATOR = CN() -# Current proposal generators include "RPN", "RRPN" and "PrecomputedProposals" -_C.MODEL.PROPOSAL_GENERATOR.NAME = "RPN" -# Proposal height and width both need to be greater than MIN_SIZE -# (a the scale used during training or inference) -_C.MODEL.PROPOSAL_GENERATOR.MIN_SIZE = 0 - - -# ---------------------------------------------------------------------------- # -# Anchor generator options -# ---------------------------------------------------------------------------- # -_C.MODEL.ANCHOR_GENERATOR = CN() -# The generator can be any name in the ANCHOR_GENERATOR registry -_C.MODEL.ANCHOR_GENERATOR.NAME = "DefaultAnchorGenerator" -# Anchor sizes (i.e. sqrt of area) in absolute pixels w.r.t. the network input. -# Format: list[list[float]]. SIZES[i] specifies the list of sizes -# to use for IN_FEATURES[i]; len(SIZES) == len(IN_FEATURES) must be true, -# or len(SIZES) == 1 is true and size list SIZES[0] is used for all -# IN_FEATURES. -_C.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64, 128, 256, 512]] -# Anchor aspect ratios. For each area given in `SIZES`, anchors with different aspect -# ratios are generated by an anchor generator. -# Format: list[list[float]]. ASPECT_RATIOS[i] specifies the list of aspect ratios (H/W) -# to use for IN_FEATURES[i]; len(ASPECT_RATIOS) == len(IN_FEATURES) must be true, -# or len(ASPECT_RATIOS) == 1 is true and aspect ratio list ASPECT_RATIOS[0] is used -# for all IN_FEATURES. -_C.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.5, 1.0, 2.0]] -# Anchor angles. -# list[list[float]], the angle in degrees, for each input feature map. -# ANGLES[i] specifies the list of angles for IN_FEATURES[i]. -_C.MODEL.ANCHOR_GENERATOR.ANGLES = [[-90, 0, 90]] -# Relative offset between the center of the first anchor and the top-left corner of the image -# Value has to be in [0, 1). Recommend to use 0.5, which means half stride. -# The value is not expected to affect model accuracy. -_C.MODEL.ANCHOR_GENERATOR.OFFSET = 0.0 - -# ---------------------------------------------------------------------------- # -# RPN options -# ---------------------------------------------------------------------------- # -_C.MODEL.RPN = CN() -_C.MODEL.RPN.HEAD_NAME = "StandardRPNHead" # used by RPN_HEAD_REGISTRY - -# Names of the input feature maps to be used by RPN -# e.g., ["p2", "p3", "p4", "p5", "p6"] for FPN -_C.MODEL.RPN.IN_FEATURES = ["res4"] -# Remove RPN anchors that go outside the image by BOUNDARY_THRESH pixels -# Set to -1 or a large value, e.g. 
100000, to disable pruning anchors -_C.MODEL.RPN.BOUNDARY_THRESH = -1 -# IOU overlap ratios [BG_IOU_THRESHOLD, FG_IOU_THRESHOLD] -# Minimum overlap required between an anchor and ground-truth box for the -# (anchor, gt box) pair to be a positive example (IoU >= FG_IOU_THRESHOLD -# ==> positive RPN example: 1) -# Maximum overlap allowed between an anchor and ground-truth box for the -# (anchor, gt box) pair to be a negative examples (IoU < BG_IOU_THRESHOLD -# ==> negative RPN example: 0) -# Anchors with overlap in between (BG_IOU_THRESHOLD <= IoU < FG_IOU_THRESHOLD) -# are ignored (-1) -_C.MODEL.RPN.IOU_THRESHOLDS = [0.3, 0.7] -_C.MODEL.RPN.IOU_LABELS = [0, -1, 1] -# Total number of RPN examples per image -_C.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 256 -# Target fraction of foreground (positive) examples per RPN minibatch -_C.MODEL.RPN.POSITIVE_FRACTION = 0.5 -# Weights on (dx, dy, dw, dh) for normalizing RPN anchor regression targets -_C.MODEL.RPN.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0) -# The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1. -_C.MODEL.RPN.SMOOTH_L1_BETA = 0.0 -_C.MODEL.RPN.LOSS_WEIGHT = 1.0 -# Number of top scoring RPN proposals to keep before applying NMS -# When FPN is used, this is *per FPN level* (not total) -_C.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 12000 -_C.MODEL.RPN.PRE_NMS_TOPK_TEST = 6000 -# Number of top scoring RPN proposals to keep after applying NMS -# When FPN is used, this limit is applied per level and then again to the union -# of proposals from all levels -# NOTE: When FPN is used, the meaning of this config is different from Detectron1. -# It means per-batch topk in Detectron1, but per-image topk here. -# See "modeling/rpn/rpn_outputs.py" for details. -_C.MODEL.RPN.POST_NMS_TOPK_TRAIN = 2000 -_C.MODEL.RPN.POST_NMS_TOPK_TEST = 1000 -# NMS threshold used on RPN proposals -_C.MODEL.RPN.NMS_THRESH = 0.7 - -# ---------------------------------------------------------------------------- # -# ROI HEADS options -# ---------------------------------------------------------------------------- # -_C.MODEL.ROI_HEADS = CN() -_C.MODEL.ROI_HEADS.NAME = "Res5ROIHeads" -# Number of foreground classes -_C.MODEL.ROI_HEADS.NUM_CLASSES = 80 -# Names of the input feature maps to be used by ROI heads -# Currently all heads (box, mask, ...) use the same input feature map list -# e.g., ["p2", "p3", "p4", "p5"] is commonly used for FPN -_C.MODEL.ROI_HEADS.IN_FEATURES = ["res4"] -# IOU overlap ratios [IOU_THRESHOLD] -# Overlap threshold for an RoI to be considered background (if < IOU_THRESHOLD) -# Overlap threshold for an RoI to be considered foreground (if >= IOU_THRESHOLD) -_C.MODEL.ROI_HEADS.IOU_THRESHOLDS = [0.5] -_C.MODEL.ROI_HEADS.IOU_LABELS = [0, 1] -# RoI minibatch size *per image* (number of regions of interest [ROIs]) -# Total number of RoIs per training minibatch = -# ROI_HEADS.BATCH_SIZE_PER_IMAGE * SOLVER.IMS_PER_BATCH -# E.g., a common configuration is: 512 * 16 = 8192 -_C.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512 -# Target fraction of RoI minibatch that is labeled foreground (i.e. class > 0) -_C.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.25 - -# Only used on test mode - -# Minimum score threshold (assuming scores in a [0, 1] range); a value chosen to -# balance obtaining high recall with not having too many low precision -# detections that will slow down inference post processing steps (like NMS) -# A default threshold of 0.0 increases AP by ~0.2-0.3 but significantly slows down -# inference. 
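# Minimal sketch of overriding these test-time thresholds (assumes only the yacs-style
# merge_from_list helper inherited by the CfgNode defined earlier in this diff):
#
#   from detectron2.config import get_cfg
#   cfg = get_cfg()
#   cfg.merge_from_list(["MODEL.ROI_HEADS.SCORE_THRESH_TEST", 0.5,
#                        "MODEL.ROI_HEADS.NMS_THRESH_TEST", 0.6])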
-_C.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05 -# Overlap threshold used for non-maximum suppression (suppress boxes with -# IoU >= this threshold) -_C.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.5 -# If True, augment proposals with ground-truth boxes before sampling proposals to -# train ROI heads. -_C.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT = True - -# ---------------------------------------------------------------------------- # -# Box Head -# ---------------------------------------------------------------------------- # -_C.MODEL.ROI_BOX_HEAD = CN() -# C4 don't use head name option -# Options for non-C4 models: FastRCNNConvFCHead, -_C.MODEL.ROI_BOX_HEAD.NAME = "" -# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets -# These are empirically chosen to approximately lead to unit variance targets -_C.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0) -# The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1. -_C.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA = 0.0 -_C.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION = 14 -_C.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO = 0 -# Type of pooling operation applied to the incoming feature map for each RoI -_C.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2" - -_C.MODEL.ROI_BOX_HEAD.NUM_FC = 0 -# Hidden layer dimension for FC layers in the RoI box head -_C.MODEL.ROI_BOX_HEAD.FC_DIM = 1024 -_C.MODEL.ROI_BOX_HEAD.NUM_CONV = 0 -# Channel dimension for Conv layers in the RoI box head -_C.MODEL.ROI_BOX_HEAD.CONV_DIM = 256 -# Normalization method for the convolution layers. -# Options: "" (no norm), "GN", "SyncBN". -_C.MODEL.ROI_BOX_HEAD.NORM = "" -# Whether to use class agnostic for bbox regression -_C.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG = False -# If true, RoI heads use bounding boxes predicted by the box head rather than proposal boxes. -_C.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES = False - -# ---------------------------------------------------------------------------- # -# Cascaded Box Head -# ---------------------------------------------------------------------------- # -_C.MODEL.ROI_BOX_CASCADE_HEAD = CN() -# The number of cascade stages is implicitly defined by the length of the following two configs. -_C.MODEL.ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS = ( - (10.0, 10.0, 5.0, 5.0), - (20.0, 20.0, 10.0, 10.0), - (30.0, 30.0, 15.0, 15.0), -) -_C.MODEL.ROI_BOX_CASCADE_HEAD.IOUS = (0.5, 0.6, 0.7) - - -# ---------------------------------------------------------------------------- # -# Mask Head -# ---------------------------------------------------------------------------- # -_C.MODEL.ROI_MASK_HEAD = CN() -_C.MODEL.ROI_MASK_HEAD.NAME = "MaskRCNNConvUpsampleHead" -_C.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION = 14 -_C.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO = 0 -_C.MODEL.ROI_MASK_HEAD.NUM_CONV = 0 # The number of convs in the mask head -_C.MODEL.ROI_MASK_HEAD.CONV_DIM = 256 -# Normalization method for the convolution layers. -# Options: "" (no norm), "GN", "SyncBN". 
-_C.MODEL.ROI_MASK_HEAD.NORM = "" -# Whether to use class agnostic for mask prediction -_C.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK = False -# Type of pooling operation applied to the incoming feature map for each RoI -_C.MODEL.ROI_MASK_HEAD.POOLER_TYPE = "ROIAlignV2" - - -# ---------------------------------------------------------------------------- # -# Keypoint Head -# ---------------------------------------------------------------------------- # -_C.MODEL.ROI_KEYPOINT_HEAD = CN() -_C.MODEL.ROI_KEYPOINT_HEAD.NAME = "KRCNNConvDeconvUpsampleHead" -_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION = 14 -_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO = 0 -_C.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS = tuple(512 for _ in range(8)) -_C.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 17 # 17 is the number of keypoints in COCO. - -# Images with too few (or no) keypoints are excluded from training. -_C.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE = 1 -# Normalize by the total number of visible keypoints in the minibatch if True. -# Otherwise, normalize by the total number of keypoints that could ever exist -# in the minibatch. -# The keypoint softmax loss is only calculated on visible keypoints. -# Since the number of visible keypoints can vary significantly between -# minibatches, this has the effect of up-weighting the importance of -# minibatches with few visible keypoints. (Imagine the extreme case of -# only one visible keypoint versus N: in the case of N, each one -# contributes 1/N to the gradient compared to the single keypoint -# determining the gradient direction). Instead, we can normalize the -# loss by the total number of keypoints, if it were the case that all -# keypoints were visible in a full minibatch. (Returning to the example, -# this means that the one visible keypoint contributes as much as each -# of the N keypoints.) -_C.MODEL.ROI_KEYPOINT_HEAD.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS = True -# Multi-task loss weight to use for keypoints -# Recommended values: -# - use 1.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is True -# - use 4.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is False -_C.MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT = 1.0 -# Type of pooling operation applied to the incoming feature map for each RoI -_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_TYPE = "ROIAlignV2" - -# ---------------------------------------------------------------------------- # -# Semantic Segmentation Head -# ---------------------------------------------------------------------------- # -_C.MODEL.SEM_SEG_HEAD = CN() -_C.MODEL.SEM_SEG_HEAD.NAME = "SemSegFPNHead" -_C.MODEL.SEM_SEG_HEAD.IN_FEATURES = ["p2", "p3", "p4", "p5"] -# Label in the semantic segmentation ground truth that is ignored, i.e., no loss is calculated for -# the correposnding pixel. -_C.MODEL.SEM_SEG_HEAD.IGNORE_VALUE = 255 -# Number of classes in the semantic segmentation head -_C.MODEL.SEM_SEG_HEAD.NUM_CLASSES = 54 -# Number of channels in the 3x3 convs inside semantic-FPN heads. -_C.MODEL.SEM_SEG_HEAD.CONVS_DIM = 128 -# Outputs from semantic-FPN heads are up-scaled to the COMMON_STRIDE stride. -_C.MODEL.SEM_SEG_HEAD.COMMON_STRIDE = 4 -# Normalization method for the convolution layers. Options: "" (no norm), "GN". -_C.MODEL.SEM_SEG_HEAD.NORM = "GN" -_C.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT = 1.0 - -_C.MODEL.PANOPTIC_FPN = CN() -# Scaling of all losses from instance detection / segmentation head. 
-_C.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT = 1.0 - -# options when combining instance & semantic segmentation outputs -_C.MODEL.PANOPTIC_FPN.COMBINE = CN({"ENABLED": True}) -_C.MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH = 0.5 -_C.MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT = 4096 -_C.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = 0.5 - - -# ---------------------------------------------------------------------------- # -# RetinaNet Head -# ---------------------------------------------------------------------------- # -_C.MODEL.RETINANET = CN() - -# This is the number of foreground classes. -_C.MODEL.RETINANET.NUM_CLASSES = 80 - -_C.MODEL.RETINANET.IN_FEATURES = ["p3", "p4", "p5", "p6", "p7"] - -# Convolutions to use in the cls and bbox tower -# NOTE: this doesn't include the last conv for logits -_C.MODEL.RETINANET.NUM_CONVS = 4 - -# IoU overlap ratio [bg, fg] for labeling anchors. -# Anchors with < bg are labeled negative (0) -# Anchors with >= bg and < fg are ignored (-1) -# Anchors with >= fg are labeled positive (1) -_C.MODEL.RETINANET.IOU_THRESHOLDS = [0.4, 0.5] -_C.MODEL.RETINANET.IOU_LABELS = [0, -1, 1] - -# Prior prob for rare case (i.e. foreground) at the beginning of training. -# This is used to set the bias for the logits layer of the classifier subnet. -# This improves training stability in the case of heavy class imbalance. -_C.MODEL.RETINANET.PRIOR_PROB = 0.01 - -# Inference cls score threshold, only anchors with score > INFERENCE_TH are -# considered for inference (to improve speed) -_C.MODEL.RETINANET.SCORE_THRESH_TEST = 0.05 -_C.MODEL.RETINANET.TOPK_CANDIDATES_TEST = 1000 -_C.MODEL.RETINANET.NMS_THRESH_TEST = 0.5 - -# Weights on (dx, dy, dw, dh) for normalizing Retinanet anchor regression targets -_C.MODEL.RETINANET.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0) - -# Loss parameters -_C.MODEL.RETINANET.FOCAL_LOSS_GAMMA = 2.0 -_C.MODEL.RETINANET.FOCAL_LOSS_ALPHA = 0.25 -_C.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA = 0.1 - - -# ---------------------------------------------------------------------------- # -# ResNe[X]t options (ResNets = {ResNet, ResNeXt} -# Note that parts of a resnet may be used for both the backbone and the head -# These options apply to both -# ---------------------------------------------------------------------------- # -_C.MODEL.RESNETS = CN() - -_C.MODEL.RESNETS.DEPTH = 50 -_C.MODEL.RESNETS.OUT_FEATURES = ["res4"] # res4 for C4 backbone, res2..5 for FPN backbone - -# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt -_C.MODEL.RESNETS.NUM_GROUPS = 1 - -# Options: FrozenBN, GN, "SyncBN", "BN" -_C.MODEL.RESNETS.NORM = "FrozenBN" - -# Baseline width of each group. -# Scaling this parameters will scale the width of all bottleneck layers. -_C.MODEL.RESNETS.WIDTH_PER_GROUP = 64 - -# Place the stride 2 conv on the 1x1 filter -# Use True only for the original MSRA ResNet; use False for C2 and Torch models -_C.MODEL.RESNETS.STRIDE_IN_1X1 = True - -# Apply dilation in stage "res5" -_C.MODEL.RESNETS.RES5_DILATION = 1 - -# Output width of res2. Scaling this parameters will scale the width of all 1x1 convs in ResNet -# For R18 and R34, this needs to be set to 64 -_C.MODEL.RESNETS.RES2_OUT_CHANNELS = 256 -_C.MODEL.RESNETS.STEM_OUT_CHANNELS = 64 - -# Apply Deformable Convolution in stages -# Specify if apply deform_conv on Res2, Res3, Res4, Res5 -_C.MODEL.RESNETS.DEFORM_ON_PER_STAGE = [False, False, False, False] -# Use True to use modulated deform_conv (DeformableV2, https://arxiv.org/abs/1811.11168); -# Use False for DeformableV1. 
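# Illustrative override enabling modulated deformable convolutions in res3-res5 only
# (a sketch mirroring the pattern of detectron2's deformable-conv baseline configs, not
# a line from this file):
#
#   MODEL.RESNETS.DEFORM_ON_PER_STAGE: [False, True, True, True]
#   MODEL.RESNETS.DEFORM_MODULATED: True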
-_C.MODEL.RESNETS.DEFORM_MODULATED = False -# Number of groups in deformable conv. -_C.MODEL.RESNETS.DEFORM_NUM_GROUPS = 1 - - -# ---------------------------------------------------------------------------- # -# Solver -# ---------------------------------------------------------------------------- # -_C.SOLVER = CN() - -# See detectron2/solver/build.py for LR scheduler options -_C.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR" - -_C.SOLVER.MAX_ITER = 40000 - -_C.SOLVER.BASE_LR = 0.001 - -_C.SOLVER.MOMENTUM = 0.9 - -_C.SOLVER.NESTEROV = False - -_C.SOLVER.WEIGHT_DECAY = 0.0001 -# The weight decay that's applied to parameters of normalization layers -# (typically the affine transformation) -_C.SOLVER.WEIGHT_DECAY_NORM = 0.0 - -_C.SOLVER.GAMMA = 0.1 -# The iteration number to decrease learning rate by GAMMA. -_C.SOLVER.STEPS = (30000,) - -_C.SOLVER.WARMUP_FACTOR = 1.0 / 1000 -_C.SOLVER.WARMUP_ITERS = 1000 -_C.SOLVER.WARMUP_METHOD = "linear" - -# Save a checkpoint after every this number of iterations -_C.SOLVER.CHECKPOINT_PERIOD = 5000 - -# Number of images per batch across all machines. -# If we have 16 GPUs and IMS_PER_BATCH = 32, -# each GPU will see 2 images per batch. -_C.SOLVER.IMS_PER_BATCH = 16 - -# Detectron v1 (and previous detection code) used a 2x higher LR and 0 WD for -# biases. This is not useful (at least for recent models). You should avoid -# changing these and they exist only to reproduce Detectron v1 training if -# desired. -_C.SOLVER.BIAS_LR_FACTOR = 1.0 -_C.SOLVER.WEIGHT_DECAY_BIAS = _C.SOLVER.WEIGHT_DECAY - -# Gradient clipping -_C.SOLVER.CLIP_GRADIENTS = CN({"ENABLED": False}) -# Type of gradient clipping, currently 2 values are supported: -# - "value": the absolute values of elements of each gradients are clipped -# - "norm": the norm of the gradient for each parameter is clipped thus -# affecting all elements in the parameter -_C.SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "value" -# Maximum absolute value used for clipping gradients -_C.SOLVER.CLIP_GRADIENTS.CLIP_VALUE = 1.0 -# Floating point number p for L-p norm to be used with the "norm" -# gradient clipping type; for L-inf, please specify .inf -_C.SOLVER.CLIP_GRADIENTS.NORM_TYPE = 2.0 - -# ---------------------------------------------------------------------------- # -# Specific test options -# ---------------------------------------------------------------------------- # -_C.TEST = CN() -# For end-to-end tests to verify the expected accuracy. -# Each item is [task, metric, value, tolerance] -# e.g.: [['bbox', 'AP', 38.5, 0.2]] -_C.TEST.EXPECTED_RESULTS = [] -# The period (in terms of steps) to evaluate the model during training. -# Set to 0 to disable. -_C.TEST.EVAL_PERIOD = 0 -# The sigmas used to calculate keypoint OKS. See http://cocodataset.org/#keypoints-eval -# When empty it will use the defaults in COCO. -# Otherwise it should have the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS. -_C.TEST.KEYPOINT_OKS_SIGMAS = [] -# Maximum number of detections to return per image during inference (100 is -# based on the limit established for the COCO dataset). 
-_C.TEST.DETECTIONS_PER_IMAGE = 100 - -_C.TEST.AUG = CN({"ENABLED": False}) -_C.TEST.AUG.MIN_SIZES = (400, 500, 600, 700, 800, 900, 1000, 1100, 1200) -_C.TEST.AUG.MAX_SIZE = 4000 -_C.TEST.AUG.FLIP = True - -_C.TEST.PRECISE_BN = CN({"ENABLED": False}) -_C.TEST.PRECISE_BN.NUM_ITER = 200 - -# ---------------------------------------------------------------------------- # -# Misc options -# ---------------------------------------------------------------------------- # -# Directory where output files are written -_C.OUTPUT_DIR = "./output" -# Set seed to negative to fully randomize everything. -# Set seed to positive to use a fixed seed. Note that a fixed seed increases -# reproducibility but does not guarantee fully deterministic behavior. -# Disabling all parallelism further increases reproducibility. -_C.SEED = -1 -# Benchmark different cudnn algorithms. -# If input images have very different sizes, this option will have large overhead -# for about 10k iterations. It usually hurts total time, but can benefit for certain models. -# If input images have the same or similar sizes, benchmark is often helpful. -_C.CUDNN_BENCHMARK = False -# The period (in terms of steps) for minibatch visualization at train time. -# Set to 0 to disable. -_C.VIS_PERIOD = 0 - -# global config is for quick hack purposes. -# You can set them in command line or config files, -# and access it with: -# -# from detectron2.config import global_cfg -# print(global_cfg.HACK) -# -# Do not commit any configs into it. -_C.GLOBAL = CN() -_C.GLOBAL.HACK = 1.0 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/__init__.py deleted file mode 100644 index e8f72e0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from . import transforms # isort:skip - -from .build import ( - build_detection_test_loader, - build_detection_train_loader, - get_detection_dataset_dicts, - load_proposals_into_dataset, - print_instances_class_histogram, -) -from .catalog import DatasetCatalog, MetadataCatalog -from .common import DatasetFromList, MapDataset -from .dataset_mapper import DatasetMapper - -# ensure the builtin data are registered -from . import datasets, samplers # isort:skip - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/build.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/build.py deleted file mode 100644 index cb7e857..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/build.py +++ /dev/null @@ -1,397 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import bisect -import copy -import itertools -import logging -import numpy as np -import operator -import pickle -import torch.utils.data -from fvcore.common.file_io import PathManager -from tabulate import tabulate -from termcolor import colored - -from detectron2.structures import BoxMode -from detectron2.utils.comm import get_world_size -from detectron2.utils.env import seed_all_rng -from detectron2.utils.logger import log_first_n - -from . 
import samplers -from .catalog import DatasetCatalog, MetadataCatalog -from .common import AspectRatioGroupedDataset, DatasetFromList, MapDataset -from .dataset_mapper import DatasetMapper -from .detection_utils import check_metadata_consistency - -""" -This file contains the default logic to build a dataloader for training or testing. -""" - -__all__ = [ - "build_detection_train_loader", - "build_detection_test_loader", - "get_detection_dataset_dicts", - "load_proposals_into_dataset", - "print_instances_class_histogram", -] - - -def filter_images_with_only_crowd_annotations(dataset_dicts): - """ - Filter out images with none annotations or only crowd annotations - (i.e., images without non-crowd annotations). - A common training-time preprocessing on COCO dataset. - - Args: - dataset_dicts (list[dict]): annotations in Detectron2 Dataset format. - - Returns: - list[dict]: the same format, but filtered. - """ - num_before = len(dataset_dicts) - - def valid(anns): - for ann in anns: - if ann.get("iscrowd", 0) == 0: - return True - return False - - dataset_dicts = [x for x in dataset_dicts if valid(x["annotations"])] - num_after = len(dataset_dicts) - logger = logging.getLogger(__name__) - logger.info( - "Removed {} images with no usable annotations. {} images left.".format( - num_before - num_after, num_after - ) - ) - return dataset_dicts - - -def filter_images_with_few_keypoints(dataset_dicts, min_keypoints_per_image): - """ - Filter out images with too few number of keypoints. - - Args: - dataset_dicts (list[dict]): annotations in Detectron2 Dataset format. - - Returns: - list[dict]: the same format as dataset_dicts, but filtered. - """ - num_before = len(dataset_dicts) - - def visible_keypoints_in_image(dic): - # Each keypoints field has the format [x1, y1, v1, ...], where v is visibility - annotations = dic["annotations"] - return sum( - (np.array(ann["keypoints"][2::3]) > 0).sum() - for ann in annotations - if "keypoints" in ann - ) - - dataset_dicts = [ - x for x in dataset_dicts if visible_keypoints_in_image(x) >= min_keypoints_per_image - ] - num_after = len(dataset_dicts) - logger = logging.getLogger(__name__) - logger.info( - "Removed {} images with fewer than {} keypoints.".format( - num_before - num_after, min_keypoints_per_image - ) - ) - return dataset_dicts - - -def load_proposals_into_dataset(dataset_dicts, proposal_file): - """ - Load precomputed object proposals into the dataset. - - The proposal file should be a pickled dict with the following keys: - - - "ids": list[int] or list[str], the image ids - - "boxes": list[np.ndarray], each is an Nx4 array of boxes corresponding to the image id - - "objectness_logits": list[np.ndarray], each is an N sized array of objectness scores - corresponding to the boxes. - - "bbox_mode": the BoxMode of the boxes array. Defaults to ``BoxMode.XYXY_ABS``. - - Args: - dataset_dicts (list[dict]): annotations in Detectron2 Dataset format. - proposal_file (str): file path of pre-computed proposals, in pkl format. - - Returns: - list[dict]: the same format as dataset_dicts, but added proposal field. 
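    As an illustration only (the file name and array contents below are arbitrary
    examples, not taken from this repository), a proposal file in the documented
    format could be written like this:

        import pickle
        import numpy as np
        from detectron2.structures import BoxMode

        proposals = {
            "ids": [139, 285],                                            # image ids of two images
            "boxes": [np.random.rand(100, 4) * 640 for _ in range(2)],    # one Nx4 array per image
            "objectness_logits": [np.random.randn(100) for _ in range(2)],
            "bbox_mode": BoxMode.XYXY_ABS,
        }
        with open("my_proposals.pkl", "wb") as f:                         # hypothetical output path
            pickle.dump(proposals, f)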
- """ - logger = logging.getLogger(__name__) - logger.info("Loading proposals from: {}".format(proposal_file)) - - with PathManager.open(proposal_file, "rb") as f: - proposals = pickle.load(f, encoding="latin1") - - # Rename the key names in D1 proposal files - rename_keys = {"indexes": "ids", "scores": "objectness_logits"} - for key in rename_keys: - if key in proposals: - proposals[rename_keys[key]] = proposals.pop(key) - - # Fetch the indexes of all proposals that are in the dataset - # Convert image_id to str since they could be int. - img_ids = set({str(record["image_id"]) for record in dataset_dicts}) - id_to_index = {str(id): i for i, id in enumerate(proposals["ids"]) if str(id) in img_ids} - - # Assuming default bbox_mode of precomputed proposals are 'XYXY_ABS' - bbox_mode = BoxMode(proposals["bbox_mode"]) if "bbox_mode" in proposals else BoxMode.XYXY_ABS - - for record in dataset_dicts: - # Get the index of the proposal - i = id_to_index[str(record["image_id"])] - - boxes = proposals["boxes"][i] - objectness_logits = proposals["objectness_logits"][i] - # Sort the proposals in descending order of the scores - inds = objectness_logits.argsort()[::-1] - record["proposal_boxes"] = boxes[inds] - record["proposal_objectness_logits"] = objectness_logits[inds] - record["proposal_bbox_mode"] = bbox_mode - - return dataset_dicts - - -def _quantize(x, bin_edges): - bin_edges = copy.copy(bin_edges) - bin_edges = sorted(bin_edges) - quantized = list(map(lambda y: bisect.bisect_right(bin_edges, y), x)) - return quantized - - -def print_instances_class_histogram(dataset_dicts, class_names): - """ - Args: - dataset_dicts (list[dict]): list of dataset dicts. - class_names (list[str]): list of class names (zero-indexed). - """ - num_classes = len(class_names) - hist_bins = np.arange(num_classes + 1) - histogram = np.zeros((num_classes,), dtype=np.int) - for entry in dataset_dicts: - annos = entry["annotations"] - classes = [x["category_id"] for x in annos if not x.get("iscrowd", 0)] - histogram += np.histogram(classes, bins=hist_bins)[0] - - N_COLS = min(6, len(class_names) * 2) - - def short_name(x): - # make long class names shorter. useful for lvis - if len(x) > 13: - return x[:11] + ".." - return x - - data = list( - itertools.chain(*[[short_name(class_names[i]), int(v)] for i, v in enumerate(histogram)]) - ) - total_num_instances = sum(data[1::2]) - data.extend([None] * (N_COLS - (len(data) % N_COLS))) - if num_classes > 1: - data.extend(["total", total_num_instances]) - data = itertools.zip_longest(*[data[i::N_COLS] for i in range(N_COLS)]) - table = tabulate( - data, - headers=["category", "#instances"] * (N_COLS // 2), - tablefmt="pipe", - numalign="left", - stralign="center", - ) - log_first_n( - logging.INFO, - "Distribution of instances among all {} categories:\n".format(num_classes) - + colored(table, "cyan"), - key="message", - ) - - -def get_detection_dataset_dicts( - dataset_names, filter_empty=True, min_keypoints=0, proposal_files=None -): - """ - Load and prepare dataset dicts for instance detection/segmentation and semantic segmentation. - - Args: - dataset_names (list[str]): a list of dataset names - filter_empty (bool): whether to filter out images without instance annotations - min_keypoints (int): filter out images with fewer keypoints than - `min_keypoints`. Set to 0 to do nothing. - proposal_files (list[str]): if given, a list of object proposal files - that match each dataset in `dataset_names`. 
- """ - assert len(dataset_names) - dataset_dicts = [DatasetCatalog.get(dataset_name) for dataset_name in dataset_names] - for dataset_name, dicts in zip(dataset_names, dataset_dicts): - assert len(dicts), "Dataset '{}' is empty!".format(dataset_name) - - if proposal_files is not None: - assert len(dataset_names) == len(proposal_files) - # load precomputed proposals from proposal files - dataset_dicts = [ - load_proposals_into_dataset(dataset_i_dicts, proposal_file) - for dataset_i_dicts, proposal_file in zip(dataset_dicts, proposal_files) - ] - - dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts)) - - has_instances = "annotations" in dataset_dicts[0] - # Keep images without instance-level GT if the dataset has semantic labels. - if filter_empty and has_instances and "sem_seg_file_name" not in dataset_dicts[0]: - dataset_dicts = filter_images_with_only_crowd_annotations(dataset_dicts) - - if min_keypoints > 0 and has_instances: - dataset_dicts = filter_images_with_few_keypoints(dataset_dicts, min_keypoints) - - if has_instances: - try: - class_names = MetadataCatalog.get(dataset_names[0]).thing_classes - check_metadata_consistency("thing_classes", dataset_names) - print_instances_class_histogram(dataset_dicts, class_names) - except AttributeError: # class names are not available for this dataset - pass - return dataset_dicts - - -def build_detection_train_loader(cfg, mapper=None): - """ - A data loader is created by the following steps: - - 1. Use the dataset names in config to query :class:`DatasetCatalog`, and obtain a list of dicts. - 2. Coordinate a random shuffle order shared among all processes (all GPUs) - 3. Each process spawn another few workers to process the dicts. Each worker will: - * Map each metadata dict into another format to be consumed by the model. - * Batch them by simply putting dicts into a list. - - The batched ``list[mapped_dict]`` is what this dataloader will yield. - - Args: - cfg (CfgNode): the config - mapper (callable): a callable which takes a sample (dict) from dataset and - returns the format to be consumed by the model. - By default it will be `DatasetMapper(cfg, True)`. 
- - Returns: - an infinite iterator of training data - """ - num_workers = get_world_size() - images_per_batch = cfg.SOLVER.IMS_PER_BATCH - assert ( - images_per_batch % num_workers == 0 - ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format( - images_per_batch, num_workers - ) - assert ( - images_per_batch >= num_workers - ), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format( - images_per_batch, num_workers - ) - images_per_worker = images_per_batch // num_workers - - dataset_dicts = get_detection_dataset_dicts( - cfg.DATASETS.TRAIN, - filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS, - min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE - if cfg.MODEL.KEYPOINT_ON - else 0, - proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None, - ) - dataset = DatasetFromList(dataset_dicts, copy=False) - - if mapper is None: - mapper = DatasetMapper(cfg, True) - dataset = MapDataset(dataset, mapper) - - sampler_name = cfg.DATALOADER.SAMPLER_TRAIN - logger = logging.getLogger(__name__) - logger.info("Using training sampler {}".format(sampler_name)) - if sampler_name == "TrainingSampler": - sampler = samplers.TrainingSampler(len(dataset)) - elif sampler_name == "RepeatFactorTrainingSampler": - sampler = samplers.RepeatFactorTrainingSampler( - dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD - ) - else: - raise ValueError("Unknown training sampler: {}".format(sampler_name)) - - if cfg.DATALOADER.ASPECT_RATIO_GROUPING: - data_loader = torch.utils.data.DataLoader( - dataset, - sampler=sampler, - num_workers=cfg.DATALOADER.NUM_WORKERS, - batch_sampler=None, - collate_fn=operator.itemgetter(0), # don't batch, but yield individual elements - worker_init_fn=worker_init_reset_seed, - ) # yield individual mapped dict - data_loader = AspectRatioGroupedDataset(data_loader, images_per_worker) - else: - batch_sampler = torch.utils.data.sampler.BatchSampler( - sampler, images_per_worker, drop_last=True - ) - # drop_last so the batch always have the same size - data_loader = torch.utils.data.DataLoader( - dataset, - num_workers=cfg.DATALOADER.NUM_WORKERS, - batch_sampler=batch_sampler, - collate_fn=trivial_batch_collator, - worker_init_fn=worker_init_reset_seed, - ) - - return data_loader - - -def build_detection_test_loader(cfg, dataset_name, mapper=None): - """ - Similar to `build_detection_train_loader`. - But this function uses the given `dataset_name` argument (instead of the names in cfg), - and uses batch size 1. - - Args: - cfg: a detectron2 CfgNode - dataset_name (str): a name of the dataset that's available in the DatasetCatalog - mapper (callable): a callable which takes a sample (dict) from dataset - and returns the format to be consumed by the model. - By default it will be `DatasetMapper(cfg, False)`. - - Returns: - DataLoader: a torch DataLoader, that loads the given detection - dataset, with test-time transformation and batching. - """ - dataset_dicts = get_detection_dataset_dicts( - [dataset_name], - filter_empty=False, - proposal_files=[ - cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(dataset_name)] - ] - if cfg.MODEL.LOAD_PROPOSALS - else None, - ) - - dataset = DatasetFromList(dataset_dicts) - if mapper is None: - mapper = DatasetMapper(cfg, False) - dataset = MapDataset(dataset, mapper) - - sampler = samplers.InferenceSampler(len(dataset)) - # Always use 1 image per worker during inference since this is the - # standard when reporting inference time in papers. 
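# As a sketch only (the config values and the dataset name "coco_2017_val" are
# assumptions, not taken from this file), the two builders above are typically
# used like:
#
#     cfg = get_cfg()                                    # detectron2.config.get_cfg
#     cfg.DATASETS.TRAIN = ("coco_2017_train",)
#     cfg.DATASETS.TEST = ("coco_2017_val",)
#     train_loader = build_detection_train_loader(cfg)   # infinite iterator of list[dict]
#     test_loader = build_detection_test_loader(cfg, "coco_2017_val")  # 1 image per batch
#     batch = next(iter(train_loader))                   # len(batch) == IMS_PER_BATCH // #GPUs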
- batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False) - - data_loader = torch.utils.data.DataLoader( - dataset, - num_workers=cfg.DATALOADER.NUM_WORKERS, - batch_sampler=batch_sampler, - collate_fn=trivial_batch_collator, - ) - return data_loader - - -def trivial_batch_collator(batch): - """ - A batch collator that does nothing. - """ - return batch - - -def worker_init_reset_seed(worker_id): - seed_all_rng(np.random.randint(2 ** 31) + worker_id) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/catalog.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/catalog.py deleted file mode 100644 index 57f18c8..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/catalog.py +++ /dev/null @@ -1,221 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import copy -import logging -import types -from typing import List - -from detectron2.utils.logger import log_first_n - -__all__ = ["DatasetCatalog", "MetadataCatalog"] - - -class DatasetCatalog(object): - """ - A catalog that stores information about the data and how to obtain them. - - It contains a mapping from strings - (which are names that identify a dataset, e.g. "coco_2014_train") - to a function which parses the dataset and returns the samples in the - format of `list[dict]`. - - The returned dicts should be in Detectron2 Dataset format (See DATASETS.md for details) - if used with the data loader functionalities in `data/build.py,data/detection_transform.py`. - - The purpose of having this catalog is to make it easy to choose - different data, by just using the strings in the config. - """ - - _REGISTERED = {} - - @staticmethod - def register(name, func): - """ - Args: - name (str): the name that identifies a dataset, e.g. "coco_2014_train". - func (callable): a callable which takes no arguments and returns a list of dicts. - """ - assert callable(func), "You must register a function with `DatasetCatalog.register`!" - assert name not in DatasetCatalog._REGISTERED, "Dataset '{}' is already registered!".format( - name - ) - DatasetCatalog._REGISTERED[name] = func - - @staticmethod - def get(name): - """ - Call the registered function and return its results. - - Args: - name (str): the name that identifies a dataset, e.g. "coco_2014_train". - - Returns: - list[dict]: dataset annotations.0 - """ - try: - f = DatasetCatalog._REGISTERED[name] - except KeyError: - raise KeyError( - "Dataset '{}' is not registered! Available data are: {}".format( - name, ", ".join(DatasetCatalog._REGISTERED.keys()) - ) - ) - return f() - - @staticmethod - def list() -> List[str]: - """ - List all registered data. - - Returns: - list[str] - """ - return list(DatasetCatalog._REGISTERED.keys()) - - @staticmethod - def clear(): - """ - Remove all registered dataset. - """ - DatasetCatalog._REGISTERED.clear() - - -class Metadata(types.SimpleNamespace): - """ - A class that supports simple attribute setter/getter. - It is intended for storing metadata of a dataset and make it accessible globally. - - Examples: - - .. 
code-block:: python - - # somewhere when you load the data: - MetadataCatalog.get("mydataset").thing_classes = ["person", "dog"] - - # somewhere when you print statistics or visualize: - classes = MetadataCatalog.get("mydataset").thing_classes - """ - - # the name of the dataset - # set default to N/A so that `self.name` in the errors will not trigger getattr again - name: str = "N/A" - - _RENAMED = { - "class_names": "thing_classes", - "dataset_id_to_contiguous_id": "thing_dataset_id_to_contiguous_id", - "stuff_class_names": "stuff_classes", - } - - def __getattr__(self, key): - if key in self._RENAMED: - log_first_n( - logging.WARNING, - "Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]), - n=10, - ) - return getattr(self, self._RENAMED[key]) - - raise AttributeError( - "Attribute '{}' does not exist in the metadata of '{}'. Available keys are {}.".format( - key, self.name, str(self.__dict__.keys()) - ) - ) - - def __setattr__(self, key, val): - if key in self._RENAMED: - log_first_n( - logging.WARNING, - "Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]), - n=10, - ) - setattr(self, self._RENAMED[key], val) - - # Ensure that metadata of the same name stays consistent - try: - oldval = getattr(self, key) - assert oldval == val, ( - "Attribute '{}' in the metadata of '{}' cannot be set " - "to a different value!\n{} != {}".format(key, self.name, oldval, val) - ) - except AttributeError: - super().__setattr__(key, val) - - def as_dict(self): - """ - Returns all the metadata as a dict. - Note that modifications to the returned dict will not reflect on the Metadata object. - """ - return copy.copy(self.__dict__) - - def set(self, **kwargs): - """ - Set multiple metadata with kwargs. - """ - for k, v in kwargs.items(): - setattr(self, k, v) - return self - - def get(self, key, default=None): - """ - Access an attribute and return its value if exists. - Otherwise return default. - """ - try: - return getattr(self, key) - except AttributeError: - return default - - -class MetadataCatalog: - """ - MetadataCatalog provides access to "Metadata" of a given dataset. - - The metadata associated with a certain name is a singleton: once created, - the metadata will stay alive and will be returned by future calls to `get(name)`. - - It's like global variables, so don't abuse it. - It's meant for storing knowledge that's constant and shared across the execution - of the program, e.g.: the class names in COCO. - """ - - _NAME_TO_META = {} - - @staticmethod - def get(name): - """ - Args: - name (str): name of a dataset (e.g. coco_2014_train). - - Returns: - Metadata: The :class:`Metadata` instance associated with this name, - or create an empty one if none is available. - """ - assert len(name) - if name in MetadataCatalog._NAME_TO_META: - ret = MetadataCatalog._NAME_TO_META[name] - # TODO this is for the BC breaking change in D15247032. - # Remove this in the future. - if hasattr(ret, "dataset_name"): - logger = logging.getLogger() - logger.warning( - """ -The 'dataset_name' key in metadata is no longer used for -sharing metadata among splits after D15247032! Add -metadata to each split (now called dataset) separately! - """ - ) - parent_meta = MetadataCatalog.get(ret.dataset_name).as_dict() - ret.set(**parent_meta) - return ret - else: - m = MetadataCatalog._NAME_TO_META[name] = Metadata(name=name) - return m - - @staticmethod - def list(): - """ - List all registered metadata. 
- - Returns: - list[str]: keys (names of data) of all registered metadata - """ - return list(MetadataCatalog._NAME_TO_META.keys()) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/common.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/common.py deleted file mode 100644 index a42c8b2..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/common.py +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import copy -import logging -import numpy as np -import pickle -import random -import torch.utils.data as data - -from detectron2.utils.serialize import PicklableWrapper - -__all__ = ["MapDataset", "DatasetFromList", "AspectRatioGroupedDataset"] - - -class MapDataset(data.Dataset): - """ - Map a function over the elements in a dataset. - - Args: - dataset: a dataset where map function is applied. - map_func: a callable which maps the element in dataset. map_func is - responsible for error handling, when error happens, it needs to - return None so the MapDataset will randomly use other - elements from the dataset. - """ - - def __init__(self, dataset, map_func): - self._dataset = dataset - self._map_func = PicklableWrapper(map_func) # wrap so that a lambda will work - - self._rng = random.Random(42) - self._fallback_candidates = set(range(len(dataset))) - - def __len__(self): - return len(self._dataset) - - def __getitem__(self, idx): - retry_count = 0 - cur_idx = int(idx) - - while True: - data = self._map_func(self._dataset[cur_idx]) - if data is not None: - self._fallback_candidates.add(cur_idx) - return data - - # _map_func fails for this idx, use a random new index from the pool - retry_count += 1 - self._fallback_candidates.discard(cur_idx) - cur_idx = self._rng.sample(self._fallback_candidates, k=1)[0] - - if retry_count >= 3: - logger = logging.getLogger(__name__) - logger.warning( - "Failed to apply `_map_func` for idx: {}, retry count: {}".format( - idx, retry_count - ) - ) - - -class DatasetFromList(data.Dataset): - """ - Wrap a list to a torch Dataset. It produces elements of the list as data. - """ - - def __init__(self, lst: list, copy: bool = True, serialize: bool = True): - """ - Args: - lst (list): a list which contains elements to produce. - copy (bool): whether to deepcopy the element when producing it, - so that the result can be modified in place without affecting the - source in the list. - serialize (bool): whether to hold memory using serialized objects, when - enabled, data loader workers can use shared RAM from master - process instead of making a copy. 
- """ - self._lst = lst - self._copy = copy - self._serialize = serialize - - def _serialize(data): - buffer = pickle.dumps(data, protocol=-1) - return np.frombuffer(buffer, dtype=np.uint8) - - if self._serialize: - logger = logging.getLogger(__name__) - logger.info( - "Serializing {} elements to byte tensors and concatenating them all ...".format( - len(self._lst) - ) - ) - self._lst = [_serialize(x) for x in self._lst] - self._addr = np.asarray([len(x) for x in self._lst], dtype=np.int64) - self._addr = np.cumsum(self._addr) - self._lst = np.concatenate(self._lst) - logger.info("Serialized dataset takes {:.2f} MiB".format(len(self._lst) / 1024 ** 2)) - - def __len__(self): - if self._serialize: - return len(self._addr) - else: - return len(self._lst) - - def __getitem__(self, idx): - if self._serialize: - start_addr = 0 if idx == 0 else self._addr[idx - 1].item() - end_addr = self._addr[idx].item() - bytes = memoryview(self._lst[start_addr:end_addr]) - return pickle.loads(bytes) - elif self._copy: - return copy.deepcopy(self._lst[idx]) - else: - return self._lst[idx] - - -class AspectRatioGroupedDataset(data.IterableDataset): - """ - Batch data that have similar aspect ratio together. - In this implementation, images whose aspect ratio < (or >) 1 will - be batched together. - This improves training speed because the images then need less padding - to form a batch. - - It assumes the underlying dataset produces dicts with "width" and "height" keys. - It will then produce a list of original dicts with length = batch_size, - all with similar aspect ratios. - """ - - def __init__(self, dataset, batch_size): - """ - Args: - dataset: an iterable. Each element must be a dict with keys - "width" and "height", which will be used to batch data. - batch_size (int): - """ - self.dataset = dataset - self.batch_size = batch_size - self._buckets = [[] for _ in range(2)] - # Hard-coded two aspect ratio groups: w > h and w < h. - # Can add support for more aspect ratio groups, but doesn't seem useful - - def __iter__(self): - for d in self.dataset: - w, h = d["width"], d["height"] - bucket_id = 0 if w > h else 1 - bucket = self._buckets[bucket_id] - bucket.append(d) - if len(bucket) == self.batch_size: - yield bucket[:] - del bucket[:] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/dataset_mapper.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/dataset_mapper.py deleted file mode 100644 index db73b37..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/dataset_mapper.py +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import copy -import logging -import numpy as np -import torch -from fvcore.common.file_io import PathManager -from PIL import Image - -from . import detection_utils as utils -from . import transforms as T - -""" -This file contains the default mapping that's applied to "dataset dicts". -""" - -__all__ = ["DatasetMapper"] - - -class DatasetMapper: - """ - A callable which takes a dataset dict in Detectron2 Dataset format, - and map it into a format used by the model. - - This is the default callable to be used to map your dataset dict into training data. - You may need to follow it to implement your own one for customized logic, - such as a different way to read or transform images. - See :doc:`/tutorials/data_loading` for details. - - The callable currently does the following: - - 1. Read the image from "file_name" - 2. 
Applies cropping/geometric transforms to the image and annotations - 3. Prepare data and annotations to Tensor and :class:`Instances` - """ - - def __init__(self, cfg, is_train=True): - if cfg.INPUT.CROP.ENABLED and is_train: - self.crop_gen = T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE) - logging.getLogger(__name__).info("CropGen used in training: " + str(self.crop_gen)) - else: - self.crop_gen = None - - self.tfm_gens = utils.build_transform_gen(cfg, is_train) - - # fmt: off - self.img_format = cfg.INPUT.FORMAT - self.mask_on = cfg.MODEL.MASK_ON - self.mask_format = cfg.INPUT.MASK_FORMAT - self.keypoint_on = cfg.MODEL.KEYPOINT_ON - self.load_proposals = cfg.MODEL.LOAD_PROPOSALS - # fmt: on - if self.keypoint_on and is_train: - # Flip only makes sense in training - self.keypoint_hflip_indices = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN) - else: - self.keypoint_hflip_indices = None - - if self.load_proposals: - self.min_box_side_len = cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE - self.proposal_topk = ( - cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN - if is_train - else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST - ) - self.is_train = is_train - - def __call__(self, dataset_dict): - """ - Args: - dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. - - Returns: - dict: a format that builtin models in detectron2 accept - """ - dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below - # USER: Write your own image loading if it's not from a file - image = utils.read_image(dataset_dict["file_name"], format=self.img_format) - utils.check_image_size(dataset_dict, image) - - if "annotations" not in dataset_dict: - image, transforms = T.apply_transform_gens( - ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image - ) - else: - # Crop around an instance if there are instances in the image. - # USER: Remove if you don't use cropping - if self.crop_gen: - crop_tfm = utils.gen_crop_transform_with_instance( - self.crop_gen.get_crop_size(image.shape[:2]), - image.shape[:2], - np.random.choice(dataset_dict["annotations"]), - ) - image = crop_tfm.apply_image(image) - image, transforms = T.apply_transform_gens(self.tfm_gens, image) - if self.crop_gen: - transforms = crop_tfm + transforms - - image_shape = image.shape[:2] # h, w - - # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, - # but not efficient on large generic data structures due to the use of pickle & mp.Queue. - # Therefore it's important to use torch.Tensor. - dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) - - # USER: Remove if you don't use pre-computed proposals. - if self.load_proposals: - utils.transform_proposals( - dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk - ) - - if not self.is_train: - # USER: Modify this if you want to keep them for some reason. - dataset_dict.pop("annotations", None) - dataset_dict.pop("sem_seg_file_name", None) - return dataset_dict - - if "annotations" in dataset_dict: - # USER: Modify this if you want to keep them for some reason. 
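# The block below drops annotation fields the model will not consume (masks or
# keypoints when the corresponding heads are disabled), applies the same geometric
# transforms to the surviving annotations, and packs them into an `Instances`
# object. As a usage sketch only (`cfg` and `dataset_dict` are placeholders, not
# defined here):
#     mapper = DatasetMapper(cfg, is_train=True)
#     mapped = mapper(dataset_dict)    # dataset_dict: one dict in Detectron2 Dataset format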
- for anno in dataset_dict["annotations"]: - if not self.mask_on: - anno.pop("segmentation", None) - if not self.keypoint_on: - anno.pop("keypoints", None) - - # USER: Implement additional transformations if you have other types of data - annos = [ - utils.transform_instance_annotations( - obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices - ) - for obj in dataset_dict.pop("annotations") - if obj.get("iscrowd", 0) == 0 - ] - instances = utils.annotations_to_instances( - annos, image_shape, mask_format=self.mask_format - ) - # Create a tight bounding box from masks, useful when image is cropped - if self.crop_gen and instances.has("gt_masks"): - instances.gt_boxes = instances.gt_masks.get_bounding_boxes() - dataset_dict["instances"] = utils.filter_empty_instances(instances) - - # USER: Remove if you don't do semantic/panoptic segmentation. - if "sem_seg_file_name" in dataset_dict: - with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f: - sem_seg_gt = Image.open(f) - sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8") - sem_seg_gt = transforms.apply_segmentation(sem_seg_gt) - sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long")) - dataset_dict["sem_seg"] = sem_seg_gt - return dataset_dict diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/README.md b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/README.md deleted file mode 100644 index 9fb3e4f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/README.md +++ /dev/null @@ -1,9 +0,0 @@ - - -### Common Datasets - -The dataset implemented here do not need to load the data into the final format. -It should provide the minimal data structure needed to use the dataset, so it can be very efficient. - -For example, for an image dataset, just provide the file names and labels, but don't read the images. -Let the downstream decide how to read. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/__init__.py deleted file mode 100644 index 9c3f556..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .cityscapes import load_cityscapes_instances -from .coco import load_coco_json, load_sem_seg -from .lvis import load_lvis_json, register_lvis_instances, get_lvis_instances_meta -from .register_coco import register_coco_instances, register_coco_panoptic_separated -from . import builtin # ensure the builtin data are registered - - -__all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/builtin.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/builtin.py deleted file mode 100644 index 21ac222..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/builtin.py +++ /dev/null @@ -1,220 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - - -""" -This file registers pre-defined data at hard-coded paths, and their metadata. - -We hard-code metadata for common data. This will enable: -1. Consistency check when loading the data -2. 
Use models on these standard data directly and run demos, - without having to download the dataset annotations - -We hard-code some paths to the dataset that's assumed to -exist in "./data/". - -Users SHOULD NOT use this file to create new dataset / metadata for new dataset. -To add new dataset, refer to the tutorial "docs/DATASETS.md". -""" - -import os - -from detectron2.data import DatasetCatalog, MetadataCatalog - -from .builtin_meta import _get_builtin_metadata -from .cityscapes import load_cityscapes_instances, load_cityscapes_semantic -from .lvis import get_lvis_instances_meta, register_lvis_instances -from .pascal_voc import register_pascal_voc -from .register_coco import register_coco_instances, register_coco_panoptic_separated - -# ==== Predefined data and splits for COCO ========== - -_PREDEFINED_SPLITS_COCO = {} -_PREDEFINED_SPLITS_COCO["coco"] = { - "coco_2014_train": ("coco/train2014", "coco/annotations/instances_train2014.json"), - "coco_2014_val": ("coco/val2014", "coco/annotations/instances_val2014.json"), - "coco_2014_minival": ("coco/val2014", "coco/annotations/instances_minival2014.json"), - "coco_2014_minival_100": ("coco/val2014", "coco/annotations/instances_minival2014_100.json"), - "coco_2014_valminusminival": ( - "coco/val2014", - "coco/annotations/instances_valminusminival2014.json", - ), - "coco_2017_train": ("coco/train2017", "coco/annotations/instances_train2017.json"), - "coco_2017_val": ("coco/val2017", "coco/annotations/instances_val2017.json"), - "coco_2017_test": ("coco/test2017", "coco/annotations/image_info_test2017.json"), - "coco_2017_test-dev": ("coco/test2017", "coco/annotations/image_info_test-dev2017.json"), - "coco_2017_val_100": ("coco/val2017", "coco/annotations/instances_val2017_100.json"), -} - -_PREDEFINED_SPLITS_COCO["coco_person"] = { - "keypoints_coco_2014_train": ( - "coco/train2014", - "coco/annotations/person_keypoints_train2014.json", - ), - "keypoints_coco_2014_val": ("coco/val2014", "coco/annotations/person_keypoints_val2014.json"), - "keypoints_coco_2014_minival": ( - "coco/val2014", - "coco/annotations/person_keypoints_minival2014.json", - ), - "keypoints_coco_2014_valminusminival": ( - "coco/val2014", - "coco/annotations/person_keypoints_valminusminival2014.json", - ), - "keypoints_coco_2014_minival_100": ( - "coco/val2014", - "coco/annotations/person_keypoints_minival2014_100.json", - ), - "keypoints_coco_2017_train": ( - "coco/train2017", - "coco/annotations/person_keypoints_train2017.json", - ), - "keypoints_coco_2017_val": ("coco/val2017", "coco/annotations/person_keypoints_val2017.json"), - "keypoints_coco_2017_val_100": ( - "coco/val2017", - "coco/annotations/person_keypoints_val2017_100.json", - ), -} - - -_PREDEFINED_SPLITS_COCO_PANOPTIC = { - "coco_2017_train_panoptic": ( - # This is the original panoptic annotation directory - "coco/panoptic_train2017", - "coco/annotations/panoptic_train2017.json", - # This directory contains semantic annotations that are - # converted from panoptic annotations. - # It is used by PanopticFPN. - # You can use the script at detectron2/data/prepare_panoptic_fpn.py - # to create these directories. 
- "coco/panoptic_stuff_train2017", - ), - "coco_2017_val_panoptic": ( - "coco/panoptic_val2017", - "coco/annotations/panoptic_val2017.json", - "coco/panoptic_stuff_val2017", - ), - "coco_2017_val_100_panoptic": ( - "coco/panoptic_val2017_100", - "coco/annotations/panoptic_val2017_100.json", - "coco/panoptic_stuff_val2017_100", - ), -} - - -def register_all_coco(root): - for dataset_name, splits_per_dataset in _PREDEFINED_SPLITS_COCO.items(): - for key, (image_root, json_file) in splits_per_dataset.items(): - # Assume pre-defined data live in `./data`. - register_coco_instances( - key, - _get_builtin_metadata(dataset_name), - os.path.join(root, json_file) if "://" not in json_file else json_file, - os.path.join(root, image_root), - ) - - for ( - prefix, - (panoptic_root, panoptic_json, semantic_root), - ) in _PREDEFINED_SPLITS_COCO_PANOPTIC.items(): - prefix_instances = prefix[: -len("_panoptic")] - instances_meta = MetadataCatalog.get(prefix_instances) - image_root, instances_json = instances_meta.image_root, instances_meta.json_file - register_coco_panoptic_separated( - prefix, - _get_builtin_metadata("coco_panoptic_separated"), - image_root, - os.path.join(root, panoptic_root), - os.path.join(root, panoptic_json), - os.path.join(root, semantic_root), - instances_json, - ) - - -# ==== Predefined data and splits for LVIS ========== - - -_PREDEFINED_SPLITS_LVIS = { - "lvis_v0.5": { - "lvis_v0.5_train": ("coco/train2017", "lvis/lvis_v0.5_train.json"), - "lvis_v0.5_val": ("coco/val2017", "lvis/lvis_v0.5_val.json"), - "lvis_v0.5_val_rand_100": ("coco/val2017", "lvis/lvis_v0.5_val_rand_100.json"), - "lvis_v0.5_test": ("coco/test2017", "lvis/lvis_v0.5_image_info_test.json"), - }, - "lvis_v0.5_cocofied": { - "lvis_v0.5_train_cocofied": ("coco/train2017", "lvis/lvis_v0.5_train_cocofied.json"), - "lvis_v0.5_val_cocofied": ("coco/val2017", "lvis/lvis_v0.5_val_cocofied.json"), - }, -} - - -def register_all_lvis(root): - for dataset_name, splits_per_dataset in _PREDEFINED_SPLITS_LVIS.items(): - for key, (image_root, json_file) in splits_per_dataset.items(): - # Assume pre-defined data live in `./data`. 
- register_lvis_instances( - key, - get_lvis_instances_meta(dataset_name), - os.path.join(root, json_file) if "://" not in json_file else json_file, - os.path.join(root, image_root), - ) - - -# ==== Predefined splits for raw cityscapes images =========== - - -_RAW_CITYSCAPES_SPLITS = { - "cityscapes_fine_{task}_train": ("cityscapes/leftImg8bit/train", "cityscapes/gtFine/train"), - "cityscapes_fine_{task}_val": ("cityscapes/leftImg8bit/val", "cityscapes/gtFine/val"), - "cityscapes_fine_{task}_test": ("cityscapes/leftImg8bit/test", "cityscapes/gtFine/test"), -} - - -def register_all_cityscapes(root): - for key, (image_dir, gt_dir) in _RAW_CITYSCAPES_SPLITS.items(): - meta = _get_builtin_metadata("cityscapes") - image_dir = os.path.join(root, image_dir) - gt_dir = os.path.join(root, gt_dir) - - inst_key = key.format(task="instance_seg") - DatasetCatalog.register( - inst_key, - lambda x=image_dir, y=gt_dir: load_cityscapes_instances( - x, y, from_json=True, to_polygons=True - ), - ) - MetadataCatalog.get(inst_key).set( - image_dir=image_dir, gt_dir=gt_dir, evaluator_type="cityscapes_instance", **meta - ) - - sem_key = key.format(task="sem_seg") - DatasetCatalog.register( - sem_key, lambda x=image_dir, y=gt_dir: load_cityscapes_semantic(x, y) - ) - MetadataCatalog.get(sem_key).set( - image_dir=image_dir, gt_dir=gt_dir, evaluator_type="cityscapes_sem_seg", **meta - ) - - -# ==== Predefined splits for PASCAL VOC =========== -def register_all_pascal_voc(root): - SPLITS = [ - ("voc_2007_trainval", "VOC2007", "trainval"), - ("voc_2007_train", "VOC2007", "train"), - ("voc_2007_val", "VOC2007", "val"), - ("voc_2007_test", "VOC2007", "test"), - ("voc_2012_trainval", "VOC2012", "trainval"), - ("voc_2012_train", "VOC2012", "train"), - ("voc_2012_val", "VOC2012", "val"), - ] - for name, dirname, split in SPLITS: - year = 2007 if "2007" in name else 2012 - register_pascal_voc(name, os.path.join(root, dirname), split, year) - MetadataCatalog.get(name).evaluator_type = "pascal_voc" - - -# Register them all under "./data" -_root = os.getenv("DETECTRON2_DATASETS", "data") -register_all_coco(_root) -register_all_lvis(_root) -register_all_cityscapes(_root) -register_all_pascal_voc(_root) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/builtin_meta.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/builtin_meta.py deleted file mode 100644 index 74c7986..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/builtin_meta.py +++ /dev/null @@ -1,267 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - - -# All coco categories, together with their nice-looking visualization colors -# It's from https://github.com/cocodataset/panopticapi/blob/master/panoptic_coco_categories.json -COCO_CATEGORIES = [ - {"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "person"}, - {"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "bicycle"}, - {"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "car"}, - {"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "motorcycle"}, - {"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "airplane"}, - {"color": [0, 60, 100], "isthing": 1, "id": 6, "name": "bus"}, - {"color": [0, 80, 100], "isthing": 1, "id": 7, "name": "train"}, - {"color": [0, 0, 70], "isthing": 1, "id": 8, "name": "truck"}, - {"color": [0, 0, 192], "isthing": 1, "id": 9, "name": "boat"}, - {"color": [250, 170, 30], "isthing": 1, "id": 10, "name": "traffic light"}, - {"color": [100, 170, 30], "isthing": 1, "id": 11, "name": "fire hydrant"}, - {"color": [220, 220, 0], "isthing": 1, "id": 13, "name": "stop sign"}, - {"color": [175, 116, 175], "isthing": 1, "id": 14, "name": "parking meter"}, - {"color": [250, 0, 30], "isthing": 1, "id": 15, "name": "bench"}, - {"color": [165, 42, 42], "isthing": 1, "id": 16, "name": "bird"}, - {"color": [255, 77, 255], "isthing": 1, "id": 17, "name": "cat"}, - {"color": [0, 226, 252], "isthing": 1, "id": 18, "name": "dog"}, - {"color": [182, 182, 255], "isthing": 1, "id": 19, "name": "horse"}, - {"color": [0, 82, 0], "isthing": 1, "id": 20, "name": "sheep"}, - {"color": [120, 166, 157], "isthing": 1, "id": 21, "name": "cow"}, - {"color": [110, 76, 0], "isthing": 1, "id": 22, "name": "elephant"}, - {"color": [174, 57, 255], "isthing": 1, "id": 23, "name": "bear"}, - {"color": [199, 100, 0], "isthing": 1, "id": 24, "name": "zebra"}, - {"color": [72, 0, 118], "isthing": 1, "id": 25, "name": "giraffe"}, - {"color": [255, 179, 240], "isthing": 1, "id": 27, "name": "backpack"}, - {"color": [0, 125, 92], "isthing": 1, "id": 28, "name": "umbrella"}, - {"color": [209, 0, 151], "isthing": 1, "id": 31, "name": "handbag"}, - {"color": [188, 208, 182], "isthing": 1, "id": 32, "name": "tie"}, - {"color": [0, 220, 176], "isthing": 1, "id": 33, "name": "suitcase"}, - {"color": [255, 99, 164], "isthing": 1, "id": 34, "name": "frisbee"}, - {"color": [92, 0, 73], "isthing": 1, "id": 35, "name": "skis"}, - {"color": [133, 129, 255], "isthing": 1, "id": 36, "name": "snowboard"}, - {"color": [78, 180, 255], "isthing": 1, "id": 37, "name": "sports ball"}, - {"color": [0, 228, 0], "isthing": 1, "id": 38, "name": "kite"}, - {"color": [174, 255, 243], "isthing": 1, "id": 39, "name": "baseball bat"}, - {"color": [45, 89, 255], "isthing": 1, "id": 40, "name": "baseball glove"}, - {"color": [134, 134, 103], "isthing": 1, "id": 41, "name": "skateboard"}, - {"color": [145, 148, 174], "isthing": 1, "id": 42, "name": "surfboard"}, - {"color": [255, 208, 186], "isthing": 1, "id": 43, "name": "tennis racket"}, - {"color": [197, 226, 255], "isthing": 1, "id": 44, "name": "bottle"}, - {"color": [171, 134, 1], "isthing": 1, "id": 46, "name": "wine glass"}, - {"color": [109, 63, 54], "isthing": 1, "id": 47, "name": "cup"}, - {"color": [207, 138, 255], "isthing": 1, "id": 48, "name": "fork"}, - {"color": [151, 0, 95], "isthing": 1, "id": 49, "name": "knife"}, - {"color": [9, 80, 61], "isthing": 1, "id": 50, "name": "spoon"}, - {"color": [84, 105, 51], "isthing": 1, "id": 51, "name": "bowl"}, - {"color": [74, 65, 105], "isthing": 1, "id": 52, "name": "banana"}, - 
{"color": [166, 196, 102], "isthing": 1, "id": 53, "name": "apple"}, - {"color": [208, 195, 210], "isthing": 1, "id": 54, "name": "sandwich"}, - {"color": [255, 109, 65], "isthing": 1, "id": 55, "name": "orange"}, - {"color": [0, 143, 149], "isthing": 1, "id": 56, "name": "broccoli"}, - {"color": [179, 0, 194], "isthing": 1, "id": 57, "name": "carrot"}, - {"color": [209, 99, 106], "isthing": 1, "id": 58, "name": "hot dog"}, - {"color": [5, 121, 0], "isthing": 1, "id": 59, "name": "pizza"}, - {"color": [227, 255, 205], "isthing": 1, "id": 60, "name": "donut"}, - {"color": [147, 186, 208], "isthing": 1, "id": 61, "name": "cake"}, - {"color": [153, 69, 1], "isthing": 1, "id": 62, "name": "chair"}, - {"color": [3, 95, 161], "isthing": 1, "id": 63, "name": "couch"}, - {"color": [163, 255, 0], "isthing": 1, "id": 64, "name": "potted plant"}, - {"color": [119, 0, 170], "isthing": 1, "id": 65, "name": "bed"}, - {"color": [0, 182, 199], "isthing": 1, "id": 67, "name": "dining table"}, - {"color": [0, 165, 120], "isthing": 1, "id": 70, "name": "toilet"}, - {"color": [183, 130, 88], "isthing": 1, "id": 72, "name": "tv"}, - {"color": [95, 32, 0], "isthing": 1, "id": 73, "name": "laptop"}, - {"color": [130, 114, 135], "isthing": 1, "id": 74, "name": "mouse"}, - {"color": [110, 129, 133], "isthing": 1, "id": 75, "name": "remote"}, - {"color": [166, 74, 118], "isthing": 1, "id": 76, "name": "keyboard"}, - {"color": [219, 142, 185], "isthing": 1, "id": 77, "name": "cell phone"}, - {"color": [79, 210, 114], "isthing": 1, "id": 78, "name": "microwave"}, - {"color": [178, 90, 62], "isthing": 1, "id": 79, "name": "oven"}, - {"color": [65, 70, 15], "isthing": 1, "id": 80, "name": "toaster"}, - {"color": [127, 167, 115], "isthing": 1, "id": 81, "name": "sink"}, - {"color": [59, 105, 106], "isthing": 1, "id": 82, "name": "refrigerator"}, - {"color": [142, 108, 45], "isthing": 1, "id": 84, "name": "book"}, - {"color": [196, 172, 0], "isthing": 1, "id": 85, "name": "clock"}, - {"color": [95, 54, 80], "isthing": 1, "id": 86, "name": "vase"}, - {"color": [128, 76, 255], "isthing": 1, "id": 87, "name": "scissors"}, - {"color": [201, 57, 1], "isthing": 1, "id": 88, "name": "teddy bear"}, - {"color": [246, 0, 122], "isthing": 1, "id": 89, "name": "hair drier"}, - {"color": [191, 162, 208], "isthing": 1, "id": 90, "name": "toothbrush"}, - {"color": [255, 255, 128], "isthing": 0, "id": 92, "name": "banner"}, - {"color": [147, 211, 203], "isthing": 0, "id": 93, "name": "blanket"}, - {"color": [150, 100, 100], "isthing": 0, "id": 95, "name": "bridge"}, - {"color": [168, 171, 172], "isthing": 0, "id": 100, "name": "cardboard"}, - {"color": [146, 112, 198], "isthing": 0, "id": 107, "name": "counter"}, - {"color": [210, 170, 100], "isthing": 0, "id": 109, "name": "curtain"}, - {"color": [92, 136, 89], "isthing": 0, "id": 112, "name": "door-stuff"}, - {"color": [218, 88, 184], "isthing": 0, "id": 118, "name": "floor-wood"}, - {"color": [241, 129, 0], "isthing": 0, "id": 119, "name": "flower"}, - {"color": [217, 17, 255], "isthing": 0, "id": 122, "name": "fruit"}, - {"color": [124, 74, 181], "isthing": 0, "id": 125, "name": "gravel"}, - {"color": [70, 70, 70], "isthing": 0, "id": 128, "name": "house"}, - {"color": [255, 228, 255], "isthing": 0, "id": 130, "name": "light"}, - {"color": [154, 208, 0], "isthing": 0, "id": 133, "name": "mirror-stuff"}, - {"color": [193, 0, 92], "isthing": 0, "id": 138, "name": "net"}, - {"color": [76, 91, 113], "isthing": 0, "id": 141, "name": "pillow"}, - {"color": [255, 180, 195], "isthing": 0, 
"id": 144, "name": "platform"}, - {"color": [106, 154, 176], "isthing": 0, "id": 145, "name": "playingfield"}, - {"color": [230, 150, 140], "isthing": 0, "id": 147, "name": "railroad"}, - {"color": [60, 143, 255], "isthing": 0, "id": 148, "name": "river"}, - {"color": [128, 64, 128], "isthing": 0, "id": 149, "name": "road"}, - {"color": [92, 82, 55], "isthing": 0, "id": 151, "name": "roof"}, - {"color": [254, 212, 124], "isthing": 0, "id": 154, "name": "sand"}, - {"color": [73, 77, 174], "isthing": 0, "id": 155, "name": "sea"}, - {"color": [255, 160, 98], "isthing": 0, "id": 156, "name": "shelf"}, - {"color": [255, 255, 255], "isthing": 0, "id": 159, "name": "snow"}, - {"color": [104, 84, 109], "isthing": 0, "id": 161, "name": "stairs"}, - {"color": [169, 164, 131], "isthing": 0, "id": 166, "name": "tent"}, - {"color": [225, 199, 255], "isthing": 0, "id": 168, "name": "towel"}, - {"color": [137, 54, 74], "isthing": 0, "id": 171, "name": "wall-brick"}, - {"color": [135, 158, 223], "isthing": 0, "id": 175, "name": "wall-stone"}, - {"color": [7, 246, 231], "isthing": 0, "id": 176, "name": "wall-tile"}, - {"color": [107, 255, 200], "isthing": 0, "id": 177, "name": "wall-wood"}, - {"color": [58, 41, 149], "isthing": 0, "id": 178, "name": "water-other"}, - {"color": [183, 121, 142], "isthing": 0, "id": 180, "name": "window-blind"}, - {"color": [255, 73, 97], "isthing": 0, "id": 181, "name": "window-other"}, - {"color": [107, 142, 35], "isthing": 0, "id": 184, "name": "tree-merged"}, - {"color": [190, 153, 153], "isthing": 0, "id": 185, "name": "fence-merged"}, - {"color": [146, 139, 141], "isthing": 0, "id": 186, "name": "ceiling-merged"}, - {"color": [70, 130, 180], "isthing": 0, "id": 187, "name": "sky-other-merged"}, - {"color": [134, 199, 156], "isthing": 0, "id": 188, "name": "cabinet-merged"}, - {"color": [209, 226, 140], "isthing": 0, "id": 189, "name": "table-merged"}, - {"color": [96, 36, 108], "isthing": 0, "id": 190, "name": "floor-other-merged"}, - {"color": [96, 96, 96], "isthing": 0, "id": 191, "name": "pavement-merged"}, - {"color": [64, 170, 64], "isthing": 0, "id": 192, "name": "mountain-merged"}, - {"color": [152, 251, 152], "isthing": 0, "id": 193, "name": "grass-merged"}, - {"color": [208, 229, 228], "isthing": 0, "id": 194, "name": "dirt-merged"}, - {"color": [206, 186, 171], "isthing": 0, "id": 195, "name": "paper-merged"}, - {"color": [152, 161, 64], "isthing": 0, "id": 196, "name": "food-other-merged"}, - {"color": [116, 112, 0], "isthing": 0, "id": 197, "name": "building-other-merged"}, - {"color": [0, 114, 143], "isthing": 0, "id": 198, "name": "rock-merged"}, - {"color": [102, 102, 156], "isthing": 0, "id": 199, "name": "wall-other-merged"}, - {"color": [250, 141, 255], "isthing": 0, "id": 200, "name": "rug-merged"}, -] - -# fmt: off -COCO_PERSON_KEYPOINT_NAMES = ( - "nose", - "left_eye", "right_eye", - "left_ear", "right_ear", - "left_shoulder", "right_shoulder", - "left_elbow", "right_elbow", - "left_wrist", "right_wrist", - "left_hip", "right_hip", - "left_knee", "right_knee", - "left_ankle", "right_ankle", -) -# fmt: on - -# Pairs of keypoints that should be exchanged under horizontal flipping -COCO_PERSON_KEYPOINT_FLIP_MAP = ( - ("left_eye", "right_eye"), - ("left_ear", "right_ear"), - ("left_shoulder", "right_shoulder"), - ("left_elbow", "right_elbow"), - ("left_wrist", "right_wrist"), - ("left_hip", "right_hip"), - ("left_knee", "right_knee"), - ("left_ankle", "right_ankle"), -) - -# rules for pairs of keypoints to draw a line between, and the line color to use. 
-KEYPOINT_CONNECTION_RULES = [ - # face - ("left_ear", "left_eye", (102, 204, 255)), - ("right_ear", "right_eye", (51, 153, 255)), - ("left_eye", "nose", (102, 0, 204)), - ("nose", "right_eye", (51, 102, 255)), - # upper-body - ("left_shoulder", "right_shoulder", (255, 128, 0)), - ("left_shoulder", "left_elbow", (153, 255, 204)), - ("right_shoulder", "right_elbow", (128, 229, 255)), - ("left_elbow", "left_wrist", (153, 255, 153)), - ("right_elbow", "right_wrist", (102, 255, 224)), - # lower-body - ("left_hip", "right_hip", (255, 102, 0)), - ("left_hip", "left_knee", (255, 255, 77)), - ("right_hip", "right_knee", (153, 255, 204)), - ("left_knee", "left_ankle", (191, 255, 128)), - ("right_knee", "right_ankle", (255, 195, 77)), -] - - -def _get_coco_instances_meta(): - thing_ids = [k["id"] for k in COCO_CATEGORIES if k["isthing"] == 1] - thing_colors = [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 1] - assert len(thing_ids) == 80, len(thing_ids) - # Mapping from the incontiguous COCO category id to an id in [0, 79] - thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)} - thing_classes = [k["name"] for k in COCO_CATEGORIES if k["isthing"] == 1] - ret = { - "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id, - "thing_classes": thing_classes, - "thing_colors": thing_colors, - } - return ret - - -def _get_coco_panoptic_separated_meta(): - """ - Returns metadata for "separated" version of the panoptic segmentation dataset. - """ - stuff_ids = [k["id"] for k in COCO_CATEGORIES if k["isthing"] == 0] - assert len(stuff_ids) == 53, len(stuff_ids) - - # For semantic segmentation, this mapping maps from contiguous stuff id - # (in [0, 53], used in models) to ids in the dataset (used for processing results) - # The id 0 is mapped to an extra category "thing". 
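# Concretely, the first few stuff categories in COCO_CATEGORIES are paired as
#   dataset id 92 ("banner")  <-> contiguous id 1
#   dataset id 93 ("blanket") <-> contiguous id 2
#   dataset id 95 ("bridge")  <-> contiguous id 3
# while contiguous id 0 is reserved for all "thing" pixels.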
- stuff_dataset_id_to_contiguous_id = {k: i + 1 for i, k in enumerate(stuff_ids)} - # When converting COCO panoptic annotations to semantic annotations - # We label the "thing" category to 0 - stuff_dataset_id_to_contiguous_id[0] = 0 - - # 54 names for COCO stuff categories (including "things") - stuff_classes = ["things"] + [ - k["name"].replace("-other", "").replace("-merged", "") - for k in COCO_CATEGORIES - if k["isthing"] == 0 - ] - - # NOTE: I randomly picked a color for things - stuff_colors = [[82, 18, 128]] + [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 0] - ret = { - "stuff_dataset_id_to_contiguous_id": stuff_dataset_id_to_contiguous_id, - "stuff_classes": stuff_classes, - "stuff_colors": stuff_colors, - } - ret.update(_get_coco_instances_meta()) - return ret - - -def _get_builtin_metadata(dataset_name): - if dataset_name == "coco": - return _get_coco_instances_meta() - if dataset_name == "coco_panoptic_separated": - return _get_coco_panoptic_separated_meta() - elif dataset_name == "coco_person": - return { - "thing_classes": ["person"], - "keypoint_names": COCO_PERSON_KEYPOINT_NAMES, - "keypoint_flip_map": COCO_PERSON_KEYPOINT_FLIP_MAP, - "keypoint_connection_rules": KEYPOINT_CONNECTION_RULES, - } - elif dataset_name == "cityscapes": - # fmt: off - CITYSCAPES_THING_CLASSES = [ - "person", "rider", "car", "truck", - "bus", "train", "motorcycle", "bicycle", - ] - CITYSCAPES_STUFF_CLASSES = [ - "road", "sidewalk", "building", "wall", "fence", "pole", "traffic light", - "traffic sign", "vegetation", "terrain", "sky", "person", "rider", "car", - "truck", "bus", "train", "motorcycle", "bicycle", "license plate", - ] - # fmt: on - return { - "thing_classes": CITYSCAPES_THING_CLASSES, - "stuff_classes": CITYSCAPES_STUFF_CLASSES, - } - raise KeyError("No built-in metadata for dataset {}".format(dataset_name)) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/cityscapes.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/cityscapes.py deleted file mode 100644 index 062a555..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/cityscapes.py +++ /dev/null @@ -1,329 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import functools -import json -import logging -import multiprocessing as mp -import numpy as np -import os -from itertools import chain -import pycocotools.mask as mask_util -from fvcore.common.file_io import PathManager -from PIL import Image - -from detectron2.structures import BoxMode -from detectron2.utils.comm import get_world_size -from detectron2.utils.logger import setup_logger - -try: - import cv2 # noqa -except ImportError: - # OpenCV is an optional dependency at the moment - pass - - -logger = logging.getLogger(__name__) - - -def get_cityscapes_files(image_dir, gt_dir): - files = [] - # scan through the directory - cities = PathManager.ls(image_dir) - logger.info(f"{len(cities)} cities found in '{image_dir}'.") - for city in cities: - city_img_dir = os.path.join(image_dir, city) - city_gt_dir = os.path.join(gt_dir, city) - for basename in PathManager.ls(city_img_dir): - image_file = os.path.join(city_img_dir, basename) - - suffix = "leftImg8bit.png" - assert basename.endswith(suffix) - basename = basename[: -len(suffix)] - - instance_file = os.path.join(city_gt_dir, basename + "gtFine_instanceIds.png") - label_file = os.path.join(city_gt_dir, basename + "gtFine_labelIds.png") - json_file = os.path.join(city_gt_dir, basename + "gtFine_polygons.json") - - files.append((image_file, instance_file, label_file, json_file)) - assert len(files), "No images found in {}".format(image_dir) - for f in files[0]: - assert PathManager.isfile(f), f - return files - - -def load_cityscapes_instances(image_dir, gt_dir, from_json=True, to_polygons=True): - """ - Args: - image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train". - gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train". - from_json (bool): whether to read annotations from the raw json file or the png files. - to_polygons (bool): whether to represent the segmentation as polygons - (COCO's format) instead of masks (cityscapes's format). - - Returns: - list[dict]: a list of dicts in Detectron2 standard format. (See - `Using Custom Datasets `_ ) - """ - if from_json: - assert to_polygons, ( - "Cityscapes's json annotations are in polygon format. " - "Converting to mask format is not supported now." - ) - files = get_cityscapes_files(image_dir, gt_dir) - - logger.info("Preprocessing cityscapes annotations ...") - # This is still not fast: all workers will execute duplicate works and will - # take up to 10m on a 8GPU server. - pool = mp.Pool(processes=max(mp.cpu_count() // get_world_size() // 2, 4)) - - ret = pool.map( - functools.partial(cityscapes_files_to_dict, from_json=from_json, to_polygons=to_polygons), - files, - ) - logger.info("Loaded {} images from {}".format(len(ret), image_dir)) - - # Map cityscape ids to contiguous ids - from cityscapesscripts.helpers.labels import labels - - labels = [l for l in labels if l.hasInstances and not l.ignoreInEval] - dataset_id_to_contiguous_id = {l.id: idx for idx, l in enumerate(labels)} - for dict_per_image in ret: - for anno in dict_per_image["annotations"]: - anno["category_id"] = dataset_id_to_contiguous_id[anno["category_id"]] - return ret - - -def load_cityscapes_semantic(image_dir, gt_dir): - """ - Args: - image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train". - gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train". - - Returns: - list[dict]: a list of dict, each has "file_name" and - "sem_seg_file_name". - """ - ret = [] - # gt_dir is small and contain many small files. 
make sense to fetch to local first - gt_dir = PathManager.get_local_path(gt_dir) - for image_file, _, label_file, json_file in get_cityscapes_files(image_dir, gt_dir): - label_file = label_file.replace("labelIds", "labelTrainIds") - - with PathManager.open(json_file, "r") as f: - jsonobj = json.load(f) - ret.append( - { - "file_name": image_file, - "sem_seg_file_name": label_file, - "height": jsonobj["imgHeight"], - "width": jsonobj["imgWidth"], - } - ) - assert len(ret), f"No images found in {image_dir}!" - assert PathManager.isfile( - ret[0]["sem_seg_file_name"] - ), "Please generate labelTrainIds.png with cityscapesscripts/preparation/createTrainIdLabelImgs.py" # noqa - return ret - - -def cityscapes_files_to_dict(files, from_json, to_polygons): - """ - Parse cityscapes annotation files to a instance segmentation dataset dict. - - Args: - files (tuple): consists of (image_file, instance_id_file, label_id_file, json_file) - from_json (bool): whether to read annotations from the raw json file or the png files. - to_polygons (bool): whether to represent the segmentation as polygons - (COCO's format) instead of masks (cityscapes's format). - - Returns: - A dict in Detectron2 Dataset format. - """ - from cityscapesscripts.helpers.labels import id2label, name2label - - image_file, instance_id_file, _, json_file = files - - annos = [] - - if from_json: - from shapely.geometry import MultiPolygon, Polygon - - with PathManager.open(json_file, "r") as f: - jsonobj = json.load(f) - ret = { - "file_name": image_file, - "image_id": os.path.basename(image_file), - "height": jsonobj["imgHeight"], - "width": jsonobj["imgWidth"], - } - - # `polygons_union` contains the union of all valid polygons. - polygons_union = Polygon() - - # CityscapesScripts draw the polygons in sequential order - # and each polygon *overwrites* existing ones. See - # (https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/preparation/json2instanceImg.py) # noqa - # We use reverse order, and each polygon *avoids* early ones. - # This will resolve the ploygon overlaps in the same way as CityscapesScripts. - for obj in jsonobj["objects"][::-1]: - if "deleted" in obj: # cityscapes data format specific - continue - label_name = obj["label"] - - try: - label = name2label[label_name] - except KeyError: - if label_name.endswith("group"): # crowd area - label = name2label[label_name[: -len("group")]] - else: - raise - if label.id < 0: # cityscapes data format - continue - - # Cityscapes's raw annotations uses integer coordinates - # Therefore +0.5 here - poly_coord = np.asarray(obj["polygon"], dtype="f4") + 0.5 - # CityscapesScript uses PIL.ImageDraw.polygon to rasterize - # polygons for evaluation. This function operates in integer space - # and draws each pixel whose center falls into the polygon. - # Therefore it draws a polygon which is 0.5 "fatter" in expectation. - # We therefore dilate the input polygon by 0.5 as our input. 
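# Shapely's buffer(0.5, resolution=4) dilates the polygon outward by half a pixel, matching the
# comment above; the low resolution gives a coarse rounded-corner approximation that keeps the dilation cheap.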
- poly = Polygon(poly_coord).buffer(0.5, resolution=4) - - if not label.hasInstances or label.ignoreInEval: - # even if we won't store the polygon it still contributes to overlaps resolution - polygons_union = polygons_union.union(poly) - continue - - # Take non-overlapping part of the polygon - poly_wo_overlaps = poly.difference(polygons_union) - if poly_wo_overlaps.is_empty: - continue - polygons_union = polygons_union.union(poly) - - anno = {} - anno["iscrowd"] = label_name.endswith("group") - anno["category_id"] = label.id - - if isinstance(poly_wo_overlaps, Polygon): - poly_list = [poly_wo_overlaps] - elif isinstance(poly_wo_overlaps, MultiPolygon): - poly_list = poly_wo_overlaps.geoms - else: - raise NotImplementedError("Unknown geometric structure {}".format(poly_wo_overlaps)) - - poly_coord = [] - for poly_el in poly_list: - # COCO API can work only with exterior boundaries now, hence we store only them. - # TODO: store both exterior and interior boundaries once other parts of the - # codebase support holes in polygons. - poly_coord.append(list(chain(*poly_el.exterior.coords))) - anno["segmentation"] = poly_coord - (xmin, ymin, xmax, ymax) = poly_wo_overlaps.bounds - - anno["bbox"] = (xmin, ymin, xmax, ymax) - anno["bbox_mode"] = BoxMode.XYXY_ABS - - annos.append(anno) - else: - # See also the official annotation parsing scripts at - # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/instances2dict.py # noqa - with PathManager.open(instance_id_file, "rb") as f: - inst_image = np.asarray(Image.open(f), order="F") - # ids < 24 are stuff labels (filtering them first is about 5% faster) - flattened_ids = np.unique(inst_image[inst_image >= 24]) - - ret = { - "file_name": image_file, - "image_id": os.path.basename(image_file), - "height": inst_image.shape[0], - "width": inst_image.shape[1], - } - - for instance_id in flattened_ids: - # For non-crowd annotations, instance_id // 1000 is the label_id - # Crowd annotations have <1000 instance ids - label_id = instance_id // 1000 if instance_id >= 1000 else instance_id - label = id2label[label_id] - if not label.hasInstances or label.ignoreInEval: - continue - - anno = {} - anno["iscrowd"] = instance_id < 1000 - anno["category_id"] = label.id - - mask = np.asarray(inst_image == instance_id, dtype=np.uint8, order="F") - - inds = np.nonzero(mask) - ymin, ymax = inds[0].min(), inds[0].max() - xmin, xmax = inds[1].min(), inds[1].max() - anno["bbox"] = (xmin, ymin, xmax, ymax) - if xmax <= xmin or ymax <= ymin: - continue - anno["bbox_mode"] = BoxMode.XYXY_ABS - if to_polygons: - # This conversion comes from D4809743 and D5171122, - # when Mask-RCNN was first developed. - contours = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[ - -2 - ] - polygons = [c.reshape(-1).tolist() for c in contours if len(c) >= 3] - # opencv's can produce invalid polygons - if len(polygons) == 0: - continue - anno["segmentation"] = polygons - else: - anno["segmentation"] = mask_util.encode(mask[:, :, None])[0] - annos.append(anno) - ret["annotations"] = annos - return ret - - -if __name__ == "__main__": - """ - Test the cityscapes dataset loader. 
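For programmatic use, the two loaders above can also be called directly; a minimal sketch, assuming cityscapesscripts is installed and using placeholder paths:

    from detectron2.data.datasets.cityscapes import (
        load_cityscapes_instances,
        load_cityscapes_semantic,
    )

    # Instance dicts in Detectron2 format (polygon segmentations parsed from the gtFine json files).
    inst_dicts = load_cityscapes_instances(
        "cityscapes/leftImg8bit/train",   # placeholder image_dir
        "cityscapes/gtFine/train",        # placeholder gt_dir
        from_json=True,
        to_polygons=True,
    )

    # Semantic dicts; labelTrainIds.png must be generated first with
    # cityscapesscripts/preparation/createTrainIdLabelImgs.py.
    sem_dicts = load_cityscapes_semantic("cityscapes/leftImg8bit/val", "cityscapes/gtFine/val")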
- - Usage: - python -m detectron2.data.data.cityscapes \ - cityscapes/leftImg8bit/train cityscapes/gtFine/train - """ - import argparse - - parser = argparse.ArgumentParser() - parser.add_argument("image_dir") - parser.add_argument("gt_dir") - parser.add_argument("--type", choices=["instance", "semantic"], default="instance") - args = parser.parse_args() - from detectron2.data.catalog import Metadata - from detectron2.utils.visualizer import Visualizer - from cityscapesscripts.helpers.labels import labels - - logger = setup_logger(name=__name__) - - dirname = "cityscapes-data-vis" - os.makedirs(dirname, exist_ok=True) - - if args.type == "instance": - dicts = load_cityscapes_instances( - args.image_dir, args.gt_dir, from_json=True, to_polygons=True - ) - logger.info("Done loading {} samples.".format(len(dicts))) - - thing_classes = [k.name for k in labels if k.hasInstances and not k.ignoreInEval] - meta = Metadata().set(thing_classes=thing_classes) - - else: - dicts = load_cityscapes_semantic(args.image_dir, args.gt_dir) - logger.info("Done loading {} samples.".format(len(dicts))) - - stuff_names = [k.name for k in labels if k.trainId != 255] - stuff_colors = [k.color for k in labels if k.trainId != 255] - meta = Metadata().set(stuff_names=stuff_names, stuff_colors=stuff_colors) - - for d in dicts: - img = np.array(Image.open(PathManager.open(d["file_name"], "rb"))) - visualizer = Visualizer(img, metadata=meta) - vis = visualizer.draw_dataset_dict(d) - # cv2.imshow("a", vis.get_image()[:, :, ::-1]) - # cv2.waitKey() - fpath = os.path.join(dirname, os.path.basename(d["file_name"])) - vis.save(fpath) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/coco.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/coco.py deleted file mode 100644 index f6f099e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/coco.py +++ /dev/null @@ -1,466 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import contextlib -import datetime -import io -import json -import logging -import numpy as np -import os -import pycocotools.mask as mask_util -from fvcore.common.file_io import PathManager, file_lock -from fvcore.common.timer import Timer -from PIL import Image - -from detectron2.structures import Boxes, BoxMode, PolygonMasks - -from .. import DatasetCatalog, MetadataCatalog - -""" -This file contains functions to parse COCO-format annotations into dicts in "Detectron2 format". -""" - - -logger = logging.getLogger(__name__) - -__all__ = ["load_coco_json", "load_sem_seg", "convert_to_coco_json"] - - -def load_coco_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None): - """ - Load a json file with COCO's instances annotation format. - Currently supports instance detection, instance segmentation, - and person keypoints annotations. - - Args: - json_file (str): full path to the json file in COCO instances annotation format. - image_root (str or path-like): the directory where the images in this json file exists. - dataset_name (str): the name of the dataset (e.g., coco_2017_train). - If provided, this function will also put "thing_classes" into - the metadata associated with this dataset. - extra_annotation_keys (list[str]): list of per-annotation keys that should also be - loaded into the dataset dict (besides "iscrowd", "bbox", "keypoints", - "category_id", "segmentation"). The values for these keys will be returned as-is. 
- For example, the densepose annotations are loaded in this way. - - Returns: - list[dict]: a list of dicts in Detectron2 standard dataset dicts format. (See - `Using Custom Datasets `_ ) - - Notes: - 1. This function does not read the image files. - The results do not have the "image" field. - """ - from pycocotools.coco import COCO - - timer = Timer() - json_file = PathManager.get_local_path(json_file) - with contextlib.redirect_stdout(io.StringIO()): - coco_api = COCO(json_file) - if timer.seconds() > 1: - logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())) - - id_map = None - if dataset_name is not None: - meta = MetadataCatalog.get(dataset_name) - cat_ids = sorted(coco_api.getCatIds()) - cats = coco_api.loadCats(cat_ids) - # The categories in a custom json file may not be sorted. - thing_classes = [c["name"] for c in sorted(cats, key=lambda x: x["id"])] - meta.thing_classes = thing_classes - - # In COCO, certain category ids are artificially removed, - # and by convention they are always ignored. - # We deal with COCO's id issue and translate - # the category ids to contiguous ids in [0, 80). - - # It works by looking at the "categories" field in the json, therefore - # if users' own json also have incontiguous ids, we'll - # apply this mapping as well but print a warning. - if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)): - if "coco" not in dataset_name: - logger.warning( - """ -Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you. -""" - ) - id_map = {v: i for i, v in enumerate(cat_ids)} - meta.thing_dataset_id_to_contiguous_id = id_map - - # sort indices for reproducible results - img_ids = sorted(coco_api.imgs.keys()) - # imgs is a list of dicts, each looks something like: - # {'license': 4, - # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg', - # 'file_name': 'COCO_val2014_000000001268.jpg', - # 'height': 427, - # 'width': 640, - # 'date_captured': '2013-11-17 05:57:24', - # 'id': 1268} - imgs = coco_api.loadImgs(img_ids) - # anns is a list[list[dict]], where each dict is an annotation - # record for an object. The inner list enumerates the objects in an image - # and the outer list enumerates over images. Example of anns[0]: - # [{'segmentation': [[192.81, - # 247.09, - # ... - # 219.03, - # 249.06]], - # 'area': 1035.749, - # 'iscrowd': 0, - # 'image_id': 1268, - # 'bbox': [192.81, 224.8, 74.73, 33.43], - # 'category_id': 16, - # 'id': 42986}, - # ...] - anns = [coco_api.imgToAnns[img_id] for img_id in img_ids] - - if "minival" not in json_file: - # The popular valminusminival & minival annotations for COCO2014 contain this bug. - # However the ratio of buggy annotations there is tiny and does not affect accuracy. - # Therefore we explicitly white-list them. 
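# Sanity check below: flatten every annotation id in the file and verify that no id is reused across images.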
- ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image] - assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format( - json_file - ) - - imgs_anns = list(zip(imgs, anns)) - - logger.info("Loaded {} images in COCO format from {}".format(len(imgs_anns), json_file)) - - dataset_dicts = [] - - ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"] + (extra_annotation_keys or []) - - num_instances_without_valid_segmentation = 0 - - for (img_dict, anno_dict_list) in imgs_anns: - record = {} - record["file_name"] = os.path.join(image_root, img_dict["file_name"]) - record["height"] = img_dict["height"] - record["width"] = img_dict["width"] - image_id = record["image_id"] = img_dict["id"] - - objs = [] - for anno in anno_dict_list: - # Check that the image_id in this annotation is the same as - # the image_id we're looking at. - # This fails only when the data parsing logic or the annotation file is buggy. - - # The original COCO valminusminival2014 & minival2014 annotation files - # actually contains bugs that, together with certain ways of using COCO API, - # can trigger this assertion. - assert anno["image_id"] == image_id - - assert anno.get("ignore", 0) == 0, '"ignore" in COCO json file is not supported.' - - obj = {key: anno[key] for key in ann_keys if key in anno} - - segm = anno.get("segmentation", None) - if segm: # either list[list[float]] or dict(RLE) - if not isinstance(segm, dict): - # filter out invalid polygons (< 3 points) - segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6] - if len(segm) == 0: - num_instances_without_valid_segmentation += 1 - continue # ignore this instance - obj["segmentation"] = segm - - keypts = anno.get("keypoints", None) - if keypts: # list[int] - for idx, v in enumerate(keypts): - if idx % 3 != 2: - # COCO's segmentation coordinates are floating points in [0, H or W], - # but keypoint coordinates are integers in [0, H-1 or W-1] - # Therefore we assume the coordinates are "pixel indices" and - # add 0.5 to convert to floating point coordinates. - keypts[idx] = v + 0.5 - obj["keypoints"] = keypts - - obj["bbox_mode"] = BoxMode.XYWH_ABS - if id_map: - obj["category_id"] = id_map[obj["category_id"]] - objs.append(obj) - record["annotations"] = objs - dataset_dicts.append(record) - - if num_instances_without_valid_segmentation > 0: - logger.warning( - "Filtered out {} instances without valid segmentation. " - "There might be issues in your dataset generation process.".format( - num_instances_without_valid_segmentation - ) - ) - return dataset_dicts - - -def load_sem_seg(gt_root, image_root, gt_ext="png", image_ext="jpg"): - """ - Load semantic segmentation data. All files under "gt_root" with "gt_ext" extension are - treated as ground truth annotations and all files under "image_root" with "image_ext" extension - as input images. Ground truth and input images are matched using file paths relative to - "gt_root" and "image_root" respectively without taking into account file extensions. - This works for COCO as well as some other data. - - Args: - gt_root (str): full path to ground truth semantic segmentation files. Semantic segmentation - annotations are stored as images with integer values in pixels that represent - corresponding semantic labels. - image_root (str): the directory where the input images are. - gt_ext (str): file extension for ground truth annotations. - image_ext (str): file extension for input images. 
- - Returns: - list[dict]: - a list of dicts in detectron2 standard format without instance-level - annotation. - - Notes: - 1. This function does not read the image and ground truth files. - The results do not have the "image" and "sem_seg" fields. - """ - - # We match input images with ground truth based on their relative filepaths (without file - # extensions) starting from 'image_root' and 'gt_root' respectively. - def file2id(folder_path, file_path): - # extract relative path starting from `folder_path` - image_id = os.path.normpath(os.path.relpath(file_path, start=folder_path)) - # remove file extension - image_id = os.path.splitext(image_id)[0] - return image_id - - input_files = sorted( - (os.path.join(image_root, f) for f in PathManager.ls(image_root) if f.endswith(image_ext)), - key=lambda file_path: file2id(image_root, file_path), - ) - gt_files = sorted( - (os.path.join(gt_root, f) for f in PathManager.ls(gt_root) if f.endswith(gt_ext)), - key=lambda file_path: file2id(gt_root, file_path), - ) - - assert len(gt_files) > 0, "No annotations found in {}.".format(gt_root) - - # Use the intersection, so that val2017_100 annotations can run smoothly with val2017 images - if len(input_files) != len(gt_files): - logger.warn( - "Directory {} and {} has {} and {} files, respectively.".format( - image_root, gt_root, len(input_files), len(gt_files) - ) - ) - input_basenames = [os.path.basename(f)[: -len(image_ext)] for f in input_files] - gt_basenames = [os.path.basename(f)[: -len(gt_ext)] for f in gt_files] - intersect = list(set(input_basenames) & set(gt_basenames)) - # sort, otherwise each worker may obtain a list[dict] in different order - intersect = sorted(intersect) - logger.warn("Will use their intersection of {} files.".format(len(intersect))) - input_files = [os.path.join(image_root, f + image_ext) for f in intersect] - gt_files = [os.path.join(gt_root, f + gt_ext) for f in intersect] - - logger.info( - "Loaded {} images with semantic segmentation from {}".format(len(input_files), image_root) - ) - - dataset_dicts = [] - for (img_path, gt_path) in zip(input_files, gt_files): - record = {} - record["file_name"] = img_path - record["sem_seg_file_name"] = gt_path - dataset_dicts.append(record) - - return dataset_dicts - - -def convert_to_coco_dict(dataset_name): - """ - Convert an instance detection/segmentation or keypoint detection dataset - in detectron2's standard format into COCO json format. - - Generic dataset description can be found here: - https://detectron2.readthedocs.io/tutorials/datasets.html#register-a-dataset - - COCO data format description can be found here: - http://cocodataset.org/#format-data - - Args: - dataset_name (str): - name of the source dataset - Must be registered in DatastCatalog and in detectron2's standard format. 
- Must have corresponding metadata "thing_classes" - Returns: - coco_dict: serializable dict in COCO json format - """ - - dataset_dicts = DatasetCatalog.get(dataset_name) - metadata = MetadataCatalog.get(dataset_name) - - # unmap the category mapping ids for COCO - if hasattr(metadata, "thing_dataset_id_to_contiguous_id"): - reverse_id_mapping = {v: k for k, v in metadata.thing_dataset_id_to_contiguous_id.items()} - reverse_id_mapper = lambda contiguous_id: reverse_id_mapping[contiguous_id] # noqa - else: - reverse_id_mapper = lambda contiguous_id: contiguous_id # noqa - - categories = [ - {"id": reverse_id_mapper(id), "name": name} - for id, name in enumerate(metadata.thing_classes) - ] - - logger.info("Converting dataset dicts into COCO format") - coco_images = [] - coco_annotations = [] - - for image_id, image_dict in enumerate(dataset_dicts): - coco_image = { - "id": image_dict.get("image_id", image_id), - "width": image_dict["width"], - "height": image_dict["height"], - "file_name": image_dict["file_name"], - } - coco_images.append(coco_image) - - anns_per_image = image_dict["annotations"] - for annotation in anns_per_image: - # create a new dict with only COCO fields - coco_annotation = {} - - # COCO requirement: XYWH box format - bbox = annotation["bbox"] - bbox_mode = annotation["bbox_mode"] - bbox = BoxMode.convert(bbox, bbox_mode, BoxMode.XYWH_ABS) - - # COCO requirement: instance area - if "segmentation" in annotation: - # Computing areas for instances by counting the pixels - segmentation = annotation["segmentation"] - # TODO: check segmentation type: RLE, BinaryMask or Polygon - if isinstance(segmentation, list): - polygons = PolygonMasks([segmentation]) - area = polygons.area()[0].item() - elif isinstance(segmentation, dict): # RLE - area = mask_util.area(segmentation).item() - else: - raise TypeError(f"Unknown segmentation type {type(segmentation)}!") - else: - # Computing areas using bounding boxes - bbox_xy = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) - area = Boxes([bbox_xy]).area()[0].item() - - if "keypoints" in annotation: - keypoints = annotation["keypoints"] # list[int] - for idx, v in enumerate(keypoints): - if idx % 3 != 2: - # COCO's segmentation coordinates are floating points in [0, H or W], - # but keypoint coordinates are integers in [0, H-1 or W-1] - # For COCO format consistency we substract 0.5 - # https://github.com/facebookresearch/detectron2/pull/175#issuecomment-551202163 - keypoints[idx] = v - 0.5 - if "num_keypoints" in annotation: - num_keypoints = annotation["num_keypoints"] - else: - num_keypoints = sum(kp > 0 for kp in keypoints[2::3]) - - # COCO requirement: - # linking annotations to images - # "id" field must start with 1 - coco_annotation["id"] = len(coco_annotations) + 1 - coco_annotation["image_id"] = coco_image["id"] - coco_annotation["bbox"] = [round(float(x), 3) for x in bbox] - coco_annotation["area"] = float(area) - coco_annotation["iscrowd"] = annotation.get("iscrowd", 0) - coco_annotation["category_id"] = reverse_id_mapper(annotation["category_id"]) - - # Add optional fields - if "keypoints" in annotation: - coco_annotation["keypoints"] = keypoints - coco_annotation["num_keypoints"] = num_keypoints - - if "segmentation" in annotation: - coco_annotation["segmentation"] = annotation["segmentation"] - if isinstance(coco_annotation["segmentation"], dict): # RLE - coco_annotation["segmentation"]["counts"] = coco_annotation["segmentation"][ - "counts" - ].decode("ascii") - - coco_annotations.append(coco_annotation) - - 
logger.info( - "Conversion finished, " - f"#images: {len(coco_images)}, #annotations: {len(coco_annotations)}" - ) - - info = { - "date_created": str(datetime.datetime.now()), - "description": "Automatically generated COCO json file for Detectron2.", - } - coco_dict = { - "info": info, - "images": coco_images, - "annotations": coco_annotations, - "categories": categories, - "licenses": None, - } - return coco_dict - - -def convert_to_coco_json(dataset_name, output_file, allow_cached=True): - """ - Converts dataset into COCO format and saves it to a json file. - dataset_name must be registered in DatasetCatalog and in detectron2's standard format. - - Args: - dataset_name: - reference from the config file to the catalogs - must be registered in DatasetCatalog and in detectron2's standard format - output_file: path of json file that will be saved to - allow_cached: if json file is already present then skip conversion - """ - - # TODO: The dataset or the conversion script *may* change, - # a checksum would be useful for validating the cached data - - PathManager.mkdirs(os.path.dirname(output_file)) - with file_lock(output_file): - if PathManager.exists(output_file) and allow_cached: - logger.warning( - f"Using previously cached COCO format annotations at '{output_file}'. " - "You need to clear the cache file if your dataset has been modified." - ) - else: - logger.info(f"Converting annotations of dataset '{dataset_name}' to COCO format ...)") - coco_dict = convert_to_coco_dict(dataset_name) - - logger.info(f"Caching COCO format annotations at '{output_file}' ...") - with PathManager.open(output_file, "w") as f: - json.dump(coco_dict, f) - - -if __name__ == "__main__": - """ - Test the COCO json dataset loader. - - Usage: - python -m detectron2.data.data.coco \ - path/to/json path/to/image_root dataset_name - - "dataset_name" can be "coco_2014_minival_100", or other - pre-registered ones - """ - from detectron2.utils.logger import setup_logger - from detectron2.utils.visualizer import Visualizer - import detectron2.data.datasets # noqa # add pre-defined metadata - import sys - - logger = setup_logger(name=__name__) - assert sys.argv[3] in DatasetCatalog.list() - meta = MetadataCatalog.get(sys.argv[3]) - - dicts = load_coco_json(sys.argv[1], sys.argv[2], sys.argv[3]) - logger.info("Done loading {} samples.".format(len(dicts))) - - dirname = "coco-data-vis" - os.makedirs(dirname, exist_ok=True) - for d in dicts: - img = np.array(Image.open(d["file_name"])) - visualizer = Visualizer(img, metadata=meta) - vis = visualizer.draw_dataset_dict(d) - fpath = os.path.join(dirname, os.path.basename(d["file_name"])) - vis.save(fpath) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/lvis.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/lvis.py deleted file mode 100644 index 7b95be3..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/lvis.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -import os -from fvcore.common.file_io import PathManager -from fvcore.common.timer import Timer - -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.structures import BoxMode - -from .builtin_meta import _get_coco_instances_meta -from .lvis_v0_5_categories import LVIS_CATEGORIES - -""" -This file contains functions to parse LVIS-format annotations into dicts in the -"Detectron2 format". 
-""" - -logger = logging.getLogger(__name__) - -__all__ = ["load_lvis_json", "register_lvis_instances", "get_lvis_instances_meta"] - - -def register_lvis_instances(name, metadata, json_file, image_root): - """ - Register a dataset in LVIS's json annotation format for instance detection and segmentation. - - Args: - name (str): a name that identifies the dataset, e.g. "lvis_v0.5_train". - metadata (dict): extra metadata associated with this dataset. It can be an empty dict. - json_file (str): path to the json instance annotation file. - image_root (str or path-like): directory which contains all the images. - """ - DatasetCatalog.register(name, lambda: load_lvis_json(json_file, image_root, name)) - MetadataCatalog.get(name).set( - json_file=json_file, image_root=image_root, evaluator_type="lvis", **metadata - ) - - -def load_lvis_json(json_file, image_root, dataset_name=None): - """ - Load a json file in LVIS's annotation format. - - Args: - json_file (str): full path to the LVIS json annotation file. - image_root (str): the directory where the images in this json file exists. - dataset_name (str): the name of the dataset (e.g., "lvis_v0.5_train"). - If provided, this function will put "thing_classes" into the metadata - associated with this dataset. - - Returns: - list[dict]: a list of dicts in Detectron2 standard format. (See - `Using Custom Datasets `_ ) - - Notes: - 1. This function does not read the image files. - The results do not have the "image" field. - """ - from lvis import LVIS - - json_file = PathManager.get_local_path(json_file) - - timer = Timer() - lvis_api = LVIS(json_file) - if timer.seconds() > 1: - logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())) - - if dataset_name is not None: - meta = get_lvis_instances_meta(dataset_name) - MetadataCatalog.get(dataset_name).set(**meta) - - # sort indices for reproducible results - img_ids = sorted(lvis_api.imgs.keys()) - # imgs is a list of dicts, each looks something like: - # {'license': 4, - # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg', - # 'file_name': 'COCO_val2014_000000001268.jpg', - # 'height': 427, - # 'width': 640, - # 'date_captured': '2013-11-17 05:57:24', - # 'id': 1268} - imgs = lvis_api.load_imgs(img_ids) - # anns is a list[list[dict]], where each dict is an annotation - # record for an object. The inner list enumerates the objects in an image - # and the outer list enumerates over images. Example of anns[0]: - # [{'segmentation': [[192.81, - # 247.09, - # ... - # 219.03, - # 249.06]], - # 'area': 1035.749, - # 'image_id': 1268, - # 'bbox': [192.81, 224.8, 74.73, 33.43], - # 'category_id': 16, - # 'id': 42986}, - # ...] 
- anns = [lvis_api.img_ann_map[img_id] for img_id in img_ids] - - # Sanity check that each annotation has a unique id - ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image] - assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique".format( - json_file - ) - - imgs_anns = list(zip(imgs, anns)) - - logger.info("Loaded {} images in the LVIS format from {}".format(len(imgs_anns), json_file)) - - dataset_dicts = [] - - for (img_dict, anno_dict_list) in imgs_anns: - record = {} - file_name = img_dict["file_name"] - if img_dict["file_name"].startswith("COCO"): - # Convert form the COCO 2014 file naming convention of - # COCO_[train/val/test]2014_000000000000.jpg to the 2017 naming convention of - # 000000000000.jpg (LVIS v1 will fix this naming issue) - file_name = file_name[-16:] - record["file_name"] = os.path.join(image_root, file_name) - record["height"] = img_dict["height"] - record["width"] = img_dict["width"] - record["not_exhaustive_category_ids"] = img_dict.get("not_exhaustive_category_ids", []) - record["neg_category_ids"] = img_dict.get("neg_category_ids", []) - image_id = record["image_id"] = img_dict["id"] - - objs = [] - for anno in anno_dict_list: - # Check that the image_id in this annotation is the same as - # the image_id we're looking at. - # This fails only when the data parsing logic or the annotation file is buggy. - assert anno["image_id"] == image_id - obj = {"bbox": anno["bbox"], "bbox_mode": BoxMode.XYWH_ABS} - obj["category_id"] = anno["category_id"] - 1 # Convert 1-indexed to 0-indexed - segm = anno["segmentation"] # list[list[float]] - # filter out invalid polygons (< 3 points) - valid_segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6] - assert len(segm) == len( - valid_segm - ), "Annotation contains an invalid polygon with < 3 points" - assert len(segm) > 0 - obj["segmentation"] = segm - objs.append(obj) - record["annotations"] = objs - dataset_dicts.append(record) - - return dataset_dicts - - -def get_lvis_instances_meta(dataset_name): - """ - Load LVIS metadata. - - Args: - dataset_name (str): LVIS dataset name without the split name (e.g., "lvis_v0.5"). - - Returns: - dict: LVIS metadata with keys: thing_classes - """ - if "cocofied" in dataset_name: - return _get_coco_instances_meta() - if "v0.5" in dataset_name: - return _get_lvis_instances_meta_v0_5() - # There will be a v1 in the future - # elif dataset_name == "lvis_v1": - # return get_lvis_instances_meta_v1() - raise ValueError("No built-in metadata for dataset {}".format(dataset_name)) - - -def _get_lvis_instances_meta_v0_5(): - assert len(LVIS_CATEGORIES) == 1230 - cat_ids = [k["id"] for k in LVIS_CATEGORIES] - assert min(cat_ids) == 1 and max(cat_ids) == len( - cat_ids - ), "Category ids are not in [1, #categories], as expected" - # Ensure that the category list is sorted by id - lvis_categories = sorted(LVIS_CATEGORIES, key=lambda x: x["id"]) - thing_classes = [k["synonyms"][0] for k in lvis_categories] - meta = {"thing_classes": thing_classes} - return meta - - -if __name__ == "__main__": - """ - Test the LVIS json dataset loader. 
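In normal use the loader is wired up through registration rather than called directly; a minimal sketch, with placeholder dataset name and paths:

    from detectron2.data import DatasetCatalog
    from detectron2.data.datasets.lvis import get_lvis_instances_meta, register_lvis_instances

    register_lvis_instances(
        "lvis_v0.5_custom_train",                 # placeholder dataset name
        get_lvis_instances_meta("lvis_v0.5"),     # thing_classes for the 1230 LVIS v0.5 categories
        "datasets/lvis/lvis_v0.5_train.json",     # placeholder json path
        "datasets/coco/train2017",                # placeholder image root
    )

    # load_lvis_json runs lazily the first time the dataset is fetched from the catalog.
    dicts = DatasetCatalog.get("lvis_v0.5_custom_train")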
- - Usage: - python -m detectron2.data.data.lvis \ - path/to/json path/to/image_root dataset_name vis_limit - """ - import sys - import numpy as np - from detectron2.utils.logger import setup_logger - from PIL import Image - import detectron2.data.datasets # noqa # add pre-defined metadata - from detectron2.utils.visualizer import Visualizer - - logger = setup_logger(name=__name__) - meta = MetadataCatalog.get(sys.argv[3]) - - dicts = load_lvis_json(sys.argv[1], sys.argv[2], sys.argv[3]) - logger.info("Done loading {} samples.".format(len(dicts))) - - dirname = "lvis-data-vis" - os.makedirs(dirname, exist_ok=True) - for d in dicts[: int(sys.argv[4])]: - img = np.array(Image.open(d["file_name"])) - visualizer = Visualizer(img, metadata=meta) - vis = visualizer.draw_dataset_dict(d) - fpath = os.path.join(dirname, os.path.basename(d["file_name"])) - vis.save(fpath) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/lvis_v0_5_categories.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/lvis_v0_5_categories.py deleted file mode 100644 index 8205e60..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/lvis_v0_5_categories.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -# Autogen with -# with open("lvis_v0.5_val.json", "r") as f: -# a = json.load(f) -# c = a["categories"] -# for x in c: -# del x["image_count"] -# del x["instance_count"] -# LVIS_CATEGORIES = repr(c) + " # noqa" - -# fmt: off -LVIS_CATEGORIES = [{'frequency': 'r', 'id': 1, 'synset': 'acorn.n.01', 'synonyms': ['acorn'], 'def': 'nut from an oak tree', 'name': 'acorn'}, {'frequency': 'c', 'id': 2, 'synset': 'aerosol.n.02', 'synonyms': ['aerosol_can', 'spray_can'], 'def': 'a dispenser that holds a substance under pressure', 'name': 'aerosol_can'}, {'frequency': 'f', 'id': 3, 'synset': 'air_conditioner.n.01', 'synonyms': ['air_conditioner'], 'def': 'a machine that keeps air cool and dry', 'name': 'air_conditioner'}, {'frequency': 'f', 'id': 4, 'synset': 'airplane.n.01', 'synonyms': ['airplane', 'aeroplane'], 'def': 'an aircraft that has a fixed wing and is powered by propellers or jets', 'name': 'airplane'}, {'frequency': 'c', 'id': 5, 'synset': 'alarm_clock.n.01', 'synonyms': ['alarm_clock'], 'def': 'a clock that wakes a sleeper at some preset time', 'name': 'alarm_clock'}, {'frequency': 'c', 'id': 6, 'synset': 'alcohol.n.01', 'synonyms': ['alcohol', 'alcoholic_beverage'], 'def': 'a liquor or brew containing alcohol as the active agent', 'name': 'alcohol'}, {'frequency': 'r', 'id': 7, 'synset': 'alligator.n.02', 'synonyms': ['alligator', 'gator'], 'def': 'amphibious reptiles related to crocodiles but with shorter broader snouts', 'name': 'alligator'}, {'frequency': 'c', 'id': 8, 'synset': 'almond.n.02', 'synonyms': ['almond'], 'def': 'oval-shaped edible seed of the almond tree', 'name': 'almond'}, {'frequency': 'c', 'id': 9, 'synset': 'ambulance.n.01', 'synonyms': ['ambulance'], 'def': 'a vehicle that takes people to and from hospitals', 'name': 'ambulance'}, {'frequency': 'r', 'id': 10, 'synset': 'amplifier.n.01', 'synonyms': ['amplifier'], 'def': 'electronic equipment that increases strength of signals', 'name': 'amplifier'}, {'frequency': 'c', 'id': 11, 'synset': 'anklet.n.03', 'synonyms': ['anklet', 'ankle_bracelet'], 'def': 'an ornament worn around the ankle', 'name': 'anklet'}, {'frequency': 'f', 'id': 12, 'synset': 'antenna.n.01', 'synonyms': ['antenna', 'aerial', 
'transmitting_aerial'], 'def': 'an electrical device that sends or receives radio or television signals', 'name': 'antenna'}, {'frequency': 'f', 'id': 13, 'synset': 'apple.n.01', 'synonyms': ['apple'], 'def': 'fruit with red or yellow or green skin and sweet to tart crisp whitish flesh', 'name': 'apple'}, {'frequency': 'r', 'id': 14, 'synset': 'apple_juice.n.01', 'synonyms': ['apple_juice'], 'def': 'the juice of apples', 'name': 'apple_juice'}, {'frequency': 'r', 'id': 15, 'synset': 'applesauce.n.01', 'synonyms': ['applesauce'], 'def': 'puree of stewed apples usually sweetened and spiced', 'name': 'applesauce'}, {'frequency': 'r', 'id': 16, 'synset': 'apricot.n.02', 'synonyms': ['apricot'], 'def': 'downy yellow to rosy-colored fruit resembling a small peach', 'name': 'apricot'}, {'frequency': 'f', 'id': 17, 'synset': 'apron.n.01', 'synonyms': ['apron'], 'def': 'a garment of cloth that is tied about the waist and worn to protect clothing', 'name': 'apron'}, {'frequency': 'c', 'id': 18, 'synset': 'aquarium.n.01', 'synonyms': ['aquarium', 'fish_tank'], 'def': 'a tank/pool/bowl filled with water for keeping live fish and underwater animals', 'name': 'aquarium'}, {'frequency': 'c', 'id': 19, 'synset': 'armband.n.02', 'synonyms': ['armband'], 'def': 'a band worn around the upper arm', 'name': 'armband'}, {'frequency': 'f', 'id': 20, 'synset': 'armchair.n.01', 'synonyms': ['armchair'], 'def': 'chair with a support on each side for arms', 'name': 'armchair'}, {'frequency': 'r', 'id': 21, 'synset': 'armoire.n.01', 'synonyms': ['armoire'], 'def': 'a large wardrobe or cabinet', 'name': 'armoire'}, {'frequency': 'r', 'id': 22, 'synset': 'armor.n.01', 'synonyms': ['armor', 'armour'], 'def': 'protective covering made of metal and used in combat', 'name': 'armor'}, {'frequency': 'c', 'id': 23, 'synset': 'artichoke.n.02', 'synonyms': ['artichoke'], 'def': 'a thistlelike flower head with edible fleshy leaves and heart', 'name': 'artichoke'}, {'frequency': 'f', 'id': 24, 'synset': 'ashcan.n.01', 'synonyms': ['trash_can', 'garbage_can', 'wastebin', 'dustbin', 'trash_barrel', 'trash_bin'], 'def': 'a bin that holds rubbish until it is collected', 'name': 'trash_can'}, {'frequency': 'c', 'id': 25, 'synset': 'ashtray.n.01', 'synonyms': ['ashtray'], 'def': "a receptacle for the ash from smokers' cigars or cigarettes", 'name': 'ashtray'}, {'frequency': 'c', 'id': 26, 'synset': 'asparagus.n.02', 'synonyms': ['asparagus'], 'def': 'edible young shoots of the asparagus plant', 'name': 'asparagus'}, {'frequency': 'c', 'id': 27, 'synset': 'atomizer.n.01', 'synonyms': ['atomizer', 'atomiser', 'spray', 'sprayer', 'nebulizer', 'nebuliser'], 'def': 'a dispenser that turns a liquid (such as perfume) into a fine mist', 'name': 'atomizer'}, {'frequency': 'c', 'id': 28, 'synset': 'avocado.n.01', 'synonyms': ['avocado'], 'def': 'a pear-shaped fruit with green or blackish skin and rich yellowish pulp enclosing a single large seed', 'name': 'avocado'}, {'frequency': 'c', 'id': 29, 'synset': 'award.n.02', 'synonyms': ['award', 'accolade'], 'def': 'a tangible symbol signifying approval or distinction', 'name': 'award'}, {'frequency': 'f', 'id': 30, 'synset': 'awning.n.01', 'synonyms': ['awning'], 'def': 'a canopy made of canvas to shelter people or things from rain or sun', 'name': 'awning'}, {'frequency': 'r', 'id': 31, 'synset': 'ax.n.01', 'synonyms': ['ax', 'axe'], 'def': 'an edge tool with a heavy bladed head mounted across a handle', 'name': 'ax'}, {'frequency': 'f', 'id': 32, 'synset': 'baby_buggy.n.01', 'synonyms': 
['baby_buggy', 'baby_carriage', 'perambulator', 'pram', 'stroller'], 'def': 'a small vehicle with four wheels in which a baby or child is pushed around', 'name': 'baby_buggy'}, {'frequency': 'c', 'id': 33, 'synset': 'backboard.n.01', 'synonyms': ['basketball_backboard'], 'def': 'a raised vertical board with basket attached; used to play basketball', 'name': 'basketball_backboard'}, {'frequency': 'f', 'id': 34, 'synset': 'backpack.n.01', 'synonyms': ['backpack', 'knapsack', 'packsack', 'rucksack', 'haversack'], 'def': 'a bag carried by a strap on your back or shoulder', 'name': 'backpack'}, {'frequency': 'f', 'id': 35, 'synset': 'bag.n.04', 'synonyms': ['handbag', 'purse', 'pocketbook'], 'def': 'a container used for carrying money and small personal items or accessories', 'name': 'handbag'}, {'frequency': 'f', 'id': 36, 'synset': 'bag.n.06', 'synonyms': ['suitcase', 'baggage', 'luggage'], 'def': 'cases used to carry belongings when traveling', 'name': 'suitcase'}, {'frequency': 'c', 'id': 37, 'synset': 'bagel.n.01', 'synonyms': ['bagel', 'beigel'], 'def': 'glazed yeast-raised doughnut-shaped roll with hard crust', 'name': 'bagel'}, {'frequency': 'r', 'id': 38, 'synset': 'bagpipe.n.01', 'synonyms': ['bagpipe'], 'def': 'a tubular wind instrument; the player blows air into a bag and squeezes it out', 'name': 'bagpipe'}, {'frequency': 'r', 'id': 39, 'synset': 'baguet.n.01', 'synonyms': ['baguet', 'baguette'], 'def': 'narrow French stick loaf', 'name': 'baguet'}, {'frequency': 'r', 'id': 40, 'synset': 'bait.n.02', 'synonyms': ['bait', 'lure'], 'def': 'something used to lure fish or other animals into danger so they can be trapped or killed', 'name': 'bait'}, {'frequency': 'f', 'id': 41, 'synset': 'ball.n.06', 'synonyms': ['ball'], 'def': 'a spherical object used as a plaything', 'name': 'ball'}, {'frequency': 'r', 'id': 42, 'synset': 'ballet_skirt.n.01', 'synonyms': ['ballet_skirt', 'tutu'], 'def': 'very short skirt worn by ballerinas', 'name': 'ballet_skirt'}, {'frequency': 'f', 'id': 43, 'synset': 'balloon.n.01', 'synonyms': ['balloon'], 'def': 'large tough nonrigid bag filled with gas or heated air', 'name': 'balloon'}, {'frequency': 'c', 'id': 44, 'synset': 'bamboo.n.02', 'synonyms': ['bamboo'], 'def': 'woody tropical grass having hollow woody stems', 'name': 'bamboo'}, {'frequency': 'f', 'id': 45, 'synset': 'banana.n.02', 'synonyms': ['banana'], 'def': 'elongated crescent-shaped yellow fruit with soft sweet flesh', 'name': 'banana'}, {'frequency': 'r', 'id': 46, 'synset': 'band_aid.n.01', 'synonyms': ['Band_Aid'], 'def': 'trade name for an adhesive bandage to cover small cuts or blisters', 'name': 'Band_Aid'}, {'frequency': 'c', 'id': 47, 'synset': 'bandage.n.01', 'synonyms': ['bandage'], 'def': 'a piece of soft material that covers and protects an injured part of the body', 'name': 'bandage'}, {'frequency': 'c', 'id': 48, 'synset': 'bandanna.n.01', 'synonyms': ['bandanna', 'bandana'], 'def': 'large and brightly colored handkerchief; often used as a neckerchief', 'name': 'bandanna'}, {'frequency': 'r', 'id': 49, 'synset': 'banjo.n.01', 'synonyms': ['banjo'], 'def': 'a stringed instrument of the guitar family with a long neck and circular body', 'name': 'banjo'}, {'frequency': 'f', 'id': 50, 'synset': 'banner.n.01', 'synonyms': ['banner', 'streamer'], 'def': 'long strip of cloth or paper used for decoration or advertising', 'name': 'banner'}, {'frequency': 'r', 'id': 51, 'synset': 'barbell.n.01', 'synonyms': ['barbell'], 'def': 'a bar to which heavy discs are attached at each end; used in 
weightlifting', 'name': 'barbell'}, {'frequency': 'r', 'id': 52, 'synset': 'barge.n.01', 'synonyms': ['barge'], 'def': 'a flatbottom boat for carrying heavy loads (especially on canals)', 'name': 'barge'}, {'frequency': 'f', 'id': 53, 'synset': 'barrel.n.02', 'synonyms': ['barrel', 'cask'], 'def': 'a cylindrical container that holds liquids', 'name': 'barrel'}, {'frequency': 'c', 'id': 54, 'synset': 'barrette.n.01', 'synonyms': ['barrette'], 'def': "a pin for holding women's hair in place", 'name': 'barrette'}, {'frequency': 'c', 'id': 55, 'synset': 'barrow.n.03', 'synonyms': ['barrow', 'garden_cart', 'lawn_cart', 'wheelbarrow'], 'def': 'a cart for carrying small loads; has handles and one or more wheels', 'name': 'barrow'}, {'frequency': 'f', 'id': 56, 'synset': 'base.n.03', 'synonyms': ['baseball_base'], 'def': 'a place that the runner must touch before scoring', 'name': 'baseball_base'}, {'frequency': 'f', 'id': 57, 'synset': 'baseball.n.02', 'synonyms': ['baseball'], 'def': 'a ball used in playing baseball', 'name': 'baseball'}, {'frequency': 'f', 'id': 58, 'synset': 'baseball_bat.n.01', 'synonyms': ['baseball_bat'], 'def': 'an implement used in baseball by the batter', 'name': 'baseball_bat'}, {'frequency': 'f', 'id': 59, 'synset': 'baseball_cap.n.01', 'synonyms': ['baseball_cap', 'jockey_cap', 'golf_cap'], 'def': 'a cap with a bill', 'name': 'baseball_cap'}, {'frequency': 'f', 'id': 60, 'synset': 'baseball_glove.n.01', 'synonyms': ['baseball_glove', 'baseball_mitt'], 'def': 'the handwear used by fielders in playing baseball', 'name': 'baseball_glove'}, {'frequency': 'f', 'id': 61, 'synset': 'basket.n.01', 'synonyms': ['basket', 'handbasket'], 'def': 'a container that is usually woven and has handles', 'name': 'basket'}, {'frequency': 'c', 'id': 62, 'synset': 'basket.n.03', 'synonyms': ['basketball_hoop'], 'def': 'metal hoop supporting a net through which players try to throw the basketball', 'name': 'basketball_hoop'}, {'frequency': 'c', 'id': 63, 'synset': 'basketball.n.02', 'synonyms': ['basketball'], 'def': 'an inflated ball used in playing basketball', 'name': 'basketball'}, {'frequency': 'r', 'id': 64, 'synset': 'bass_horn.n.01', 'synonyms': ['bass_horn', 'sousaphone', 'tuba'], 'def': 'the lowest brass wind instrument', 'name': 'bass_horn'}, {'frequency': 'r', 'id': 65, 'synset': 'bat.n.01', 'synonyms': ['bat_(animal)'], 'def': 'nocturnal mouselike mammal with forelimbs modified to form membranous wings', 'name': 'bat_(animal)'}, {'frequency': 'f', 'id': 66, 'synset': 'bath_mat.n.01', 'synonyms': ['bath_mat'], 'def': 'a heavy towel or mat to stand on while drying yourself after a bath', 'name': 'bath_mat'}, {'frequency': 'f', 'id': 67, 'synset': 'bath_towel.n.01', 'synonyms': ['bath_towel'], 'def': 'a large towel; to dry yourself after a bath', 'name': 'bath_towel'}, {'frequency': 'c', 'id': 68, 'synset': 'bathrobe.n.01', 'synonyms': ['bathrobe'], 'def': 'a loose-fitting robe of towelling; worn after a bath or swim', 'name': 'bathrobe'}, {'frequency': 'f', 'id': 69, 'synset': 'bathtub.n.01', 'synonyms': ['bathtub', 'bathing_tub'], 'def': 'a large open container that you fill with water and use to wash the body', 'name': 'bathtub'}, {'frequency': 'r', 'id': 70, 'synset': 'batter.n.02', 'synonyms': ['batter_(food)'], 'def': 'a liquid or semiliquid mixture, as of flour, eggs, and milk, used in cooking', 'name': 'batter_(food)'}, {'frequency': 'c', 'id': 71, 'synset': 'battery.n.02', 'synonyms': ['battery'], 'def': 'a portable device that produces electricity', 'name': 'battery'}, 
{'frequency': 'r', 'id': 72, 'synset': 'beach_ball.n.01', 'synonyms': ['beachball'], 'def': 'large and light ball; for play at the seaside', 'name': 'beachball'}, {'frequency': 'c', 'id': 73, 'synset': 'bead.n.01', 'synonyms': ['bead'], 'def': 'a small ball with a hole through the middle used for ornamentation, jewellery, etc.', 'name': 'bead'}, {'frequency': 'r', 'id': 74, 'synset': 'beaker.n.01', 'synonyms': ['beaker'], 'def': 'a flatbottomed jar made of glass or plastic; used for chemistry', 'name': 'beaker'}, {'frequency': 'c', 'id': 75, 'synset': 'bean_curd.n.01', 'synonyms': ['bean_curd', 'tofu'], 'def': 'cheeselike food made of curdled soybean milk', 'name': 'bean_curd'}, {'frequency': 'c', 'id': 76, 'synset': 'beanbag.n.01', 'synonyms': ['beanbag'], 'def': 'a bag filled with dried beans or similar items; used in games or to sit on', 'name': 'beanbag'}, {'frequency': 'f', 'id': 77, 'synset': 'beanie.n.01', 'synonyms': ['beanie', 'beany'], 'def': 'a small skullcap; formerly worn by schoolboys and college freshmen', 'name': 'beanie'}, {'frequency': 'f', 'id': 78, 'synset': 'bear.n.01', 'synonyms': ['bear'], 'def': 'large carnivorous or omnivorous mammals with shaggy coats and claws', 'name': 'bear'}, {'frequency': 'f', 'id': 79, 'synset': 'bed.n.01', 'synonyms': ['bed'], 'def': 'a piece of furniture that provides a place to sleep', 'name': 'bed'}, {'frequency': 'c', 'id': 80, 'synset': 'bedspread.n.01', 'synonyms': ['bedspread', 'bedcover', 'bed_covering', 'counterpane', 'spread'], 'def': 'decorative cover for a bed', 'name': 'bedspread'}, {'frequency': 'f', 'id': 81, 'synset': 'beef.n.01', 'synonyms': ['cow'], 'def': 'cattle that are reared for their meat', 'name': 'cow'}, {'frequency': 'c', 'id': 82, 'synset': 'beef.n.02', 'synonyms': ['beef_(food)', 'boeuf_(food)'], 'def': 'meat from an adult domestic bovine', 'name': 'beef_(food)'}, {'frequency': 'r', 'id': 83, 'synset': 'beeper.n.01', 'synonyms': ['beeper', 'pager'], 'def': 'an device that beeps when the person carrying it is being paged', 'name': 'beeper'}, {'frequency': 'f', 'id': 84, 'synset': 'beer_bottle.n.01', 'synonyms': ['beer_bottle'], 'def': 'a bottle that holds beer', 'name': 'beer_bottle'}, {'frequency': 'c', 'id': 85, 'synset': 'beer_can.n.01', 'synonyms': ['beer_can'], 'def': 'a can that holds beer', 'name': 'beer_can'}, {'frequency': 'r', 'id': 86, 'synset': 'beetle.n.01', 'synonyms': ['beetle'], 'def': 'insect with hard wing covers', 'name': 'beetle'}, {'frequency': 'f', 'id': 87, 'synset': 'bell.n.01', 'synonyms': ['bell'], 'def': 'a hollow device made of metal that makes a ringing sound when struck', 'name': 'bell'}, {'frequency': 'f', 'id': 88, 'synset': 'bell_pepper.n.02', 'synonyms': ['bell_pepper', 'capsicum'], 'def': 'large bell-shaped sweet pepper in green or red or yellow or orange or black varieties', 'name': 'bell_pepper'}, {'frequency': 'f', 'id': 89, 'synset': 'belt.n.02', 'synonyms': ['belt'], 'def': 'a band to tie or buckle around the body (usually at the waist)', 'name': 'belt'}, {'frequency': 'f', 'id': 90, 'synset': 'belt_buckle.n.01', 'synonyms': ['belt_buckle'], 'def': 'the buckle used to fasten a belt', 'name': 'belt_buckle'}, {'frequency': 'f', 'id': 91, 'synset': 'bench.n.01', 'synonyms': ['bench'], 'def': 'a long seat for more than one person', 'name': 'bench'}, {'frequency': 'c', 'id': 92, 'synset': 'beret.n.01', 'synonyms': ['beret'], 'def': 'a cap with no brim or bill; made of soft cloth', 'name': 'beret'}, {'frequency': 'c', 'id': 93, 'synset': 'bib.n.02', 'synonyms': ['bib'], 'def': 'a 
napkin tied under the chin of a child while eating', 'name': 'bib'}, {'frequency': 'r', 'id': 94, 'synset': 'bible.n.01', 'synonyms': ['Bible'], 'def': 'the sacred writings of the Christian religions', 'name': 'Bible'}, {'frequency': 'f', 'id': 95, 'synset': 'bicycle.n.01', 'synonyms': ['bicycle', 'bike_(bicycle)'], 'def': 'a wheeled vehicle that has two wheels and is moved by foot pedals', 'name': 'bicycle'}, {'frequency': 'f', 'id': 96, 'synset': 'bill.n.09', 'synonyms': ['visor', 'vizor'], 'def': 'a brim that projects to the front to shade the eyes', 'name': 'visor'}, {'frequency': 'c', 'id': 97, 'synset': 'binder.n.03', 'synonyms': ['binder', 'ring-binder'], 'def': 'holds loose papers or magazines', 'name': 'binder'}, {'frequency': 'c', 'id': 98, 'synset': 'binoculars.n.01', 'synonyms': ['binoculars', 'field_glasses', 'opera_glasses'], 'def': 'an optical instrument designed for simultaneous use by both eyes', 'name': 'binoculars'}, {'frequency': 'f', 'id': 99, 'synset': 'bird.n.01', 'synonyms': ['bird'], 'def': 'animal characterized by feathers and wings', 'name': 'bird'}, {'frequency': 'r', 'id': 100, 'synset': 'bird_feeder.n.01', 'synonyms': ['birdfeeder'], 'def': 'an outdoor device that supplies food for wild birds', 'name': 'birdfeeder'}, {'frequency': 'r', 'id': 101, 'synset': 'birdbath.n.01', 'synonyms': ['birdbath'], 'def': 'an ornamental basin (usually in a garden) for birds to bathe in', 'name': 'birdbath'}, {'frequency': 'c', 'id': 102, 'synset': 'birdcage.n.01', 'synonyms': ['birdcage'], 'def': 'a cage in which a bird can be kept', 'name': 'birdcage'}, {'frequency': 'c', 'id': 103, 'synset': 'birdhouse.n.01', 'synonyms': ['birdhouse'], 'def': 'a shelter for birds', 'name': 'birdhouse'}, {'frequency': 'f', 'id': 104, 'synset': 'birthday_cake.n.01', 'synonyms': ['birthday_cake'], 'def': 'decorated cake served at a birthday party', 'name': 'birthday_cake'}, {'frequency': 'r', 'id': 105, 'synset': 'birthday_card.n.01', 'synonyms': ['birthday_card'], 'def': 'a card expressing a birthday greeting', 'name': 'birthday_card'}, {'frequency': 'r', 'id': 106, 'synset': 'biscuit.n.01', 'synonyms': ['biscuit_(bread)'], 'def': 'small round bread leavened with baking-powder or soda', 'name': 'biscuit_(bread)'}, {'frequency': 'r', 'id': 107, 'synset': 'black_flag.n.01', 'synonyms': ['pirate_flag'], 'def': 'a flag usually bearing a white skull and crossbones on a black background', 'name': 'pirate_flag'}, {'frequency': 'c', 'id': 108, 'synset': 'black_sheep.n.02', 'synonyms': ['black_sheep'], 'def': 'sheep with a black coat', 'name': 'black_sheep'}, {'frequency': 'c', 'id': 109, 'synset': 'blackboard.n.01', 'synonyms': ['blackboard', 'chalkboard'], 'def': 'sheet of slate; for writing with chalk', 'name': 'blackboard'}, {'frequency': 'f', 'id': 110, 'synset': 'blanket.n.01', 'synonyms': ['blanket'], 'def': 'bedding that keeps a person warm in bed', 'name': 'blanket'}, {'frequency': 'c', 'id': 111, 'synset': 'blazer.n.01', 'synonyms': ['blazer', 'sport_jacket', 'sport_coat', 'sports_jacket', 'sports_coat'], 'def': 'lightweight jacket; often striped in the colors of a club or school', 'name': 'blazer'}, {'frequency': 'f', 'id': 112, 'synset': 'blender.n.01', 'synonyms': ['blender', 'liquidizer', 'liquidiser'], 'def': 'an electrically powered mixer that mix or chop or liquefy foods', 'name': 'blender'}, {'frequency': 'r', 'id': 113, 'synset': 'blimp.n.02', 'synonyms': ['blimp'], 'def': 'a small nonrigid airship used for observation or as a barrage balloon', 'name': 'blimp'}, {'frequency': 'c', 
'id': 114, 'synset': 'blinker.n.01', 'synonyms': ['blinker', 'flasher'], 'def': 'a light that flashes on and off; used as a signal or to send messages', 'name': 'blinker'}, {'frequency': 'c', 'id': 115, 'synset': 'blueberry.n.02', 'synonyms': ['blueberry'], 'def': 'sweet edible dark-blue berries of blueberry plants', 'name': 'blueberry'}, {'frequency': 'r', 'id': 116, 'synset': 'boar.n.02', 'synonyms': ['boar'], 'def': 'an uncastrated male hog', 'name': 'boar'}, {'frequency': 'r', 'id': 117, 'synset': 'board.n.09', 'synonyms': ['gameboard'], 'def': 'a flat portable surface (usually rectangular) designed for board games', 'name': 'gameboard'}, {'frequency': 'f', 'id': 118, 'synset': 'boat.n.01', 'synonyms': ['boat', 'ship_(boat)'], 'def': 'a vessel for travel on water', 'name': 'boat'}, {'frequency': 'c', 'id': 119, 'synset': 'bobbin.n.01', 'synonyms': ['bobbin', 'spool', 'reel'], 'def': 'a thing around which thread/tape/film or other flexible materials can be wound', 'name': 'bobbin'}, {'frequency': 'r', 'id': 120, 'synset': 'bobby_pin.n.01', 'synonyms': ['bobby_pin', 'hairgrip'], 'def': 'a flat wire hairpin used to hold bobbed hair in place', 'name': 'bobby_pin'}, {'frequency': 'c', 'id': 121, 'synset': 'boiled_egg.n.01', 'synonyms': ['boiled_egg', 'coddled_egg'], 'def': 'egg cooked briefly in the shell in gently boiling water', 'name': 'boiled_egg'}, {'frequency': 'r', 'id': 122, 'synset': 'bolo_tie.n.01', 'synonyms': ['bolo_tie', 'bolo', 'bola_tie', 'bola'], 'def': 'a cord fastened around the neck with an ornamental clasp and worn as a necktie', 'name': 'bolo_tie'}, {'frequency': 'c', 'id': 123, 'synset': 'bolt.n.03', 'synonyms': ['deadbolt'], 'def': 'the part of a lock that is engaged or withdrawn with a key', 'name': 'deadbolt'}, {'frequency': 'f', 'id': 124, 'synset': 'bolt.n.06', 'synonyms': ['bolt'], 'def': 'a screw that screws into a nut to form a fastener', 'name': 'bolt'}, {'frequency': 'r', 'id': 125, 'synset': 'bonnet.n.01', 'synonyms': ['bonnet'], 'def': 'a hat tied under the chin', 'name': 'bonnet'}, {'frequency': 'f', 'id': 126, 'synset': 'book.n.01', 'synonyms': ['book'], 'def': 'a written work or composition that has been published', 'name': 'book'}, {'frequency': 'r', 'id': 127, 'synset': 'book_bag.n.01', 'synonyms': ['book_bag'], 'def': 'a bag in which students carry their books', 'name': 'book_bag'}, {'frequency': 'c', 'id': 128, 'synset': 'bookcase.n.01', 'synonyms': ['bookcase'], 'def': 'a piece of furniture with shelves for storing books', 'name': 'bookcase'}, {'frequency': 'c', 'id': 129, 'synset': 'booklet.n.01', 'synonyms': ['booklet', 'brochure', 'leaflet', 'pamphlet'], 'def': 'a small book usually having a paper cover', 'name': 'booklet'}, {'frequency': 'r', 'id': 130, 'synset': 'bookmark.n.01', 'synonyms': ['bookmark', 'bookmarker'], 'def': 'a marker (a piece of paper or ribbon) placed between the pages of a book', 'name': 'bookmark'}, {'frequency': 'r', 'id': 131, 'synset': 'boom.n.04', 'synonyms': ['boom_microphone', 'microphone_boom'], 'def': 'a pole carrying an overhead microphone projected over a film or tv set', 'name': 'boom_microphone'}, {'frequency': 'f', 'id': 132, 'synset': 'boot.n.01', 'synonyms': ['boot'], 'def': 'footwear that covers the whole foot and lower leg', 'name': 'boot'}, {'frequency': 'f', 'id': 133, 'synset': 'bottle.n.01', 'synonyms': ['bottle'], 'def': 'a glass or plastic vessel used for storing drinks or other liquids', 'name': 'bottle'}, {'frequency': 'c', 'id': 134, 'synset': 'bottle_opener.n.01', 'synonyms': ['bottle_opener'], 
'def': 'an opener for removing caps or corks from bottles', 'name': 'bottle_opener'}, {'frequency': 'c', 'id': 135, 'synset': 'bouquet.n.01', 'synonyms': ['bouquet'], 'def': 'an arrangement of flowers that is usually given as a present', 'name': 'bouquet'}, {'frequency': 'r', 'id': 136, 'synset': 'bow.n.04', 'synonyms': ['bow_(weapon)'], 'def': 'a weapon for shooting arrows', 'name': 'bow_(weapon)'}, {'frequency': 'f', 'id': 137, 'synset': 'bow.n.08', 'synonyms': ['bow_(decorative_ribbons)'], 'def': 'a decorative interlacing of ribbons', 'name': 'bow_(decorative_ribbons)'}, {'frequency': 'f', 'id': 138, 'synset': 'bow_tie.n.01', 'synonyms': ['bow-tie', 'bowtie'], 'def': "a man's tie that ties in a bow", 'name': 'bow-tie'}, {'frequency': 'f', 'id': 139, 'synset': 'bowl.n.03', 'synonyms': ['bowl'], 'def': 'a dish that is round and open at the top for serving foods', 'name': 'bowl'}, {'frequency': 'r', 'id': 140, 'synset': 'bowl.n.08', 'synonyms': ['pipe_bowl'], 'def': 'a small round container that is open at the top for holding tobacco', 'name': 'pipe_bowl'}, {'frequency': 'c', 'id': 141, 'synset': 'bowler_hat.n.01', 'synonyms': ['bowler_hat', 'bowler', 'derby_hat', 'derby', 'plug_hat'], 'def': 'a felt hat that is round and hard with a narrow brim', 'name': 'bowler_hat'}, {'frequency': 'r', 'id': 142, 'synset': 'bowling_ball.n.01', 'synonyms': ['bowling_ball'], 'def': 'a large ball with finger holes used in the sport of bowling', 'name': 'bowling_ball'}, {'frequency': 'r', 'id': 143, 'synset': 'bowling_pin.n.01', 'synonyms': ['bowling_pin'], 'def': 'a club-shaped wooden object used in bowling', 'name': 'bowling_pin'}, {'frequency': 'r', 'id': 144, 'synset': 'boxing_glove.n.01', 'synonyms': ['boxing_glove'], 'def': 'large glove coverings the fists of a fighter worn for the sport of boxing', 'name': 'boxing_glove'}, {'frequency': 'c', 'id': 145, 'synset': 'brace.n.06', 'synonyms': ['suspenders'], 'def': 'elastic straps that hold trousers up (usually used in the plural)', 'name': 'suspenders'}, {'frequency': 'f', 'id': 146, 'synset': 'bracelet.n.02', 'synonyms': ['bracelet', 'bangle'], 'def': 'jewelry worn around the wrist for decoration', 'name': 'bracelet'}, {'frequency': 'r', 'id': 147, 'synset': 'brass.n.07', 'synonyms': ['brass_plaque'], 'def': 'a memorial made of brass', 'name': 'brass_plaque'}, {'frequency': 'c', 'id': 148, 'synset': 'brassiere.n.01', 'synonyms': ['brassiere', 'bra', 'bandeau'], 'def': 'an undergarment worn by women to support their breasts', 'name': 'brassiere'}, {'frequency': 'c', 'id': 149, 'synset': 'bread-bin.n.01', 'synonyms': ['bread-bin', 'breadbox'], 'def': 'a container used to keep bread or cake in', 'name': 'bread-bin'}, {'frequency': 'r', 'id': 150, 'synset': 'breechcloth.n.01', 'synonyms': ['breechcloth', 'breechclout', 'loincloth'], 'def': 'a garment that provides covering for the loins', 'name': 'breechcloth'}, {'frequency': 'c', 'id': 151, 'synset': 'bridal_gown.n.01', 'synonyms': ['bridal_gown', 'wedding_gown', 'wedding_dress'], 'def': 'a gown worn by the bride at a wedding', 'name': 'bridal_gown'}, {'frequency': 'c', 'id': 152, 'synset': 'briefcase.n.01', 'synonyms': ['briefcase'], 'def': 'a case with a handle; for carrying papers or files or books', 'name': 'briefcase'}, {'frequency': 'c', 'id': 153, 'synset': 'bristle_brush.n.01', 'synonyms': ['bristle_brush'], 'def': 'a brush that is made with the short stiff hairs of an animal or plant', 'name': 'bristle_brush'}, {'frequency': 'f', 'id': 154, 'synset': 'broccoli.n.01', 'synonyms': ['broccoli'], 
'def': 'plant with dense clusters of tight green flower buds', 'name': 'broccoli'}, {'frequency': 'r', 'id': 155, 'synset': 'brooch.n.01', 'synonyms': ['broach'], 'def': 'a decorative pin worn by women', 'name': 'broach'}, {'frequency': 'c', 'id': 156, 'synset': 'broom.n.01', 'synonyms': ['broom'], 'def': 'bundle of straws or twigs attached to a long handle; used for cleaning', 'name': 'broom'}, {'frequency': 'c', 'id': 157, 'synset': 'brownie.n.03', 'synonyms': ['brownie'], 'def': 'square or bar of very rich chocolate cake usually with nuts', 'name': 'brownie'}, {'frequency': 'c', 'id': 158, 'synset': 'brussels_sprouts.n.01', 'synonyms': ['brussels_sprouts'], 'def': 'the small edible cabbage-like buds growing along a stalk', 'name': 'brussels_sprouts'}, {'frequency': 'r', 'id': 159, 'synset': 'bubble_gum.n.01', 'synonyms': ['bubble_gum'], 'def': 'a kind of chewing gum that can be blown into bubbles', 'name': 'bubble_gum'}, {'frequency': 'f', 'id': 160, 'synset': 'bucket.n.01', 'synonyms': ['bucket', 'pail'], 'def': 'a roughly cylindrical vessel that is open at the top', 'name': 'bucket'}, {'frequency': 'r', 'id': 161, 'synset': 'buggy.n.01', 'synonyms': ['horse_buggy'], 'def': 'a small lightweight carriage; drawn by a single horse', 'name': 'horse_buggy'}, {'frequency': 'c', 'id': 162, 'synset': 'bull.n.11', 'synonyms': ['bull'], 'def': 'mature male cow', 'name': 'bull'}, {'frequency': 'r', 'id': 163, 'synset': 'bulldog.n.01', 'synonyms': ['bulldog'], 'def': 'a thickset short-haired dog with a large head and strong undershot lower jaw', 'name': 'bulldog'}, {'frequency': 'r', 'id': 164, 'synset': 'bulldozer.n.01', 'synonyms': ['bulldozer', 'dozer'], 'def': 'large powerful tractor; a large blade in front flattens areas of ground', 'name': 'bulldozer'}, {'frequency': 'c', 'id': 165, 'synset': 'bullet_train.n.01', 'synonyms': ['bullet_train'], 'def': 'a high-speed passenger train', 'name': 'bullet_train'}, {'frequency': 'c', 'id': 166, 'synset': 'bulletin_board.n.02', 'synonyms': ['bulletin_board', 'notice_board'], 'def': 'a board that hangs on a wall; displays announcements', 'name': 'bulletin_board'}, {'frequency': 'r', 'id': 167, 'synset': 'bulletproof_vest.n.01', 'synonyms': ['bulletproof_vest'], 'def': 'a vest capable of resisting the impact of a bullet', 'name': 'bulletproof_vest'}, {'frequency': 'c', 'id': 168, 'synset': 'bullhorn.n.01', 'synonyms': ['bullhorn', 'megaphone'], 'def': 'a portable loudspeaker with built-in microphone and amplifier', 'name': 'bullhorn'}, {'frequency': 'r', 'id': 169, 'synset': 'bully_beef.n.01', 'synonyms': ['corned_beef', 'corn_beef'], 'def': 'beef cured or pickled in brine', 'name': 'corned_beef'}, {'frequency': 'f', 'id': 170, 'synset': 'bun.n.01', 'synonyms': ['bun', 'roll'], 'def': 'small rounded bread either plain or sweet', 'name': 'bun'}, {'frequency': 'c', 'id': 171, 'synset': 'bunk_bed.n.01', 'synonyms': ['bunk_bed'], 'def': 'beds built one above the other', 'name': 'bunk_bed'}, {'frequency': 'f', 'id': 172, 'synset': 'buoy.n.01', 'synonyms': ['buoy'], 'def': 'a float attached by rope to the seabed to mark channels in a harbor or underwater hazards', 'name': 'buoy'}, {'frequency': 'r', 'id': 173, 'synset': 'burrito.n.01', 'synonyms': ['burrito'], 'def': 'a flour tortilla folded around a filling', 'name': 'burrito'}, {'frequency': 'f', 'id': 174, 'synset': 'bus.n.01', 'synonyms': ['bus_(vehicle)', 'autobus', 'charabanc', 'double-decker', 'motorbus', 'motorcoach'], 'def': 'a vehicle carrying many passengers; used for public transport', 'name': 
'bus_(vehicle)'}, {'frequency': 'c', 'id': 175, 'synset': 'business_card.n.01', 'synonyms': ['business_card'], 'def': "a card on which are printed the person's name and business affiliation", 'name': 'business_card'}, {'frequency': 'c', 'id': 176, 'synset': 'butcher_knife.n.01', 'synonyms': ['butcher_knife'], 'def': 'a large sharp knife for cutting or trimming meat', 'name': 'butcher_knife'}, {'frequency': 'c', 'id': 177, 'synset': 'butter.n.01', 'synonyms': ['butter'], 'def': 'an edible emulsion of fat globules made by churning milk or cream; for cooking and table use', 'name': 'butter'}, {'frequency': 'c', 'id': 178, 'synset': 'butterfly.n.01', 'synonyms': ['butterfly'], 'def': 'insect typically having a slender body with knobbed antennae and broad colorful wings', 'name': 'butterfly'}, {'frequency': 'f', 'id': 179, 'synset': 'button.n.01', 'synonyms': ['button'], 'def': 'a round fastener sewn to shirts and coats etc to fit through buttonholes', 'name': 'button'}, {'frequency': 'f', 'id': 180, 'synset': 'cab.n.03', 'synonyms': ['cab_(taxi)', 'taxi', 'taxicab'], 'def': 'a car that takes passengers where they want to go in exchange for money', 'name': 'cab_(taxi)'}, {'frequency': 'r', 'id': 181, 'synset': 'cabana.n.01', 'synonyms': ['cabana'], 'def': 'a small tent used as a dressing room beside the sea or a swimming pool', 'name': 'cabana'}, {'frequency': 'r', 'id': 182, 'synset': 'cabin_car.n.01', 'synonyms': ['cabin_car', 'caboose'], 'def': 'a car on a freight train for use of the train crew; usually the last car on the train', 'name': 'cabin_car'}, {'frequency': 'f', 'id': 183, 'synset': 'cabinet.n.01', 'synonyms': ['cabinet'], 'def': 'a piece of furniture resembling a cupboard with doors and shelves and drawers', 'name': 'cabinet'}, {'frequency': 'r', 'id': 184, 'synset': 'cabinet.n.03', 'synonyms': ['locker', 'storage_locker'], 'def': 'a storage compartment for clothes and valuables; usually it has a lock', 'name': 'locker'}, {'frequency': 'f', 'id': 185, 'synset': 'cake.n.03', 'synonyms': ['cake'], 'def': 'baked goods made from or based on a mixture of flour, sugar, eggs, and fat', 'name': 'cake'}, {'frequency': 'c', 'id': 186, 'synset': 'calculator.n.02', 'synonyms': ['calculator'], 'def': 'a small machine that is used for mathematical calculations', 'name': 'calculator'}, {'frequency': 'f', 'id': 187, 'synset': 'calendar.n.02', 'synonyms': ['calendar'], 'def': 'a list or register of events (appointments/social events/court cases, etc)', 'name': 'calendar'}, {'frequency': 'c', 'id': 188, 'synset': 'calf.n.01', 'synonyms': ['calf'], 'def': 'young of domestic cattle', 'name': 'calf'}, {'frequency': 'c', 'id': 189, 'synset': 'camcorder.n.01', 'synonyms': ['camcorder'], 'def': 'a portable television camera and videocassette recorder', 'name': 'camcorder'}, {'frequency': 'c', 'id': 190, 'synset': 'camel.n.01', 'synonyms': ['camel'], 'def': 'cud-chewing mammal used as a draft or saddle animal in desert regions', 'name': 'camel'}, {'frequency': 'f', 'id': 191, 'synset': 'camera.n.01', 'synonyms': ['camera'], 'def': 'equipment for taking photographs', 'name': 'camera'}, {'frequency': 'c', 'id': 192, 'synset': 'camera_lens.n.01', 'synonyms': ['camera_lens'], 'def': 'a lens that focuses the image in a camera', 'name': 'camera_lens'}, {'frequency': 'c', 'id': 193, 'synset': 'camper.n.02', 'synonyms': ['camper_(vehicle)', 'camping_bus', 'motor_home'], 'def': 'a recreational vehicle equipped for camping out while traveling', 'name': 'camper_(vehicle)'}, {'frequency': 'f', 'id': 194, 'synset': 
'can.n.01', 'synonyms': ['can', 'tin_can'], 'def': 'airtight sealed metal container for food or drink or paint etc.', 'name': 'can'}, {'frequency': 'c', 'id': 195, 'synset': 'can_opener.n.01', 'synonyms': ['can_opener', 'tin_opener'], 'def': 'a device for cutting cans open', 'name': 'can_opener'}, {'frequency': 'r', 'id': 196, 'synset': 'candelabrum.n.01', 'synonyms': ['candelabrum', 'candelabra'], 'def': 'branched candlestick; ornamental; has several lights', 'name': 'candelabrum'}, {'frequency': 'f', 'id': 197, 'synset': 'candle.n.01', 'synonyms': ['candle', 'candlestick'], 'def': 'stick of wax with a wick in the middle', 'name': 'candle'}, {'frequency': 'f', 'id': 198, 'synset': 'candlestick.n.01', 'synonyms': ['candle_holder'], 'def': 'a holder with sockets for candles', 'name': 'candle_holder'}, {'frequency': 'r', 'id': 199, 'synset': 'candy_bar.n.01', 'synonyms': ['candy_bar'], 'def': 'a candy shaped as a bar', 'name': 'candy_bar'}, {'frequency': 'c', 'id': 200, 'synset': 'candy_cane.n.01', 'synonyms': ['candy_cane'], 'def': 'a hard candy in the shape of a rod (usually with stripes)', 'name': 'candy_cane'}, {'frequency': 'c', 'id': 201, 'synset': 'cane.n.01', 'synonyms': ['walking_cane'], 'def': 'a stick that people can lean on to help them walk', 'name': 'walking_cane'}, {'frequency': 'c', 'id': 202, 'synset': 'canister.n.02', 'synonyms': ['canister', 'cannister'], 'def': 'metal container for storing dry foods such as tea or flour', 'name': 'canister'}, {'frequency': 'r', 'id': 203, 'synset': 'cannon.n.02', 'synonyms': ['cannon'], 'def': 'heavy gun fired from a tank', 'name': 'cannon'}, {'frequency': 'c', 'id': 204, 'synset': 'canoe.n.01', 'synonyms': ['canoe'], 'def': 'small and light boat; pointed at both ends; propelled with a paddle', 'name': 'canoe'}, {'frequency': 'r', 'id': 205, 'synset': 'cantaloup.n.02', 'synonyms': ['cantaloup', 'cantaloupe'], 'def': 'the fruit of a cantaloup vine; small to medium-sized melon with yellowish flesh', 'name': 'cantaloup'}, {'frequency': 'r', 'id': 206, 'synset': 'canteen.n.01', 'synonyms': ['canteen'], 'def': 'a flask for carrying water; used by soldiers or travelers', 'name': 'canteen'}, {'frequency': 'c', 'id': 207, 'synset': 'cap.n.01', 'synonyms': ['cap_(headwear)'], 'def': 'a tight-fitting headwear', 'name': 'cap_(headwear)'}, {'frequency': 'f', 'id': 208, 'synset': 'cap.n.02', 'synonyms': ['bottle_cap', 'cap_(container_lid)'], 'def': 'a top (as for a bottle)', 'name': 'bottle_cap'}, {'frequency': 'r', 'id': 209, 'synset': 'cape.n.02', 'synonyms': ['cape'], 'def': 'a sleeveless garment like a cloak but shorter', 'name': 'cape'}, {'frequency': 'c', 'id': 210, 'synset': 'cappuccino.n.01', 'synonyms': ['cappuccino', 'coffee_cappuccino'], 'def': 'equal parts of espresso and steamed milk', 'name': 'cappuccino'}, {'frequency': 'f', 'id': 211, 'synset': 'car.n.01', 'synonyms': ['car_(automobile)', 'auto_(automobile)', 'automobile'], 'def': 'a motor vehicle with four wheels', 'name': 'car_(automobile)'}, {'frequency': 'f', 'id': 212, 'synset': 'car.n.02', 'synonyms': ['railcar_(part_of_a_train)', 'railway_car_(part_of_a_train)', 'railroad_car_(part_of_a_train)'], 'def': 'a wheeled vehicle adapted to the rails of railroad', 'name': 'railcar_(part_of_a_train)'}, {'frequency': 'r', 'id': 213, 'synset': 'car.n.04', 'synonyms': ['elevator_car'], 'def': 'where passengers ride up and down', 'name': 'elevator_car'}, {'frequency': 'r', 'id': 214, 'synset': 'car_battery.n.01', 'synonyms': ['car_battery', 'automobile_battery'], 'def': 'a battery in a motor 
vehicle', 'name': 'car_battery'}, {'frequency': 'c', 'id': 215, 'synset': 'card.n.02', 'synonyms': ['identity_card'], 'def': 'a card certifying the identity of the bearer', 'name': 'identity_card'}, {'frequency': 'c', 'id': 216, 'synset': 'card.n.03', 'synonyms': ['card'], 'def': 'a rectangular piece of paper used to send messages (e.g. greetings or pictures)', 'name': 'card'}, {'frequency': 'r', 'id': 217, 'synset': 'cardigan.n.01', 'synonyms': ['cardigan'], 'def': 'knitted jacket that is fastened up the front with buttons or a zipper', 'name': 'cardigan'}, {'frequency': 'r', 'id': 218, 'synset': 'cargo_ship.n.01', 'synonyms': ['cargo_ship', 'cargo_vessel'], 'def': 'a ship designed to carry cargo', 'name': 'cargo_ship'}, {'frequency': 'r', 'id': 219, 'synset': 'carnation.n.01', 'synonyms': ['carnation'], 'def': 'plant with pink to purple-red spice-scented usually double flowers', 'name': 'carnation'}, {'frequency': 'c', 'id': 220, 'synset': 'carriage.n.02', 'synonyms': ['horse_carriage'], 'def': 'a vehicle with wheels drawn by one or more horses', 'name': 'horse_carriage'}, {'frequency': 'f', 'id': 221, 'synset': 'carrot.n.01', 'synonyms': ['carrot'], 'def': 'deep orange edible root of the cultivated carrot plant', 'name': 'carrot'}, {'frequency': 'c', 'id': 222, 'synset': 'carryall.n.01', 'synonyms': ['tote_bag'], 'def': 'a capacious bag or basket', 'name': 'tote_bag'}, {'frequency': 'c', 'id': 223, 'synset': 'cart.n.01', 'synonyms': ['cart'], 'def': 'a heavy open wagon usually having two wheels and drawn by an animal', 'name': 'cart'}, {'frequency': 'c', 'id': 224, 'synset': 'carton.n.02', 'synonyms': ['carton'], 'def': 'a box made of cardboard; opens by flaps on top', 'name': 'carton'}, {'frequency': 'c', 'id': 225, 'synset': 'cash_register.n.01', 'synonyms': ['cash_register', 'register_(for_cash_transactions)'], 'def': 'a cashbox with an adding machine to register transactions', 'name': 'cash_register'}, {'frequency': 'r', 'id': 226, 'synset': 'casserole.n.01', 'synonyms': ['casserole'], 'def': 'food cooked and served in a casserole', 'name': 'casserole'}, {'frequency': 'r', 'id': 227, 'synset': 'cassette.n.01', 'synonyms': ['cassette'], 'def': 'a container that holds a magnetic tape used for recording or playing sound or video', 'name': 'cassette'}, {'frequency': 'c', 'id': 228, 'synset': 'cast.n.05', 'synonyms': ['cast', 'plaster_cast', 'plaster_bandage'], 'def': 'bandage consisting of a firm covering that immobilizes broken bones while they heal', 'name': 'cast'}, {'frequency': 'f', 'id': 229, 'synset': 'cat.n.01', 'synonyms': ['cat'], 'def': 'a domestic house cat', 'name': 'cat'}, {'frequency': 'c', 'id': 230, 'synset': 'cauliflower.n.02', 'synonyms': ['cauliflower'], 'def': 'edible compact head of white undeveloped flowers', 'name': 'cauliflower'}, {'frequency': 'r', 'id': 231, 'synset': 'caviar.n.01', 'synonyms': ['caviar', 'caviare'], 'def': "salted roe of sturgeon or other large fish; usually served as an hors d'oeuvre", 'name': 'caviar'}, {'frequency': 'c', 'id': 232, 'synset': 'cayenne.n.02', 'synonyms': ['cayenne_(spice)', 'cayenne_pepper_(spice)', 'red_pepper_(spice)'], 'def': 'ground pods and seeds of pungent red peppers of the genus Capsicum', 'name': 'cayenne_(spice)'}, {'frequency': 'c', 'id': 233, 'synset': 'cd_player.n.01', 'synonyms': ['CD_player'], 'def': 'electronic equipment for playing compact discs (CDs)', 'name': 'CD_player'}, {'frequency': 'c', 'id': 234, 'synset': 'celery.n.01', 'synonyms': ['celery'], 'def': 'widely cultivated herb with aromatic leaf stalks 
that are eaten raw or cooked', 'name': 'celery'}, {'frequency': 'f', 'id': 235, 'synset': 'cellular_telephone.n.01', 'synonyms': ['cellular_telephone', 'cellular_phone', 'cellphone', 'mobile_phone', 'smart_phone'], 'def': 'a hand-held mobile telephone', 'name': 'cellular_telephone'}, {'frequency': 'r', 'id': 236, 'synset': 'chain_mail.n.01', 'synonyms': ['chain_mail', 'ring_mail', 'chain_armor', 'chain_armour', 'ring_armor', 'ring_armour'], 'def': '(Middle Ages) flexible armor made of interlinked metal rings', 'name': 'chain_mail'}, {'frequency': 'f', 'id': 237, 'synset': 'chair.n.01', 'synonyms': ['chair'], 'def': 'a seat for one person, with a support for the back', 'name': 'chair'}, {'frequency': 'r', 'id': 238, 'synset': 'chaise_longue.n.01', 'synonyms': ['chaise_longue', 'chaise', 'daybed'], 'def': 'a long chair; for reclining', 'name': 'chaise_longue'}, {'frequency': 'r', 'id': 239, 'synset': 'champagne.n.01', 'synonyms': ['champagne'], 'def': 'a white sparkling wine produced in Champagne or resembling that produced there', 'name': 'champagne'}, {'frequency': 'f', 'id': 240, 'synset': 'chandelier.n.01', 'synonyms': ['chandelier'], 'def': 'branched lighting fixture; often ornate; hangs from the ceiling', 'name': 'chandelier'}, {'frequency': 'r', 'id': 241, 'synset': 'chap.n.04', 'synonyms': ['chap'], 'def': 'leather leggings without a seat; worn over trousers by cowboys to protect their legs', 'name': 'chap'}, {'frequency': 'r', 'id': 242, 'synset': 'checkbook.n.01', 'synonyms': ['checkbook', 'chequebook'], 'def': 'a book issued to holders of checking accounts', 'name': 'checkbook'}, {'frequency': 'r', 'id': 243, 'synset': 'checkerboard.n.01', 'synonyms': ['checkerboard'], 'def': 'a board having 64 squares of two alternating colors', 'name': 'checkerboard'}, {'frequency': 'c', 'id': 244, 'synset': 'cherry.n.03', 'synonyms': ['cherry'], 'def': 'a red fruit with a single hard stone', 'name': 'cherry'}, {'frequency': 'r', 'id': 245, 'synset': 'chessboard.n.01', 'synonyms': ['chessboard'], 'def': 'a checkerboard used to play chess', 'name': 'chessboard'}, {'frequency': 'r', 'id': 246, 'synset': 'chest_of_drawers.n.01', 'synonyms': ['chest_of_drawers_(furniture)', 'bureau_(furniture)', 'chest_(furniture)'], 'def': 'furniture with drawers for keeping clothes', 'name': 'chest_of_drawers_(furniture)'}, {'frequency': 'c', 'id': 247, 'synset': 'chicken.n.02', 'synonyms': ['chicken_(animal)'], 'def': 'a domestic fowl bred for flesh or eggs', 'name': 'chicken_(animal)'}, {'frequency': 'c', 'id': 248, 'synset': 'chicken_wire.n.01', 'synonyms': ['chicken_wire'], 'def': 'a galvanized wire network with a hexagonal mesh; used to build fences', 'name': 'chicken_wire'}, {'frequency': 'r', 'id': 249, 'synset': 'chickpea.n.01', 'synonyms': ['chickpea', 'garbanzo'], 'def': 'the seed of the chickpea plant; usually dried', 'name': 'chickpea'}, {'frequency': 'r', 'id': 250, 'synset': 'chihuahua.n.03', 'synonyms': ['Chihuahua'], 'def': 'an old breed of tiny short-haired dog with protruding eyes from Mexico', 'name': 'Chihuahua'}, {'frequency': 'r', 'id': 251, 'synset': 'chili.n.02', 'synonyms': ['chili_(vegetable)', 'chili_pepper_(vegetable)', 'chilli_(vegetable)', 'chilly_(vegetable)', 'chile_(vegetable)'], 'def': 'very hot and finely tapering pepper of special pungency', 'name': 'chili_(vegetable)'}, {'frequency': 'r', 'id': 252, 'synset': 'chime.n.01', 'synonyms': ['chime', 'gong'], 'def': 'an instrument consisting of a set of bells that are struck with a hammer', 'name': 'chime'}, {'frequency': 'r', 'id': 
253, 'synset': 'chinaware.n.01', 'synonyms': ['chinaware'], 'def': 'dishware made of high quality porcelain', 'name': 'chinaware'}, {'frequency': 'c', 'id': 254, 'synset': 'chip.n.04', 'synonyms': ['crisp_(potato_chip)', 'potato_chip'], 'def': 'a thin crisp slice of potato fried in deep fat', 'name': 'crisp_(potato_chip)'}, {'frequency': 'r', 'id': 255, 'synset': 'chip.n.06', 'synonyms': ['poker_chip'], 'def': 'a small disk-shaped counter used to represent money when gambling', 'name': 'poker_chip'}, {'frequency': 'c', 'id': 256, 'synset': 'chocolate_bar.n.01', 'synonyms': ['chocolate_bar'], 'def': 'a bar of chocolate candy', 'name': 'chocolate_bar'}, {'frequency': 'c', 'id': 257, 'synset': 'chocolate_cake.n.01', 'synonyms': ['chocolate_cake'], 'def': 'cake containing chocolate', 'name': 'chocolate_cake'}, {'frequency': 'r', 'id': 258, 'synset': 'chocolate_milk.n.01', 'synonyms': ['chocolate_milk'], 'def': 'milk flavored with chocolate syrup', 'name': 'chocolate_milk'}, {'frequency': 'r', 'id': 259, 'synset': 'chocolate_mousse.n.01', 'synonyms': ['chocolate_mousse'], 'def': 'dessert mousse made with chocolate', 'name': 'chocolate_mousse'}, {'frequency': 'f', 'id': 260, 'synset': 'choker.n.03', 'synonyms': ['choker', 'collar', 'neckband'], 'def': 'necklace that fits tightly around the neck', 'name': 'choker'}, {'frequency': 'f', 'id': 261, 'synset': 'chopping_board.n.01', 'synonyms': ['chopping_board', 'cutting_board', 'chopping_block'], 'def': 'a wooden board where meats or vegetables can be cut', 'name': 'chopping_board'}, {'frequency': 'c', 'id': 262, 'synset': 'chopstick.n.01', 'synonyms': ['chopstick'], 'def': 'one of a pair of slender sticks used as oriental tableware to eat food with', 'name': 'chopstick'}, {'frequency': 'f', 'id': 263, 'synset': 'christmas_tree.n.05', 'synonyms': ['Christmas_tree'], 'def': 'an ornamented evergreen used as a Christmas decoration', 'name': 'Christmas_tree'}, {'frequency': 'c', 'id': 264, 'synset': 'chute.n.02', 'synonyms': ['slide'], 'def': 'sloping channel through which things can descend', 'name': 'slide'}, {'frequency': 'r', 'id': 265, 'synset': 'cider.n.01', 'synonyms': ['cider', 'cyder'], 'def': 'a beverage made from juice pressed from apples', 'name': 'cider'}, {'frequency': 'r', 'id': 266, 'synset': 'cigar_box.n.01', 'synonyms': ['cigar_box'], 'def': 'a box for holding cigars', 'name': 'cigar_box'}, {'frequency': 'c', 'id': 267, 'synset': 'cigarette.n.01', 'synonyms': ['cigarette'], 'def': 'finely ground tobacco wrapped in paper; for smoking', 'name': 'cigarette'}, {'frequency': 'c', 'id': 268, 'synset': 'cigarette_case.n.01', 'synonyms': ['cigarette_case', 'cigarette_pack'], 'def': 'a small flat case for holding cigarettes', 'name': 'cigarette_case'}, {'frequency': 'f', 'id': 269, 'synset': 'cistern.n.02', 'synonyms': ['cistern', 'water_tank'], 'def': 'a tank that holds the water used to flush a toilet', 'name': 'cistern'}, {'frequency': 'r', 'id': 270, 'synset': 'clarinet.n.01', 'synonyms': ['clarinet'], 'def': 'a single-reed instrument with a straight tube', 'name': 'clarinet'}, {'frequency': 'r', 'id': 271, 'synset': 'clasp.n.01', 'synonyms': ['clasp'], 'def': 'a fastener (as a buckle or hook) that is used to hold two things together', 'name': 'clasp'}, {'frequency': 'c', 'id': 272, 'synset': 'cleansing_agent.n.01', 'synonyms': ['cleansing_agent', 'cleanser', 'cleaner'], 'def': 'a preparation used in cleaning something', 'name': 'cleansing_agent'}, {'frequency': 'r', 'id': 273, 'synset': 'clementine.n.01', 'synonyms': ['clementine'], 'def': 
'a variety of mandarin orange', 'name': 'clementine'}, {'frequency': 'c', 'id': 274, 'synset': 'clip.n.03', 'synonyms': ['clip'], 'def': 'any of various small fasteners used to hold loose articles together', 'name': 'clip'}, {'frequency': 'c', 'id': 275, 'synset': 'clipboard.n.01', 'synonyms': ['clipboard'], 'def': 'a small writing board with a clip at the top for holding papers', 'name': 'clipboard'}, {'frequency': 'f', 'id': 276, 'synset': 'clock.n.01', 'synonyms': ['clock', 'timepiece', 'timekeeper'], 'def': 'a timepiece that shows the time of day', 'name': 'clock'}, {'frequency': 'f', 'id': 277, 'synset': 'clock_tower.n.01', 'synonyms': ['clock_tower'], 'def': 'a tower with a large clock visible high up on an outside face', 'name': 'clock_tower'}, {'frequency': 'c', 'id': 278, 'synset': 'clothes_hamper.n.01', 'synonyms': ['clothes_hamper', 'laundry_basket', 'clothes_basket'], 'def': 'a hamper that holds dirty clothes to be washed or wet clothes to be dried', 'name': 'clothes_hamper'}, {'frequency': 'c', 'id': 279, 'synset': 'clothespin.n.01', 'synonyms': ['clothespin', 'clothes_peg'], 'def': 'wood or plastic fastener; for holding clothes on a clothesline', 'name': 'clothespin'}, {'frequency': 'r', 'id': 280, 'synset': 'clutch_bag.n.01', 'synonyms': ['clutch_bag'], 'def': "a woman's strapless purse that is carried in the hand", 'name': 'clutch_bag'}, {'frequency': 'f', 'id': 281, 'synset': 'coaster.n.03', 'synonyms': ['coaster'], 'def': 'a covering (plate or mat) that protects the surface of a table', 'name': 'coaster'}, {'frequency': 'f', 'id': 282, 'synset': 'coat.n.01', 'synonyms': ['coat'], 'def': 'an outer garment that has sleeves and covers the body from shoulder down', 'name': 'coat'}, {'frequency': 'c', 'id': 283, 'synset': 'coat_hanger.n.01', 'synonyms': ['coat_hanger', 'clothes_hanger', 'dress_hanger'], 'def': "a hanger that is shaped like a person's shoulders", 'name': 'coat_hanger'}, {'frequency': 'r', 'id': 284, 'synset': 'coatrack.n.01', 'synonyms': ['coatrack', 'hatrack'], 'def': 'a rack with hooks for temporarily holding coats and hats', 'name': 'coatrack'}, {'frequency': 'c', 'id': 285, 'synset': 'cock.n.04', 'synonyms': ['cock', 'rooster'], 'def': 'adult male chicken', 'name': 'cock'}, {'frequency': 'c', 'id': 286, 'synset': 'coconut.n.02', 'synonyms': ['coconut', 'cocoanut'], 'def': 'large hard-shelled brown oval nut with a fibrous husk', 'name': 'coconut'}, {'frequency': 'r', 'id': 287, 'synset': 'coffee_filter.n.01', 'synonyms': ['coffee_filter'], 'def': 'filter (usually of paper) that passes the coffee and retains the coffee grounds', 'name': 'coffee_filter'}, {'frequency': 'f', 'id': 288, 'synset': 'coffee_maker.n.01', 'synonyms': ['coffee_maker', 'coffee_machine'], 'def': 'a kitchen appliance for brewing coffee automatically', 'name': 'coffee_maker'}, {'frequency': 'f', 'id': 289, 'synset': 'coffee_table.n.01', 'synonyms': ['coffee_table', 'cocktail_table'], 'def': 'low table where magazines can be placed and coffee or cocktails are served', 'name': 'coffee_table'}, {'frequency': 'c', 'id': 290, 'synset': 'coffeepot.n.01', 'synonyms': ['coffeepot'], 'def': 'tall pot in which coffee is brewed', 'name': 'coffeepot'}, {'frequency': 'r', 'id': 291, 'synset': 'coil.n.05', 'synonyms': ['coil'], 'def': 'tubing that is wound in a spiral', 'name': 'coil'}, {'frequency': 'c', 'id': 292, 'synset': 'coin.n.01', 'synonyms': ['coin'], 'def': 'a flat metal piece (usually a disc) used as money', 'name': 'coin'}, {'frequency': 'r', 'id': 293, 'synset': 'colander.n.01', 
'synonyms': ['colander', 'cullender'], 'def': 'bowl-shaped strainer; used to wash or drain foods', 'name': 'colander'}, {'frequency': 'c', 'id': 294, 'synset': 'coleslaw.n.01', 'synonyms': ['coleslaw', 'slaw'], 'def': 'basically shredded cabbage', 'name': 'coleslaw'}, {'frequency': 'r', 'id': 295, 'synset': 'coloring_material.n.01', 'synonyms': ['coloring_material', 'colouring_material'], 'def': 'any material used for its color', 'name': 'coloring_material'}, {'frequency': 'r', 'id': 296, 'synset': 'combination_lock.n.01', 'synonyms': ['combination_lock'], 'def': 'lock that can be opened only by turning dials in a special sequence', 'name': 'combination_lock'}, {'frequency': 'c', 'id': 297, 'synset': 'comforter.n.04', 'synonyms': ['pacifier', 'teething_ring'], 'def': 'device used for an infant to suck or bite on', 'name': 'pacifier'}, {'frequency': 'r', 'id': 298, 'synset': 'comic_book.n.01', 'synonyms': ['comic_book'], 'def': 'a magazine devoted to comic strips', 'name': 'comic_book'}, {'frequency': 'f', 'id': 299, 'synset': 'computer_keyboard.n.01', 'synonyms': ['computer_keyboard', 'keyboard_(computer)'], 'def': 'a keyboard that is a data input device for computers', 'name': 'computer_keyboard'}, {'frequency': 'r', 'id': 300, 'synset': 'concrete_mixer.n.01', 'synonyms': ['concrete_mixer', 'cement_mixer'], 'def': 'a machine with a large revolving drum in which cement/concrete is mixed', 'name': 'concrete_mixer'}, {'frequency': 'f', 'id': 301, 'synset': 'cone.n.01', 'synonyms': ['cone', 'traffic_cone'], 'def': 'a cone-shaped object used to direct traffic', 'name': 'cone'}, {'frequency': 'f', 'id': 302, 'synset': 'control.n.09', 'synonyms': ['control', 'controller'], 'def': 'a mechanism that controls the operation of a machine', 'name': 'control'}, {'frequency': 'r', 'id': 303, 'synset': 'convertible.n.01', 'synonyms': ['convertible_(automobile)'], 'def': 'a car that has top that can be folded or removed', 'name': 'convertible_(automobile)'}, {'frequency': 'r', 'id': 304, 'synset': 'convertible.n.03', 'synonyms': ['sofa_bed'], 'def': 'a sofa that can be converted into a bed', 'name': 'sofa_bed'}, {'frequency': 'c', 'id': 305, 'synset': 'cookie.n.01', 'synonyms': ['cookie', 'cooky', 'biscuit_(cookie)'], 'def': "any of various small flat sweet cakes (`biscuit' is the British term)", 'name': 'cookie'}, {'frequency': 'r', 'id': 306, 'synset': 'cookie_jar.n.01', 'synonyms': ['cookie_jar', 'cooky_jar'], 'def': 'a jar in which cookies are kept (and sometimes money is hidden)', 'name': 'cookie_jar'}, {'frequency': 'r', 'id': 307, 'synset': 'cooking_utensil.n.01', 'synonyms': ['cooking_utensil'], 'def': 'a kitchen utensil made of material that does not melt easily; used for cooking', 'name': 'cooking_utensil'}, {'frequency': 'f', 'id': 308, 'synset': 'cooler.n.01', 'synonyms': ['cooler_(for_food)', 'ice_chest'], 'def': 'an insulated box for storing food often with ice', 'name': 'cooler_(for_food)'}, {'frequency': 'c', 'id': 309, 'synset': 'cork.n.04', 'synonyms': ['cork_(bottle_plug)', 'bottle_cork'], 'def': 'the plug in the mouth of a bottle (especially a wine bottle)', 'name': 'cork_(bottle_plug)'}, {'frequency': 'r', 'id': 310, 'synset': 'corkboard.n.01', 'synonyms': ['corkboard'], 'def': 'a sheet consisting of cork granules', 'name': 'corkboard'}, {'frequency': 'r', 'id': 311, 'synset': 'corkscrew.n.01', 'synonyms': ['corkscrew', 'bottle_screw'], 'def': 'a bottle opener that pulls corks', 'name': 'corkscrew'}, {'frequency': 'c', 'id': 312, 'synset': 'corn.n.03', 'synonyms': ['edible_corn', 
'corn', 'maize'], 'def': 'ears of corn that can be prepared and served for human food', 'name': 'edible_corn'}, {'frequency': 'r', 'id': 313, 'synset': 'cornbread.n.01', 'synonyms': ['cornbread'], 'def': 'bread made primarily of cornmeal', 'name': 'cornbread'}, {'frequency': 'c', 'id': 314, 'synset': 'cornet.n.01', 'synonyms': ['cornet', 'horn', 'trumpet'], 'def': 'a brass musical instrument with a narrow tube and a flared bell and many valves', 'name': 'cornet'}, {'frequency': 'c', 'id': 315, 'synset': 'cornice.n.01', 'synonyms': ['cornice', 'valance', 'valance_board', 'pelmet'], 'def': 'a decorative framework to conceal curtain fixtures at the top of a window casing', 'name': 'cornice'}, {'frequency': 'r', 'id': 316, 'synset': 'cornmeal.n.01', 'synonyms': ['cornmeal'], 'def': 'coarsely ground corn', 'name': 'cornmeal'}, {'frequency': 'r', 'id': 317, 'synset': 'corset.n.01', 'synonyms': ['corset', 'girdle'], 'def': "a woman's close-fitting foundation garment", 'name': 'corset'}, {'frequency': 'r', 'id': 318, 'synset': 'cos.n.02', 'synonyms': ['romaine_lettuce'], 'def': 'lettuce with long dark-green leaves in a loosely packed elongated head', 'name': 'romaine_lettuce'}, {'frequency': 'c', 'id': 319, 'synset': 'costume.n.04', 'synonyms': ['costume'], 'def': 'the attire characteristic of a country or a time or a social class', 'name': 'costume'}, {'frequency': 'r', 'id': 320, 'synset': 'cougar.n.01', 'synonyms': ['cougar', 'puma', 'catamount', 'mountain_lion', 'panther'], 'def': 'large American feline resembling a lion', 'name': 'cougar'}, {'frequency': 'r', 'id': 321, 'synset': 'coverall.n.01', 'synonyms': ['coverall'], 'def': 'a loose-fitting protective garment that is worn over other clothing', 'name': 'coverall'}, {'frequency': 'r', 'id': 322, 'synset': 'cowbell.n.01', 'synonyms': ['cowbell'], 'def': 'a bell hung around the neck of cow so that the cow can be easily located', 'name': 'cowbell'}, {'frequency': 'f', 'id': 323, 'synset': 'cowboy_hat.n.01', 'synonyms': ['cowboy_hat', 'ten-gallon_hat'], 'def': 'a hat with a wide brim and a soft crown; worn by American ranch hands', 'name': 'cowboy_hat'}, {'frequency': 'r', 'id': 324, 'synset': 'crab.n.01', 'synonyms': ['crab_(animal)'], 'def': 'decapod having eyes on short stalks and a broad flattened shell and pincers', 'name': 'crab_(animal)'}, {'frequency': 'c', 'id': 325, 'synset': 'cracker.n.01', 'synonyms': ['cracker'], 'def': 'a thin crisp wafer', 'name': 'cracker'}, {'frequency': 'r', 'id': 326, 'synset': 'crape.n.01', 'synonyms': ['crape', 'crepe', 'French_pancake'], 'def': 'small very thin pancake', 'name': 'crape'}, {'frequency': 'f', 'id': 327, 'synset': 'crate.n.01', 'synonyms': ['crate'], 'def': 'a rugged box (usually made of wood); used for shipping', 'name': 'crate'}, {'frequency': 'r', 'id': 328, 'synset': 'crayon.n.01', 'synonyms': ['crayon', 'wax_crayon'], 'def': 'writing or drawing implement made of a colored stick of composition wax', 'name': 'crayon'}, {'frequency': 'r', 'id': 329, 'synset': 'cream_pitcher.n.01', 'synonyms': ['cream_pitcher'], 'def': 'a small pitcher for serving cream', 'name': 'cream_pitcher'}, {'frequency': 'r', 'id': 330, 'synset': 'credit_card.n.01', 'synonyms': ['credit_card', 'charge_card', 'debit_card'], 'def': 'a card, usually plastic, used to pay for goods and services', 'name': 'credit_card'}, {'frequency': 'c', 'id': 331, 'synset': 'crescent_roll.n.01', 'synonyms': ['crescent_roll', 'croissant'], 'def': 'very rich flaky crescent-shaped roll', 'name': 'crescent_roll'}, {'frequency': 'c', 'id': 
332, 'synset': 'crib.n.01', 'synonyms': ['crib', 'cot'], 'def': 'baby bed with high sides made of slats', 'name': 'crib'}, {'frequency': 'c', 'id': 333, 'synset': 'crock.n.03', 'synonyms': ['crock_pot', 'earthenware_jar'], 'def': 'an earthen jar (made of baked clay)', 'name': 'crock_pot'}, {'frequency': 'f', 'id': 334, 'synset': 'crossbar.n.01', 'synonyms': ['crossbar'], 'def': 'a horizontal bar that goes across something', 'name': 'crossbar'}, {'frequency': 'r', 'id': 335, 'synset': 'crouton.n.01', 'synonyms': ['crouton'], 'def': 'a small piece of toasted or fried bread; served in soup or salads', 'name': 'crouton'}, {'frequency': 'r', 'id': 336, 'synset': 'crow.n.01', 'synonyms': ['crow'], 'def': 'black birds having a raucous call', 'name': 'crow'}, {'frequency': 'c', 'id': 337, 'synset': 'crown.n.04', 'synonyms': ['crown'], 'def': 'an ornamental jeweled headdress signifying sovereignty', 'name': 'crown'}, {'frequency': 'c', 'id': 338, 'synset': 'crucifix.n.01', 'synonyms': ['crucifix'], 'def': 'representation of the cross on which Jesus died', 'name': 'crucifix'}, {'frequency': 'c', 'id': 339, 'synset': 'cruise_ship.n.01', 'synonyms': ['cruise_ship', 'cruise_liner'], 'def': 'a passenger ship used commercially for pleasure cruises', 'name': 'cruise_ship'}, {'frequency': 'c', 'id': 340, 'synset': 'cruiser.n.01', 'synonyms': ['police_cruiser', 'patrol_car', 'police_car', 'squad_car'], 'def': 'a car in which policemen cruise the streets', 'name': 'police_cruiser'}, {'frequency': 'c', 'id': 341, 'synset': 'crumb.n.03', 'synonyms': ['crumb'], 'def': 'small piece of e.g. bread or cake', 'name': 'crumb'}, {'frequency': 'r', 'id': 342, 'synset': 'crutch.n.01', 'synonyms': ['crutch'], 'def': 'a wooden or metal staff that fits under the armpit and reaches to the ground', 'name': 'crutch'}, {'frequency': 'c', 'id': 343, 'synset': 'cub.n.03', 'synonyms': ['cub_(animal)'], 'def': 'the young of certain carnivorous mammals such as the bear or wolf or lion', 'name': 'cub_(animal)'}, {'frequency': 'r', 'id': 344, 'synset': 'cube.n.05', 'synonyms': ['cube', 'square_block'], 'def': 'a block in the (approximate) shape of a cube', 'name': 'cube'}, {'frequency': 'f', 'id': 345, 'synset': 'cucumber.n.02', 'synonyms': ['cucumber', 'cuke'], 'def': 'cylindrical green fruit with thin green rind and white flesh eaten as a vegetable', 'name': 'cucumber'}, {'frequency': 'c', 'id': 346, 'synset': 'cufflink.n.01', 'synonyms': ['cufflink'], 'def': 'jewelry consisting of linked buttons used to fasten the cuffs of a shirt', 'name': 'cufflink'}, {'frequency': 'f', 'id': 347, 'synset': 'cup.n.01', 'synonyms': ['cup'], 'def': 'a small open container usually used for drinking; usually has a handle', 'name': 'cup'}, {'frequency': 'c', 'id': 348, 'synset': 'cup.n.08', 'synonyms': ['trophy_cup'], 'def': 'a metal vessel with handles that is awarded as a trophy to a competition winner', 'name': 'trophy_cup'}, {'frequency': 'c', 'id': 349, 'synset': 'cupcake.n.01', 'synonyms': ['cupcake'], 'def': 'small cake baked in a muffin tin', 'name': 'cupcake'}, {'frequency': 'r', 'id': 350, 'synset': 'curler.n.01', 'synonyms': ['hair_curler', 'hair_roller', 'hair_crimper'], 'def': 'a cylindrical tube around which the hair is wound to curl it', 'name': 'hair_curler'}, {'frequency': 'r', 'id': 351, 'synset': 'curling_iron.n.01', 'synonyms': ['curling_iron'], 'def': 'a cylindrical home appliance that heats hair that has been curled around it', 'name': 'curling_iron'}, {'frequency': 'f', 'id': 352, 'synset': 'curtain.n.01', 'synonyms': 
['curtain', 'drapery'], 'def': 'hanging cloth used as a blind (especially for a window)', 'name': 'curtain'}, {'frequency': 'f', 'id': 353, 'synset': 'cushion.n.03', 'synonyms': ['cushion'], 'def': 'a soft bag filled with air or padding such as feathers or foam rubber', 'name': 'cushion'}, {'frequency': 'r', 'id': 354, 'synset': 'custard.n.01', 'synonyms': ['custard'], 'def': 'sweetened mixture of milk and eggs baked or boiled or frozen', 'name': 'custard'}, {'frequency': 'c', 'id': 355, 'synset': 'cutter.n.06', 'synonyms': ['cutting_tool'], 'def': 'a cutting implement; a tool for cutting', 'name': 'cutting_tool'}, {'frequency': 'r', 'id': 356, 'synset': 'cylinder.n.04', 'synonyms': ['cylinder'], 'def': 'a cylindrical container', 'name': 'cylinder'}, {'frequency': 'r', 'id': 357, 'synset': 'cymbal.n.01', 'synonyms': ['cymbal'], 'def': 'a percussion instrument consisting of a concave brass disk', 'name': 'cymbal'}, {'frequency': 'r', 'id': 358, 'synset': 'dachshund.n.01', 'synonyms': ['dachshund', 'dachsie', 'badger_dog'], 'def': 'small long-bodied short-legged breed of dog having a short sleek coat and long drooping ears', 'name': 'dachshund'}, {'frequency': 'r', 'id': 359, 'synset': 'dagger.n.01', 'synonyms': ['dagger'], 'def': 'a short knife with a pointed blade used for piercing or stabbing', 'name': 'dagger'}, {'frequency': 'r', 'id': 360, 'synset': 'dartboard.n.01', 'synonyms': ['dartboard'], 'def': 'a circular board of wood or cork used as the target in the game of darts', 'name': 'dartboard'}, {'frequency': 'r', 'id': 361, 'synset': 'date.n.08', 'synonyms': ['date_(fruit)'], 'def': 'sweet edible fruit of the date palm with a single long woody seed', 'name': 'date_(fruit)'}, {'frequency': 'f', 'id': 362, 'synset': 'deck_chair.n.01', 'synonyms': ['deck_chair', 'beach_chair'], 'def': 'a folding chair for use outdoors; a wooden frame supports a length of canvas', 'name': 'deck_chair'}, {'frequency': 'c', 'id': 363, 'synset': 'deer.n.01', 'synonyms': ['deer', 'cervid'], 'def': "distinguished from Bovidae by the male's having solid deciduous antlers", 'name': 'deer'}, {'frequency': 'c', 'id': 364, 'synset': 'dental_floss.n.01', 'synonyms': ['dental_floss', 'floss'], 'def': 'a soft thread for cleaning the spaces between the teeth', 'name': 'dental_floss'}, {'frequency': 'f', 'id': 365, 'synset': 'desk.n.01', 'synonyms': ['desk'], 'def': 'a piece of furniture with a writing surface and usually drawers or other compartments', 'name': 'desk'}, {'frequency': 'r', 'id': 366, 'synset': 'detergent.n.01', 'synonyms': ['detergent'], 'def': 'a surface-active chemical widely used in industry and laundering', 'name': 'detergent'}, {'frequency': 'c', 'id': 367, 'synset': 'diaper.n.01', 'synonyms': ['diaper'], 'def': 'garment consisting of a folded cloth drawn up between the legs and fastened at the waist', 'name': 'diaper'}, {'frequency': 'r', 'id': 368, 'synset': 'diary.n.01', 'synonyms': ['diary', 'journal'], 'def': 'a daily written record of (usually personal) experiences and observations', 'name': 'diary'}, {'frequency': 'r', 'id': 369, 'synset': 'die.n.01', 'synonyms': ['die', 'dice'], 'def': 'a small cube with 1 to 6 spots on the six faces; used in gambling', 'name': 'die'}, {'frequency': 'r', 'id': 370, 'synset': 'dinghy.n.01', 'synonyms': ['dinghy', 'dory', 'rowboat'], 'def': 'a small boat of shallow draft with seats and oars with which it is propelled', 'name': 'dinghy'}, {'frequency': 'f', 'id': 371, 'synset': 'dining_table.n.01', 'synonyms': ['dining_table'], 'def': 'a table at which meals 
are served', 'name': 'dining_table'}, {'frequency': 'r', 'id': 372, 'synset': 'dinner_jacket.n.01', 'synonyms': ['tux', 'tuxedo'], 'def': 'semiformal evening dress for men', 'name': 'tux'}, {'frequency': 'c', 'id': 373, 'synset': 'dish.n.01', 'synonyms': ['dish'], 'def': 'a piece of dishware normally used as a container for holding or serving food', 'name': 'dish'}, {'frequency': 'c', 'id': 374, 'synset': 'dish.n.05', 'synonyms': ['dish_antenna'], 'def': 'directional antenna consisting of a parabolic reflector', 'name': 'dish_antenna'}, {'frequency': 'c', 'id': 375, 'synset': 'dishrag.n.01', 'synonyms': ['dishrag', 'dishcloth'], 'def': 'a cloth for washing dishes', 'name': 'dishrag'}, {'frequency': 'c', 'id': 376, 'synset': 'dishtowel.n.01', 'synonyms': ['dishtowel', 'tea_towel'], 'def': 'a towel for drying dishes', 'name': 'dishtowel'}, {'frequency': 'f', 'id': 377, 'synset': 'dishwasher.n.01', 'synonyms': ['dishwasher', 'dishwashing_machine'], 'def': 'a machine for washing dishes', 'name': 'dishwasher'}, {'frequency': 'r', 'id': 378, 'synset': 'dishwasher_detergent.n.01', 'synonyms': ['dishwasher_detergent', 'dishwashing_detergent', 'dishwashing_liquid'], 'def': 'a low-sudsing detergent designed for use in dishwashers', 'name': 'dishwasher_detergent'}, {'frequency': 'r', 'id': 379, 'synset': 'diskette.n.01', 'synonyms': ['diskette', 'floppy', 'floppy_disk'], 'def': 'a small plastic magnetic disk enclosed in a stiff envelope used to store data', 'name': 'diskette'}, {'frequency': 'c', 'id': 380, 'synset': 'dispenser.n.01', 'synonyms': ['dispenser'], 'def': 'a container so designed that the contents can be used in prescribed amounts', 'name': 'dispenser'}, {'frequency': 'c', 'id': 381, 'synset': 'dixie_cup.n.01', 'synonyms': ['Dixie_cup', 'paper_cup'], 'def': 'a disposable cup made of paper; for holding drinks', 'name': 'Dixie_cup'}, {'frequency': 'f', 'id': 382, 'synset': 'dog.n.01', 'synonyms': ['dog'], 'def': 'a common domesticated dog', 'name': 'dog'}, {'frequency': 'f', 'id': 383, 'synset': 'dog_collar.n.01', 'synonyms': ['dog_collar'], 'def': 'a collar for a dog', 'name': 'dog_collar'}, {'frequency': 'c', 'id': 384, 'synset': 'doll.n.01', 'synonyms': ['doll'], 'def': 'a toy replica of a HUMAN (NOT AN ANIMAL)', 'name': 'doll'}, {'frequency': 'r', 'id': 385, 'synset': 'dollar.n.02', 'synonyms': ['dollar', 'dollar_bill', 'one_dollar_bill'], 'def': 'a piece of paper money worth one dollar', 'name': 'dollar'}, {'frequency': 'r', 'id': 386, 'synset': 'dolphin.n.02', 'synonyms': ['dolphin'], 'def': 'any of various small toothed whales with a beaklike snout; larger than porpoises', 'name': 'dolphin'}, {'frequency': 'c', 'id': 387, 'synset': 'domestic_ass.n.01', 'synonyms': ['domestic_ass', 'donkey'], 'def': 'domestic beast of burden descended from the African wild ass; patient but stubborn', 'name': 'domestic_ass'}, {'frequency': 'r', 'id': 388, 'synset': 'domino.n.03', 'synonyms': ['eye_mask'], 'def': 'a mask covering the upper part of the face but with holes for the eyes', 'name': 'eye_mask'}, {'frequency': 'r', 'id': 389, 'synset': 'doorbell.n.01', 'synonyms': ['doorbell', 'buzzer'], 'def': 'a button at an outer door that gives a ringing or buzzing signal when pushed', 'name': 'doorbell'}, {'frequency': 'f', 'id': 390, 'synset': 'doorknob.n.01', 'synonyms': ['doorknob', 'doorhandle'], 'def': "a knob used to open a door (often called `doorhandle' in Great Britain)", 'name': 'doorknob'}, {'frequency': 'c', 'id': 391, 'synset': 'doormat.n.02', 'synonyms': ['doormat', 'welcome_mat'], 'def': 
'a mat placed outside an exterior door for wiping the shoes before entering', 'name': 'doormat'}, {'frequency': 'f', 'id': 392, 'synset': 'doughnut.n.02', 'synonyms': ['doughnut', 'donut'], 'def': 'a small ring-shaped friedcake', 'name': 'doughnut'}, {'frequency': 'r', 'id': 393, 'synset': 'dove.n.01', 'synonyms': ['dove'], 'def': 'any of numerous small pigeons', 'name': 'dove'}, {'frequency': 'r', 'id': 394, 'synset': 'dragonfly.n.01', 'synonyms': ['dragonfly'], 'def': 'slender-bodied non-stinging insect having iridescent wings that are outspread at rest', 'name': 'dragonfly'}, {'frequency': 'f', 'id': 395, 'synset': 'drawer.n.01', 'synonyms': ['drawer'], 'def': 'a boxlike container in a piece of furniture; made so as to slide in and out', 'name': 'drawer'}, {'frequency': 'c', 'id': 396, 'synset': 'drawers.n.01', 'synonyms': ['underdrawers', 'boxers', 'boxershorts'], 'def': 'underpants worn by men', 'name': 'underdrawers'}, {'frequency': 'f', 'id': 397, 'synset': 'dress.n.01', 'synonyms': ['dress', 'frock'], 'def': 'a one-piece garment for a woman; has skirt and bodice', 'name': 'dress'}, {'frequency': 'c', 'id': 398, 'synset': 'dress_hat.n.01', 'synonyms': ['dress_hat', 'high_hat', 'opera_hat', 'silk_hat', 'top_hat'], 'def': "a man's hat with a tall crown; usually covered with silk or with beaver fur", 'name': 'dress_hat'}, {'frequency': 'c', 'id': 399, 'synset': 'dress_suit.n.01', 'synonyms': ['dress_suit'], 'def': 'formalwear consisting of full evening dress for men', 'name': 'dress_suit'}, {'frequency': 'c', 'id': 400, 'synset': 'dresser.n.05', 'synonyms': ['dresser'], 'def': 'a cabinet with shelves', 'name': 'dresser'}, {'frequency': 'c', 'id': 401, 'synset': 'drill.n.01', 'synonyms': ['drill'], 'def': 'a tool with a sharp rotating point for making holes in hard materials', 'name': 'drill'}, {'frequency': 'r', 'id': 402, 'synset': 'drinking_fountain.n.01', 'synonyms': ['drinking_fountain'], 'def': 'a public fountain to provide a jet of drinking water', 'name': 'drinking_fountain'}, {'frequency': 'r', 'id': 403, 'synset': 'drone.n.04', 'synonyms': ['drone'], 'def': 'an aircraft without a pilot that is operated by remote control', 'name': 'drone'}, {'frequency': 'r', 'id': 404, 'synset': 'dropper.n.01', 'synonyms': ['dropper', 'eye_dropper'], 'def': 'pipet consisting of a small tube with a vacuum bulb at one end for drawing liquid in and releasing it a drop at a time', 'name': 'dropper'}, {'frequency': 'c', 'id': 405, 'synset': 'drum.n.01', 'synonyms': ['drum_(musical_instrument)'], 'def': 'a musical percussion instrument; usually consists of a hollow cylinder with a membrane stretched across each end', 'name': 'drum_(musical_instrument)'}, {'frequency': 'r', 'id': 406, 'synset': 'drumstick.n.02', 'synonyms': ['drumstick'], 'def': 'a stick used for playing a drum', 'name': 'drumstick'}, {'frequency': 'f', 'id': 407, 'synset': 'duck.n.01', 'synonyms': ['duck'], 'def': 'small web-footed broad-billed swimming bird', 'name': 'duck'}, {'frequency': 'r', 'id': 408, 'synset': 'duckling.n.02', 'synonyms': ['duckling'], 'def': 'young duck', 'name': 'duckling'}, {'frequency': 'c', 'id': 409, 'synset': 'duct_tape.n.01', 'synonyms': ['duct_tape'], 'def': 'a wide silvery adhesive tape', 'name': 'duct_tape'}, {'frequency': 'f', 'id': 410, 'synset': 'duffel_bag.n.01', 'synonyms': ['duffel_bag', 'duffle_bag', 'duffel', 'duffle'], 'def': 'a large cylindrical bag of heavy cloth', 'name': 'duffel_bag'}, {'frequency': 'r', 'id': 411, 'synset': 'dumbbell.n.01', 'synonyms': ['dumbbell'], 'def': 'an 
exercising weight with two ball-like ends connected by a short handle', 'name': 'dumbbell'}, {'frequency': 'c', 'id': 412, 'synset': 'dumpster.n.01', 'synonyms': ['dumpster'], 'def': 'a container designed to receive and transport and dump waste', 'name': 'dumpster'}, {'frequency': 'r', 'id': 413, 'synset': 'dustpan.n.02', 'synonyms': ['dustpan'], 'def': 'a short-handled receptacle into which dust can be swept', 'name': 'dustpan'}, {'frequency': 'r', 'id': 414, 'synset': 'dutch_oven.n.02', 'synonyms': ['Dutch_oven'], 'def': 'iron or earthenware cooking pot; used for stews', 'name': 'Dutch_oven'}, {'frequency': 'c', 'id': 415, 'synset': 'eagle.n.01', 'synonyms': ['eagle'], 'def': 'large birds of prey noted for their broad wings and strong soaring flight', 'name': 'eagle'}, {'frequency': 'f', 'id': 416, 'synset': 'earphone.n.01', 'synonyms': ['earphone', 'earpiece', 'headphone'], 'def': 'device for listening to audio that is held over or inserted into the ear', 'name': 'earphone'}, {'frequency': 'r', 'id': 417, 'synset': 'earplug.n.01', 'synonyms': ['earplug'], 'def': 'a soft plug that is inserted into the ear canal to block sound', 'name': 'earplug'}, {'frequency': 'f', 'id': 418, 'synset': 'earring.n.01', 'synonyms': ['earring'], 'def': 'jewelry to ornament the ear', 'name': 'earring'}, {'frequency': 'c', 'id': 419, 'synset': 'easel.n.01', 'synonyms': ['easel'], 'def': "an upright tripod for displaying something (usually an artist's canvas)", 'name': 'easel'}, {'frequency': 'r', 'id': 420, 'synset': 'eclair.n.01', 'synonyms': ['eclair'], 'def': 'oblong cream puff', 'name': 'eclair'}, {'frequency': 'r', 'id': 421, 'synset': 'eel.n.01', 'synonyms': ['eel'], 'def': 'an elongate fish with fatty flesh', 'name': 'eel'}, {'frequency': 'f', 'id': 422, 'synset': 'egg.n.02', 'synonyms': ['egg', 'eggs'], 'def': 'oval reproductive body of a fowl (especially a hen) used as food', 'name': 'egg'}, {'frequency': 'r', 'id': 423, 'synset': 'egg_roll.n.01', 'synonyms': ['egg_roll', 'spring_roll'], 'def': 'minced vegetables and meat wrapped in a pancake and fried', 'name': 'egg_roll'}, {'frequency': 'c', 'id': 424, 'synset': 'egg_yolk.n.01', 'synonyms': ['egg_yolk', 'yolk_(egg)'], 'def': 'the yellow spherical part of an egg', 'name': 'egg_yolk'}, {'frequency': 'c', 'id': 425, 'synset': 'eggbeater.n.02', 'synonyms': ['eggbeater', 'eggwhisk'], 'def': 'a mixer for beating eggs or whipping cream', 'name': 'eggbeater'}, {'frequency': 'c', 'id': 426, 'synset': 'eggplant.n.01', 'synonyms': ['eggplant', 'aubergine'], 'def': 'egg-shaped vegetable having a shiny skin typically dark purple', 'name': 'eggplant'}, {'frequency': 'r', 'id': 427, 'synset': 'electric_chair.n.01', 'synonyms': ['electric_chair'], 'def': 'a chair-shaped instrument of execution by electrocution', 'name': 'electric_chair'}, {'frequency': 'f', 'id': 428, 'synset': 'electric_refrigerator.n.01', 'synonyms': ['refrigerator'], 'def': 'a refrigerator in which the coolant is pumped around by an electric motor', 'name': 'refrigerator'}, {'frequency': 'f', 'id': 429, 'synset': 'elephant.n.01', 'synonyms': ['elephant'], 'def': 'a common elephant', 'name': 'elephant'}, {'frequency': 'r', 'id': 430, 'synset': 'elk.n.01', 'synonyms': ['elk', 'moose'], 'def': 'large northern deer with enormous flattened antlers in the male', 'name': 'elk'}, {'frequency': 'c', 'id': 431, 'synset': 'envelope.n.01', 'synonyms': ['envelope'], 'def': 'a flat (usually rectangular) container for a letter, thin package, etc.', 'name': 'envelope'}, {'frequency': 'c', 'id': 432, 'synset': 
'eraser.n.01', 'synonyms': ['eraser'], 'def': 'an implement used to erase something', 'name': 'eraser'}, {'frequency': 'r', 'id': 433, 'synset': 'escargot.n.01', 'synonyms': ['escargot'], 'def': 'edible snail usually served in the shell with a sauce of melted butter and garlic', 'name': 'escargot'}, {'frequency': 'r', 'id': 434, 'synset': 'eyepatch.n.01', 'synonyms': ['eyepatch'], 'def': 'a protective cloth covering for an injured eye', 'name': 'eyepatch'}, {'frequency': 'r', 'id': 435, 'synset': 'falcon.n.01', 'synonyms': ['falcon'], 'def': 'birds of prey having long pointed powerful wings adapted for swift flight', 'name': 'falcon'}, {'frequency': 'f', 'id': 436, 'synset': 'fan.n.01', 'synonyms': ['fan'], 'def': 'a device for creating a current of air by movement of a surface or surfaces', 'name': 'fan'}, {'frequency': 'f', 'id': 437, 'synset': 'faucet.n.01', 'synonyms': ['faucet', 'spigot', 'tap'], 'def': 'a regulator for controlling the flow of a liquid from a reservoir', 'name': 'faucet'}, {'frequency': 'r', 'id': 438, 'synset': 'fedora.n.01', 'synonyms': ['fedora'], 'def': 'a hat made of felt with a creased crown', 'name': 'fedora'}, {'frequency': 'r', 'id': 439, 'synset': 'ferret.n.02', 'synonyms': ['ferret'], 'def': 'domesticated albino variety of the European polecat bred for hunting rats and rabbits', 'name': 'ferret'}, {'frequency': 'c', 'id': 440, 'synset': 'ferris_wheel.n.01', 'synonyms': ['Ferris_wheel'], 'def': 'a large wheel with suspended seats that remain upright as the wheel rotates', 'name': 'Ferris_wheel'}, {'frequency': 'r', 'id': 441, 'synset': 'ferry.n.01', 'synonyms': ['ferry', 'ferryboat'], 'def': 'a boat that transports people or vehicles across a body of water and operates on a regular schedule', 'name': 'ferry'}, {'frequency': 'r', 'id': 442, 'synset': 'fig.n.04', 'synonyms': ['fig_(fruit)'], 'def': 'fleshy sweet pear-shaped yellowish or purple fruit eaten fresh or preserved or dried', 'name': 'fig_(fruit)'}, {'frequency': 'c', 'id': 443, 'synset': 'fighter.n.02', 'synonyms': ['fighter_jet', 'fighter_aircraft', 'attack_aircraft'], 'def': 'a high-speed military or naval airplane designed to destroy enemy targets', 'name': 'fighter_jet'}, {'frequency': 'f', 'id': 444, 'synset': 'figurine.n.01', 'synonyms': ['figurine'], 'def': 'a small carved or molded figure', 'name': 'figurine'}, {'frequency': 'c', 'id': 445, 'synset': 'file.n.03', 'synonyms': ['file_cabinet', 'filing_cabinet'], 'def': 'office furniture consisting of a container for keeping papers in order', 'name': 'file_cabinet'}, {'frequency': 'r', 'id': 446, 'synset': 'file.n.04', 'synonyms': ['file_(tool)'], 'def': 'a steel hand tool with small sharp teeth on some or all of its surfaces; used for smoothing wood or metal', 'name': 'file_(tool)'}, {'frequency': 'f', 'id': 447, 'synset': 'fire_alarm.n.02', 'synonyms': ['fire_alarm', 'smoke_alarm'], 'def': 'an alarm that is tripped off by fire or smoke', 'name': 'fire_alarm'}, {'frequency': 'c', 'id': 448, 'synset': 'fire_engine.n.01', 'synonyms': ['fire_engine', 'fire_truck'], 'def': 'large trucks that carry firefighters and equipment to the site of a fire', 'name': 'fire_engine'}, {'frequency': 'c', 'id': 449, 'synset': 'fire_extinguisher.n.01', 'synonyms': ['fire_extinguisher', 'extinguisher'], 'def': 'a manually operated device for extinguishing small fires', 'name': 'fire_extinguisher'}, {'frequency': 'c', 'id': 450, 'synset': 'fire_hose.n.01', 'synonyms': ['fire_hose'], 'def': 'a large hose that carries water from a fire hydrant to the site of the fire', 
'name': 'fire_hose'}, {'frequency': 'f', 'id': 451, 'synset': 'fireplace.n.01', 'synonyms': ['fireplace'], 'def': 'an open recess in a wall at the base of a chimney where a fire can be built', 'name': 'fireplace'}, {'frequency': 'f', 'id': 452, 'synset': 'fireplug.n.01', 'synonyms': ['fireplug', 'fire_hydrant', 'hydrant'], 'def': 'an upright hydrant for drawing water to use in fighting a fire', 'name': 'fireplug'}, {'frequency': 'c', 'id': 453, 'synset': 'fish.n.01', 'synonyms': ['fish'], 'def': 'any of various mostly cold-blooded aquatic vertebrates usually having scales and breathing through gills', 'name': 'fish'}, {'frequency': 'r', 'id': 454, 'synset': 'fish.n.02', 'synonyms': ['fish_(food)'], 'def': 'the flesh of fish used as food', 'name': 'fish_(food)'}, {'frequency': 'r', 'id': 455, 'synset': 'fishbowl.n.02', 'synonyms': ['fishbowl', 'goldfish_bowl'], 'def': 'a transparent bowl in which small fish are kept', 'name': 'fishbowl'}, {'frequency': 'r', 'id': 456, 'synset': 'fishing_boat.n.01', 'synonyms': ['fishing_boat', 'fishing_vessel'], 'def': 'a vessel for fishing', 'name': 'fishing_boat'}, {'frequency': 'c', 'id': 457, 'synset': 'fishing_rod.n.01', 'synonyms': ['fishing_rod', 'fishing_pole'], 'def': 'a rod that is used in fishing to extend the fishing line', 'name': 'fishing_rod'}, {'frequency': 'f', 'id': 458, 'synset': 'flag.n.01', 'synonyms': ['flag'], 'def': 'emblem usually consisting of a rectangular piece of cloth of distinctive design (do not include pole)', 'name': 'flag'}, {'frequency': 'f', 'id': 459, 'synset': 'flagpole.n.02', 'synonyms': ['flagpole', 'flagstaff'], 'def': 'a tall staff or pole on which a flag is raised', 'name': 'flagpole'}, {'frequency': 'c', 'id': 460, 'synset': 'flamingo.n.01', 'synonyms': ['flamingo'], 'def': 'large pink web-footed bird with down-bent bill', 'name': 'flamingo'}, {'frequency': 'c', 'id': 461, 'synset': 'flannel.n.01', 'synonyms': ['flannel'], 'def': 'a soft light woolen fabric; used for clothing', 'name': 'flannel'}, {'frequency': 'r', 'id': 462, 'synset': 'flash.n.10', 'synonyms': ['flash', 'flashbulb'], 'def': 'a lamp for providing momentary light to take a photograph', 'name': 'flash'}, {'frequency': 'c', 'id': 463, 'synset': 'flashlight.n.01', 'synonyms': ['flashlight', 'torch'], 'def': 'a small portable battery-powered electric lamp', 'name': 'flashlight'}, {'frequency': 'r', 'id': 464, 'synset': 'fleece.n.03', 'synonyms': ['fleece'], 'def': 'a soft bulky fabric with deep pile; used chiefly for clothing', 'name': 'fleece'}, {'frequency': 'f', 'id': 465, 'synset': 'flip-flop.n.02', 'synonyms': ['flip-flop_(sandal)'], 'def': 'a backless sandal held to the foot by a thong between two toes', 'name': 'flip-flop_(sandal)'}, {'frequency': 'c', 'id': 466, 'synset': 'flipper.n.01', 'synonyms': ['flipper_(footwear)', 'fin_(footwear)'], 'def': 'a shoe to aid a person in swimming', 'name': 'flipper_(footwear)'}, {'frequency': 'f', 'id': 467, 'synset': 'flower_arrangement.n.01', 'synonyms': ['flower_arrangement', 'floral_arrangement'], 'def': 'a decorative arrangement of flowers', 'name': 'flower_arrangement'}, {'frequency': 'c', 'id': 468, 'synset': 'flute.n.02', 'synonyms': ['flute_glass', 'champagne_flute'], 'def': 'a tall narrow wineglass', 'name': 'flute_glass'}, {'frequency': 'r', 'id': 469, 'synset': 'foal.n.01', 'synonyms': ['foal'], 'def': 'a young horse', 'name': 'foal'}, {'frequency': 'c', 'id': 470, 'synset': 'folding_chair.n.01', 'synonyms': ['folding_chair'], 'def': 'a chair that can be folded flat for storage', 'name': 
'folding_chair'}, {'frequency': 'c', 'id': 471, 'synset': 'food_processor.n.01', 'synonyms': ['food_processor'], 'def': 'a kitchen appliance for shredding, blending, chopping, or slicing food', 'name': 'food_processor'}, {'frequency': 'c', 'id': 472, 'synset': 'football.n.02', 'synonyms': ['football_(American)'], 'def': 'the inflated oblong ball used in playing American football', 'name': 'football_(American)'}, {'frequency': 'r', 'id': 473, 'synset': 'football_helmet.n.01', 'synonyms': ['football_helmet'], 'def': 'a padded helmet with a face mask to protect the head of football players', 'name': 'football_helmet'}, {'frequency': 'c', 'id': 474, 'synset': 'footstool.n.01', 'synonyms': ['footstool', 'footrest'], 'def': 'a low seat or a stool to rest the feet of a seated person', 'name': 'footstool'}, {'frequency': 'f', 'id': 475, 'synset': 'fork.n.01', 'synonyms': ['fork'], 'def': 'cutlery used for serving and eating food', 'name': 'fork'}, {'frequency': 'r', 'id': 476, 'synset': 'forklift.n.01', 'synonyms': ['forklift'], 'def': 'an industrial vehicle with a power operated fork in front that can be inserted under loads to lift and move them', 'name': 'forklift'}, {'frequency': 'r', 'id': 477, 'synset': 'freight_car.n.01', 'synonyms': ['freight_car'], 'def': 'a railway car that carries freight', 'name': 'freight_car'}, {'frequency': 'r', 'id': 478, 'synset': 'french_toast.n.01', 'synonyms': ['French_toast'], 'def': 'bread slice dipped in egg and milk and fried', 'name': 'French_toast'}, {'frequency': 'c', 'id': 479, 'synset': 'freshener.n.01', 'synonyms': ['freshener', 'air_freshener'], 'def': 'anything that freshens', 'name': 'freshener'}, {'frequency': 'f', 'id': 480, 'synset': 'frisbee.n.01', 'synonyms': ['frisbee'], 'def': 'a light, plastic disk propelled with a flip of the wrist for recreation or competition', 'name': 'frisbee'}, {'frequency': 'c', 'id': 481, 'synset': 'frog.n.01', 'synonyms': ['frog', 'toad', 'toad_frog'], 'def': 'a tailless stout-bodied amphibians with long hind limbs for leaping', 'name': 'frog'}, {'frequency': 'c', 'id': 482, 'synset': 'fruit_juice.n.01', 'synonyms': ['fruit_juice'], 'def': 'drink produced by squeezing or crushing fruit', 'name': 'fruit_juice'}, {'frequency': 'r', 'id': 483, 'synset': 'fruit_salad.n.01', 'synonyms': ['fruit_salad'], 'def': 'salad composed of fruits', 'name': 'fruit_salad'}, {'frequency': 'c', 'id': 484, 'synset': 'frying_pan.n.01', 'synonyms': ['frying_pan', 'frypan', 'skillet'], 'def': 'a pan used for frying foods', 'name': 'frying_pan'}, {'frequency': 'r', 'id': 485, 'synset': 'fudge.n.01', 'synonyms': ['fudge'], 'def': 'soft creamy candy', 'name': 'fudge'}, {'frequency': 'r', 'id': 486, 'synset': 'funnel.n.02', 'synonyms': ['funnel'], 'def': 'a cone-shaped utensil used to channel a substance into a container with a small mouth', 'name': 'funnel'}, {'frequency': 'c', 'id': 487, 'synset': 'futon.n.01', 'synonyms': ['futon'], 'def': 'a pad that is used for sleeping on the floor or on a raised frame', 'name': 'futon'}, {'frequency': 'r', 'id': 488, 'synset': 'gag.n.02', 'synonyms': ['gag', 'muzzle'], 'def': "restraint put into a person's mouth to prevent speaking or shouting", 'name': 'gag'}, {'frequency': 'r', 'id': 489, 'synset': 'garbage.n.03', 'synonyms': ['garbage'], 'def': 'a receptacle where waste can be discarded', 'name': 'garbage'}, {'frequency': 'c', 'id': 490, 'synset': 'garbage_truck.n.01', 'synonyms': ['garbage_truck'], 'def': 'a truck for collecting domestic refuse', 'name': 'garbage_truck'}, {'frequency': 'c', 'id': 
491, 'synset': 'garden_hose.n.01', 'synonyms': ['garden_hose'], 'def': 'a hose used for watering a lawn or garden', 'name': 'garden_hose'}, {'frequency': 'c', 'id': 492, 'synset': 'gargle.n.01', 'synonyms': ['gargle', 'mouthwash'], 'def': 'a medicated solution used for gargling and rinsing the mouth', 'name': 'gargle'}, {'frequency': 'r', 'id': 493, 'synset': 'gargoyle.n.02', 'synonyms': ['gargoyle'], 'def': 'an ornament consisting of a grotesquely carved figure of a person or animal', 'name': 'gargoyle'}, {'frequency': 'c', 'id': 494, 'synset': 'garlic.n.02', 'synonyms': ['garlic', 'ail'], 'def': 'aromatic bulb used as seasoning', 'name': 'garlic'}, {'frequency': 'r', 'id': 495, 'synset': 'gasmask.n.01', 'synonyms': ['gasmask', 'respirator', 'gas_helmet'], 'def': 'a protective face mask with a filter', 'name': 'gasmask'}, {'frequency': 'r', 'id': 496, 'synset': 'gazelle.n.01', 'synonyms': ['gazelle'], 'def': 'small swift graceful antelope of Africa and Asia having lustrous eyes', 'name': 'gazelle'}, {'frequency': 'c', 'id': 497, 'synset': 'gelatin.n.02', 'synonyms': ['gelatin', 'jelly'], 'def': 'an edible jelly made with gelatin and used as a dessert or salad base or a coating for foods', 'name': 'gelatin'}, {'frequency': 'r', 'id': 498, 'synset': 'gem.n.02', 'synonyms': ['gemstone'], 'def': 'a crystalline rock that can be cut and polished for jewelry', 'name': 'gemstone'}, {'frequency': 'c', 'id': 499, 'synset': 'giant_panda.n.01', 'synonyms': ['giant_panda', 'panda', 'panda_bear'], 'def': 'large black-and-white herbivorous mammal of bamboo forests of China and Tibet', 'name': 'giant_panda'}, {'frequency': 'c', 'id': 500, 'synset': 'gift_wrap.n.01', 'synonyms': ['gift_wrap'], 'def': 'attractive wrapping paper suitable for wrapping gifts', 'name': 'gift_wrap'}, {'frequency': 'c', 'id': 501, 'synset': 'ginger.n.03', 'synonyms': ['ginger', 'gingerroot'], 'def': 'the root of the common ginger plant; used fresh as a seasoning', 'name': 'ginger'}, {'frequency': 'f', 'id': 502, 'synset': 'giraffe.n.01', 'synonyms': ['giraffe'], 'def': 'tall animal having a spotted coat and small horns and very long neck and legs', 'name': 'giraffe'}, {'frequency': 'c', 'id': 503, 'synset': 'girdle.n.02', 'synonyms': ['cincture', 'sash', 'waistband', 'waistcloth'], 'def': 'a band of material around the waist that strengthens a skirt or trousers', 'name': 'cincture'}, {'frequency': 'f', 'id': 504, 'synset': 'glass.n.02', 'synonyms': ['glass_(drink_container)', 'drinking_glass'], 'def': 'a container for holding liquids while drinking', 'name': 'glass_(drink_container)'}, {'frequency': 'c', 'id': 505, 'synset': 'globe.n.03', 'synonyms': ['globe'], 'def': 'a sphere on which a map (especially of the earth) is represented', 'name': 'globe'}, {'frequency': 'f', 'id': 506, 'synset': 'glove.n.02', 'synonyms': ['glove'], 'def': 'handwear covering the hand', 'name': 'glove'}, {'frequency': 'c', 'id': 507, 'synset': 'goat.n.01', 'synonyms': ['goat'], 'def': 'a common goat', 'name': 'goat'}, {'frequency': 'f', 'id': 508, 'synset': 'goggles.n.01', 'synonyms': ['goggles'], 'def': 'tight-fitting spectacles worn to protect the eyes', 'name': 'goggles'}, {'frequency': 'r', 'id': 509, 'synset': 'goldfish.n.01', 'synonyms': ['goldfish'], 'def': 'small golden or orange-red freshwater fishes used as pond or aquarium pets', 'name': 'goldfish'}, {'frequency': 'r', 'id': 510, 'synset': 'golf_club.n.02', 'synonyms': ['golf_club', 'golf-club'], 'def': 'golf equipment used by a golfer to hit a golf ball', 'name': 'golf_club'}, 
{'frequency': 'c', 'id': 511, 'synset': 'golfcart.n.01', 'synonyms': ['golfcart'], 'def': 'a small motor vehicle in which golfers can ride between shots', 'name': 'golfcart'}, {'frequency': 'r', 'id': 512, 'synset': 'gondola.n.02', 'synonyms': ['gondola_(boat)'], 'def': 'long narrow flat-bottomed boat propelled by sculling; traditionally used on canals of Venice', 'name': 'gondola_(boat)'}, {'frequency': 'c', 'id': 513, 'synset': 'goose.n.01', 'synonyms': ['goose'], 'def': 'loud, web-footed long-necked aquatic birds usually larger than ducks', 'name': 'goose'}, {'frequency': 'r', 'id': 514, 'synset': 'gorilla.n.01', 'synonyms': ['gorilla'], 'def': 'largest ape', 'name': 'gorilla'}, {'frequency': 'r', 'id': 515, 'synset': 'gourd.n.02', 'synonyms': ['gourd'], 'def': 'any of numerous inedible fruits with hard rinds', 'name': 'gourd'}, {'frequency': 'r', 'id': 516, 'synset': 'gown.n.04', 'synonyms': ['surgical_gown', 'scrubs_(surgical_clothing)'], 'def': 'protective garment worn by surgeons during operations', 'name': 'surgical_gown'}, {'frequency': 'f', 'id': 517, 'synset': 'grape.n.01', 'synonyms': ['grape'], 'def': 'any of various juicy fruit with green or purple skins; grow in clusters', 'name': 'grape'}, {'frequency': 'r', 'id': 518, 'synset': 'grasshopper.n.01', 'synonyms': ['grasshopper'], 'def': 'plant-eating insect with hind legs adapted for leaping', 'name': 'grasshopper'}, {'frequency': 'c', 'id': 519, 'synset': 'grater.n.01', 'synonyms': ['grater'], 'def': 'utensil with sharp perforations for shredding foods (as vegetables or cheese)', 'name': 'grater'}, {'frequency': 'c', 'id': 520, 'synset': 'gravestone.n.01', 'synonyms': ['gravestone', 'headstone', 'tombstone'], 'def': 'a stone that is used to mark a grave', 'name': 'gravestone'}, {'frequency': 'r', 'id': 521, 'synset': 'gravy_boat.n.01', 'synonyms': ['gravy_boat', 'gravy_holder'], 'def': 'a dish (often boat-shaped) for serving gravy or sauce', 'name': 'gravy_boat'}, {'frequency': 'c', 'id': 522, 'synset': 'green_bean.n.02', 'synonyms': ['green_bean'], 'def': 'a common bean plant cultivated for its slender green edible pods', 'name': 'green_bean'}, {'frequency': 'c', 'id': 523, 'synset': 'green_onion.n.01', 'synonyms': ['green_onion', 'spring_onion', 'scallion'], 'def': 'a young onion before the bulb has enlarged', 'name': 'green_onion'}, {'frequency': 'r', 'id': 524, 'synset': 'griddle.n.01', 'synonyms': ['griddle'], 'def': 'cooking utensil consisting of a flat heated surface on which food is cooked', 'name': 'griddle'}, {'frequency': 'r', 'id': 525, 'synset': 'grillroom.n.01', 'synonyms': ['grillroom', 'grill_(restaurant)'], 'def': 'a restaurant where food is cooked on a grill', 'name': 'grillroom'}, {'frequency': 'r', 'id': 526, 'synset': 'grinder.n.04', 'synonyms': ['grinder_(tool)'], 'def': 'a machine tool that polishes metal', 'name': 'grinder_(tool)'}, {'frequency': 'r', 'id': 527, 'synset': 'grits.n.01', 'synonyms': ['grits', 'hominy_grits'], 'def': 'coarsely ground corn boiled as a breakfast dish', 'name': 'grits'}, {'frequency': 'c', 'id': 528, 'synset': 'grizzly.n.01', 'synonyms': ['grizzly', 'grizzly_bear'], 'def': 'powerful brownish-yellow bear of the uplands of western North America', 'name': 'grizzly'}, {'frequency': 'c', 'id': 529, 'synset': 'grocery_bag.n.01', 'synonyms': ['grocery_bag'], 'def': "a sack for holding customer's groceries", 'name': 'grocery_bag'}, {'frequency': 'r', 'id': 530, 'synset': 'guacamole.n.01', 'synonyms': ['guacamole'], 'def': 'a dip made of mashed avocado mixed with chopped onions and 
other seasonings', 'name': 'guacamole'}, {'frequency': 'f', 'id': 531, 'synset': 'guitar.n.01', 'synonyms': ['guitar'], 'def': 'a stringed instrument usually having six strings; played by strumming or plucking', 'name': 'guitar'}, {'frequency': 'c', 'id': 532, 'synset': 'gull.n.02', 'synonyms': ['gull', 'seagull'], 'def': 'mostly white aquatic bird having long pointed wings and short legs', 'name': 'gull'}, {'frequency': 'c', 'id': 533, 'synset': 'gun.n.01', 'synonyms': ['gun'], 'def': 'a weapon that discharges a bullet at high velocity from a metal tube', 'name': 'gun'}, {'frequency': 'r', 'id': 534, 'synset': 'hair_spray.n.01', 'synonyms': ['hair_spray'], 'def': 'substance sprayed on the hair to hold it in place', 'name': 'hair_spray'}, {'frequency': 'c', 'id': 535, 'synset': 'hairbrush.n.01', 'synonyms': ['hairbrush'], 'def': "a brush used to groom a person's hair", 'name': 'hairbrush'}, {'frequency': 'c', 'id': 536, 'synset': 'hairnet.n.01', 'synonyms': ['hairnet'], 'def': 'a small net that someone wears over their hair to keep it in place', 'name': 'hairnet'}, {'frequency': 'c', 'id': 537, 'synset': 'hairpin.n.01', 'synonyms': ['hairpin'], 'def': "a double pronged pin used to hold women's hair in place", 'name': 'hairpin'}, {'frequency': 'f', 'id': 538, 'synset': 'ham.n.01', 'synonyms': ['ham', 'jambon', 'gammon'], 'def': 'meat cut from the thigh of a hog (usually smoked)', 'name': 'ham'}, {'frequency': 'c', 'id': 539, 'synset': 'hamburger.n.01', 'synonyms': ['hamburger', 'beefburger', 'burger'], 'def': 'a sandwich consisting of a patty of minced beef served on a bun', 'name': 'hamburger'}, {'frequency': 'c', 'id': 540, 'synset': 'hammer.n.02', 'synonyms': ['hammer'], 'def': 'a hand tool with a heavy head and a handle; used to deliver an impulsive force by striking', 'name': 'hammer'}, {'frequency': 'r', 'id': 541, 'synset': 'hammock.n.02', 'synonyms': ['hammock'], 'def': 'a hanging bed of canvas or rope netting (usually suspended between two trees)', 'name': 'hammock'}, {'frequency': 'r', 'id': 542, 'synset': 'hamper.n.02', 'synonyms': ['hamper'], 'def': 'a basket usually with a cover', 'name': 'hamper'}, {'frequency': 'r', 'id': 543, 'synset': 'hamster.n.01', 'synonyms': ['hamster'], 'def': 'short-tailed burrowing rodent with large cheek pouches', 'name': 'hamster'}, {'frequency': 'c', 'id': 544, 'synset': 'hand_blower.n.01', 'synonyms': ['hair_dryer'], 'def': 'a hand-held electric blower that can blow warm air onto the hair', 'name': 'hair_dryer'}, {'frequency': 'r', 'id': 545, 'synset': 'hand_glass.n.01', 'synonyms': ['hand_glass', 'hand_mirror'], 'def': 'a mirror intended to be held in the hand', 'name': 'hand_glass'}, {'frequency': 'f', 'id': 546, 'synset': 'hand_towel.n.01', 'synonyms': ['hand_towel', 'face_towel'], 'def': 'a small towel used to dry the hands or face', 'name': 'hand_towel'}, {'frequency': 'c', 'id': 547, 'synset': 'handcart.n.01', 'synonyms': ['handcart', 'pushcart', 'hand_truck'], 'def': 'wheeled vehicle that can be pushed by a person', 'name': 'handcart'}, {'frequency': 'r', 'id': 548, 'synset': 'handcuff.n.01', 'synonyms': ['handcuff'], 'def': 'shackle that consists of a metal loop that can be locked around the wrist', 'name': 'handcuff'}, {'frequency': 'c', 'id': 549, 'synset': 'handkerchief.n.01', 'synonyms': ['handkerchief'], 'def': 'a square piece of cloth used for wiping the eyes or nose or as a costume accessory', 'name': 'handkerchief'}, {'frequency': 'f', 'id': 550, 'synset': 'handle.n.01', 'synonyms': ['handle', 'grip', 'handgrip'], 'def': 'the 
appendage to an object that is designed to be held in order to use or move it', 'name': 'handle'}, {'frequency': 'r', 'id': 551, 'synset': 'handsaw.n.01', 'synonyms': ['handsaw', "carpenter's_saw"], 'def': 'a saw used with one hand for cutting wood', 'name': 'handsaw'}, {'frequency': 'r', 'id': 552, 'synset': 'hardback.n.01', 'synonyms': ['hardback_book', 'hardcover_book'], 'def': 'a book with cardboard or cloth or leather covers', 'name': 'hardback_book'}, {'frequency': 'r', 'id': 553, 'synset': 'harmonium.n.01', 'synonyms': ['harmonium', 'organ_(musical_instrument)', 'reed_organ_(musical_instrument)'], 'def': 'a free-reed instrument in which air is forced through the reeds by bellows', 'name': 'harmonium'}, {'frequency': 'f', 'id': 554, 'synset': 'hat.n.01', 'synonyms': ['hat'], 'def': 'headwear that protects the head from bad weather, sun, or worn for fashion', 'name': 'hat'}, {'frequency': 'r', 'id': 555, 'synset': 'hatbox.n.01', 'synonyms': ['hatbox'], 'def': 'a round piece of luggage for carrying hats', 'name': 'hatbox'}, {'frequency': 'r', 'id': 556, 'synset': 'hatch.n.03', 'synonyms': ['hatch'], 'def': 'a movable barrier covering a hatchway', 'name': 'hatch'}, {'frequency': 'c', 'id': 557, 'synset': 'head_covering.n.01', 'synonyms': ['veil'], 'def': 'a garment that covers the head and face', 'name': 'veil'}, {'frequency': 'f', 'id': 558, 'synset': 'headband.n.01', 'synonyms': ['headband'], 'def': 'a band worn around or over the head', 'name': 'headband'}, {'frequency': 'f', 'id': 559, 'synset': 'headboard.n.01', 'synonyms': ['headboard'], 'def': 'a vertical board or panel forming the head of a bedstead', 'name': 'headboard'}, {'frequency': 'f', 'id': 560, 'synset': 'headlight.n.01', 'synonyms': ['headlight', 'headlamp'], 'def': 'a powerful light with reflector; attached to the front of an automobile or locomotive', 'name': 'headlight'}, {'frequency': 'c', 'id': 561, 'synset': 'headscarf.n.01', 'synonyms': ['headscarf'], 'def': 'a kerchief worn over the head and tied under the chin', 'name': 'headscarf'}, {'frequency': 'r', 'id': 562, 'synset': 'headset.n.01', 'synonyms': ['headset'], 'def': 'receiver consisting of a pair of headphones', 'name': 'headset'}, {'frequency': 'c', 'id': 563, 'synset': 'headstall.n.01', 'synonyms': ['headstall_(for_horses)', 'headpiece_(for_horses)'], 'def': "the band that is the part of a bridle that fits around a horse's head", 'name': 'headstall_(for_horses)'}, {'frequency': 'r', 'id': 564, 'synset': 'hearing_aid.n.02', 'synonyms': ['hearing_aid'], 'def': 'an acoustic device used to direct sound to the ear of a hearing-impaired person', 'name': 'hearing_aid'}, {'frequency': 'c', 'id': 565, 'synset': 'heart.n.02', 'synonyms': ['heart'], 'def': 'a muscular organ; its contractions move the blood through the body', 'name': 'heart'}, {'frequency': 'c', 'id': 566, 'synset': 'heater.n.01', 'synonyms': ['heater', 'warmer'], 'def': 'device that heats water or supplies warmth to a room', 'name': 'heater'}, {'frequency': 'c', 'id': 567, 'synset': 'helicopter.n.01', 'synonyms': ['helicopter'], 'def': 'an aircraft without wings that obtains its lift from the rotation of overhead blades', 'name': 'helicopter'}, {'frequency': 'f', 'id': 568, 'synset': 'helmet.n.02', 'synonyms': ['helmet'], 'def': 'a protective headgear made of hard material to resist blows', 'name': 'helmet'}, {'frequency': 'r', 'id': 569, 'synset': 'heron.n.02', 'synonyms': ['heron'], 'def': 'grey or white wading bird with long neck and long legs and (usually) long bill', 'name': 'heron'}, 
{'frequency': 'c', 'id': 570, 'synset': 'highchair.n.01', 'synonyms': ['highchair', 'feeding_chair'], 'def': 'a chair for feeding a very young child', 'name': 'highchair'}, {'frequency': 'f', 'id': 571, 'synset': 'hinge.n.01', 'synonyms': ['hinge'], 'def': 'a joint that holds two parts together so that one can swing relative to the other', 'name': 'hinge'}, {'frequency': 'r', 'id': 572, 'synset': 'hippopotamus.n.01', 'synonyms': ['hippopotamus'], 'def': 'massive thick-skinned animal living in or around rivers of tropical Africa', 'name': 'hippopotamus'}, {'frequency': 'r', 'id': 573, 'synset': 'hockey_stick.n.01', 'synonyms': ['hockey_stick'], 'def': 'sports implement consisting of a stick used by hockey players to move the puck', 'name': 'hockey_stick'}, {'frequency': 'c', 'id': 574, 'synset': 'hog.n.03', 'synonyms': ['hog', 'pig'], 'def': 'domestic swine', 'name': 'hog'}, {'frequency': 'f', 'id': 575, 'synset': 'home_plate.n.01', 'synonyms': ['home_plate_(baseball)', 'home_base_(baseball)'], 'def': '(baseball) a rubber slab where the batter stands; it must be touched by a base runner in order to score', 'name': 'home_plate_(baseball)'}, {'frequency': 'c', 'id': 576, 'synset': 'honey.n.01', 'synonyms': ['honey'], 'def': 'a sweet yellow liquid produced by bees', 'name': 'honey'}, {'frequency': 'f', 'id': 577, 'synset': 'hood.n.06', 'synonyms': ['fume_hood', 'exhaust_hood'], 'def': 'metal covering leading to a vent that exhausts smoke or fumes', 'name': 'fume_hood'}, {'frequency': 'f', 'id': 578, 'synset': 'hook.n.05', 'synonyms': ['hook'], 'def': 'a curved or bent implement for suspending or pulling something', 'name': 'hook'}, {'frequency': 'f', 'id': 579, 'synset': 'horse.n.01', 'synonyms': ['horse'], 'def': 'a common horse', 'name': 'horse'}, {'frequency': 'f', 'id': 580, 'synset': 'hose.n.03', 'synonyms': ['hose', 'hosepipe'], 'def': 'a flexible pipe for conveying a liquid or gas', 'name': 'hose'}, {'frequency': 'r', 'id': 581, 'synset': 'hot-air_balloon.n.01', 'synonyms': ['hot-air_balloon'], 'def': 'balloon for travel through the air in a basket suspended below a large bag of heated air', 'name': 'hot-air_balloon'}, {'frequency': 'r', 'id': 582, 'synset': 'hot_plate.n.01', 'synonyms': ['hotplate'], 'def': 'a portable electric appliance for heating or cooking or keeping food warm', 'name': 'hotplate'}, {'frequency': 'c', 'id': 583, 'synset': 'hot_sauce.n.01', 'synonyms': ['hot_sauce'], 'def': 'a pungent peppery sauce', 'name': 'hot_sauce'}, {'frequency': 'r', 'id': 584, 'synset': 'hourglass.n.01', 'synonyms': ['hourglass'], 'def': 'a sandglass timer that runs for sixty minutes', 'name': 'hourglass'}, {'frequency': 'r', 'id': 585, 'synset': 'houseboat.n.01', 'synonyms': ['houseboat'], 'def': 'a barge that is designed and equipped for use as a dwelling', 'name': 'houseboat'}, {'frequency': 'r', 'id': 586, 'synset': 'hummingbird.n.01', 'synonyms': ['hummingbird'], 'def': 'tiny American bird having brilliant iridescent plumage and long slender bills', 'name': 'hummingbird'}, {'frequency': 'r', 'id': 587, 'synset': 'hummus.n.01', 'synonyms': ['hummus', 'humus', 'hommos', 'hoummos', 'humous'], 'def': 'a thick spread made from mashed chickpeas', 'name': 'hummus'}, {'frequency': 'c', 'id': 588, 'synset': 'ice_bear.n.01', 'synonyms': ['polar_bear'], 'def': 'white bear of Arctic regions', 'name': 'polar_bear'}, {'frequency': 'c', 'id': 589, 'synset': 'ice_cream.n.01', 'synonyms': ['icecream'], 'def': 'frozen dessert containing cream and sugar and flavoring', 'name': 'icecream'}, {'frequency': 
'r', 'id': 590, 'synset': 'ice_lolly.n.01', 'synonyms': ['popsicle'], 'def': 'ice cream or water ice on a small wooden stick', 'name': 'popsicle'}, {'frequency': 'c', 'id': 591, 'synset': 'ice_maker.n.01', 'synonyms': ['ice_maker'], 'def': 'an appliance included in some electric refrigerators for making ice cubes', 'name': 'ice_maker'}, {'frequency': 'r', 'id': 592, 'synset': 'ice_pack.n.01', 'synonyms': ['ice_pack', 'ice_bag'], 'def': 'a waterproof bag filled with ice: applied to the body (especially the head) to cool or reduce swelling', 'name': 'ice_pack'}, {'frequency': 'r', 'id': 593, 'synset': 'ice_skate.n.01', 'synonyms': ['ice_skate'], 'def': 'skate consisting of a boot with a steel blade fitted to the sole', 'name': 'ice_skate'}, {'frequency': 'r', 'id': 594, 'synset': 'ice_tea.n.01', 'synonyms': ['ice_tea', 'iced_tea'], 'def': 'strong tea served over ice', 'name': 'ice_tea'}, {'frequency': 'c', 'id': 595, 'synset': 'igniter.n.01', 'synonyms': ['igniter', 'ignitor', 'lighter'], 'def': 'a substance or device used to start a fire', 'name': 'igniter'}, {'frequency': 'r', 'id': 596, 'synset': 'incense.n.01', 'synonyms': ['incense'], 'def': 'a substance that produces a fragrant odor when burned', 'name': 'incense'}, {'frequency': 'r', 'id': 597, 'synset': 'inhaler.n.01', 'synonyms': ['inhaler', 'inhalator'], 'def': 'a dispenser that produces a chemical vapor to be inhaled through mouth or nose', 'name': 'inhaler'}, {'frequency': 'c', 'id': 598, 'synset': 'ipod.n.01', 'synonyms': ['iPod'], 'def': 'a pocket-sized device used to play music files', 'name': 'iPod'}, {'frequency': 'c', 'id': 599, 'synset': 'iron.n.04', 'synonyms': ['iron_(for_clothing)', 'smoothing_iron_(for_clothing)'], 'def': 'home appliance consisting of a flat metal base that is heated and used to smooth cloth', 'name': 'iron_(for_clothing)'}, {'frequency': 'r', 'id': 600, 'synset': 'ironing_board.n.01', 'synonyms': ['ironing_board'], 'def': 'narrow padded board on collapsible supports; used for ironing clothes', 'name': 'ironing_board'}, {'frequency': 'f', 'id': 601, 'synset': 'jacket.n.01', 'synonyms': ['jacket'], 'def': 'a waist-length coat', 'name': 'jacket'}, {'frequency': 'r', 'id': 602, 'synset': 'jam.n.01', 'synonyms': ['jam'], 'def': 'preserve of crushed fruit', 'name': 'jam'}, {'frequency': 'f', 'id': 603, 'synset': 'jean.n.01', 'synonyms': ['jean', 'blue_jean', 'denim'], 'def': '(usually plural) close-fitting trousers of heavy denim for manual work or casual wear', 'name': 'jean'}, {'frequency': 'c', 'id': 604, 'synset': 'jeep.n.01', 'synonyms': ['jeep', 'landrover'], 'def': 'a car suitable for traveling over rough terrain', 'name': 'jeep'}, {'frequency': 'r', 'id': 605, 'synset': 'jelly_bean.n.01', 'synonyms': ['jelly_bean', 'jelly_egg'], 'def': 'sugar-glazed jellied candy', 'name': 'jelly_bean'}, {'frequency': 'f', 'id': 606, 'synset': 'jersey.n.03', 'synonyms': ['jersey', 'T-shirt', 'tee_shirt'], 'def': 'a close-fitting pullover shirt', 'name': 'jersey'}, {'frequency': 'c', 'id': 607, 'synset': 'jet.n.01', 'synonyms': ['jet_plane', 'jet-propelled_plane'], 'def': 'an airplane powered by one or more jet engines', 'name': 'jet_plane'}, {'frequency': 'c', 'id': 608, 'synset': 'jewelry.n.01', 'synonyms': ['jewelry', 'jewellery'], 'def': 'an adornment (as a bracelet or ring or necklace) made of precious metals and set with gems (or imitation gems)', 'name': 'jewelry'}, {'frequency': 'r', 'id': 609, 'synset': 'joystick.n.02', 'synonyms': ['joystick'], 'def': 'a control device for computers consisting of a vertical 
handle that can move freely in two directions', 'name': 'joystick'}, {'frequency': 'r', 'id': 610, 'synset': 'jump_suit.n.01', 'synonyms': ['jumpsuit'], 'def': "one-piece garment fashioned after a parachutist's uniform", 'name': 'jumpsuit'}, {'frequency': 'c', 'id': 611, 'synset': 'kayak.n.01', 'synonyms': ['kayak'], 'def': 'a small canoe consisting of a light frame made watertight with animal skins', 'name': 'kayak'}, {'frequency': 'r', 'id': 612, 'synset': 'keg.n.02', 'synonyms': ['keg'], 'def': 'small cask or barrel', 'name': 'keg'}, {'frequency': 'r', 'id': 613, 'synset': 'kennel.n.01', 'synonyms': ['kennel', 'doghouse'], 'def': 'outbuilding that serves as a shelter for a dog', 'name': 'kennel'}, {'frequency': 'c', 'id': 614, 'synset': 'kettle.n.01', 'synonyms': ['kettle', 'boiler'], 'def': 'a metal pot for stewing or boiling; usually has a lid', 'name': 'kettle'}, {'frequency': 'f', 'id': 615, 'synset': 'key.n.01', 'synonyms': ['key'], 'def': 'metal instrument used to unlock a lock', 'name': 'key'}, {'frequency': 'r', 'id': 616, 'synset': 'keycard.n.01', 'synonyms': ['keycard'], 'def': 'a plastic card used to gain access typically to a door', 'name': 'keycard'}, {'frequency': 'r', 'id': 617, 'synset': 'kilt.n.01', 'synonyms': ['kilt'], 'def': 'a knee-length pleated tartan skirt worn by men as part of the traditional dress in the Highlands of northern Scotland', 'name': 'kilt'}, {'frequency': 'c', 'id': 618, 'synset': 'kimono.n.01', 'synonyms': ['kimono'], 'def': 'a loose robe; imitated from robes originally worn by Japanese', 'name': 'kimono'}, {'frequency': 'f', 'id': 619, 'synset': 'kitchen_sink.n.01', 'synonyms': ['kitchen_sink'], 'def': 'a sink in a kitchen', 'name': 'kitchen_sink'}, {'frequency': 'c', 'id': 620, 'synset': 'kitchen_table.n.01', 'synonyms': ['kitchen_table'], 'def': 'a table in the kitchen', 'name': 'kitchen_table'}, {'frequency': 'f', 'id': 621, 'synset': 'kite.n.03', 'synonyms': ['kite'], 'def': 'plaything consisting of a light frame covered with tissue paper; flown in wind at end of a string', 'name': 'kite'}, {'frequency': 'c', 'id': 622, 'synset': 'kitten.n.01', 'synonyms': ['kitten', 'kitty'], 'def': 'young domestic cat', 'name': 'kitten'}, {'frequency': 'c', 'id': 623, 'synset': 'kiwi.n.03', 'synonyms': ['kiwi_fruit'], 'def': 'fuzzy brown egg-shaped fruit with slightly tart green flesh', 'name': 'kiwi_fruit'}, {'frequency': 'f', 'id': 624, 'synset': 'knee_pad.n.01', 'synonyms': ['knee_pad'], 'def': 'protective garment consisting of a pad worn by football or baseball or hockey players', 'name': 'knee_pad'}, {'frequency': 'f', 'id': 625, 'synset': 'knife.n.01', 'synonyms': ['knife'], 'def': 'tool with a blade and point used as a cutting instrument', 'name': 'knife'}, {'frequency': 'r', 'id': 626, 'synset': 'knight.n.02', 'synonyms': ['knight_(chess_piece)', 'horse_(chess_piece)'], 'def': 'a chess game piece shaped to resemble the head of a horse', 'name': 'knight_(chess_piece)'}, {'frequency': 'r', 'id': 627, 'synset': 'knitting_needle.n.01', 'synonyms': ['knitting_needle'], 'def': 'needle consisting of a slender rod with pointed ends; usually used in pairs', 'name': 'knitting_needle'}, {'frequency': 'f', 'id': 628, 'synset': 'knob.n.02', 'synonyms': ['knob'], 'def': 'a round handle often found on a door', 'name': 'knob'}, {'frequency': 'r', 'id': 629, 'synset': 'knocker.n.05', 'synonyms': ['knocker_(on_a_door)', 'doorknocker'], 'def': 'a device (usually metal and ornamental) attached by a hinge to a door', 'name': 'knocker_(on_a_door)'}, {'frequency': 'r', 
'id': 630, 'synset': 'koala.n.01', 'synonyms': ['koala', 'koala_bear'], 'def': 'sluggish tailless Australian marsupial with grey furry ears and coat', 'name': 'koala'}, {'frequency': 'r', 'id': 631, 'synset': 'lab_coat.n.01', 'synonyms': ['lab_coat', 'laboratory_coat'], 'def': 'a light coat worn to protect clothing from substances used while working in a laboratory', 'name': 'lab_coat'}, {'frequency': 'f', 'id': 632, 'synset': 'ladder.n.01', 'synonyms': ['ladder'], 'def': 'steps consisting of two parallel members connected by rungs', 'name': 'ladder'}, {'frequency': 'c', 'id': 633, 'synset': 'ladle.n.01', 'synonyms': ['ladle'], 'def': 'a spoon-shaped vessel with a long handle frequently used to transfer liquids', 'name': 'ladle'}, {'frequency': 'r', 'id': 634, 'synset': 'ladybug.n.01', 'synonyms': ['ladybug', 'ladybeetle', 'ladybird_beetle'], 'def': 'small round bright-colored and spotted beetle, typically red and black', 'name': 'ladybug'}, {'frequency': 'c', 'id': 635, 'synset': 'lamb.n.01', 'synonyms': ['lamb_(animal)'], 'def': 'young sheep', 'name': 'lamb_(animal)'}, {'frequency': 'r', 'id': 636, 'synset': 'lamb_chop.n.01', 'synonyms': ['lamb-chop', 'lambchop'], 'def': 'chop cut from a lamb', 'name': 'lamb-chop'}, {'frequency': 'f', 'id': 637, 'synset': 'lamp.n.02', 'synonyms': ['lamp'], 'def': 'a piece of furniture holding one or more electric light bulbs', 'name': 'lamp'}, {'frequency': 'f', 'id': 638, 'synset': 'lamppost.n.01', 'synonyms': ['lamppost'], 'def': 'a metal post supporting an outdoor lamp (such as a streetlight)', 'name': 'lamppost'}, {'frequency': 'f', 'id': 639, 'synset': 'lampshade.n.01', 'synonyms': ['lampshade'], 'def': 'a protective ornamental shade used to screen a light bulb from direct view', 'name': 'lampshade'}, {'frequency': 'c', 'id': 640, 'synset': 'lantern.n.01', 'synonyms': ['lantern'], 'def': 'light in a transparent protective case', 'name': 'lantern'}, {'frequency': 'f', 'id': 641, 'synset': 'lanyard.n.02', 'synonyms': ['lanyard', 'laniard'], 'def': 'a cord worn around the neck to hold a knife or whistle, etc.', 'name': 'lanyard'}, {'frequency': 'f', 'id': 642, 'synset': 'laptop.n.01', 'synonyms': ['laptop_computer', 'notebook_computer'], 'def': 'a portable computer small enough to use in your lap', 'name': 'laptop_computer'}, {'frequency': 'r', 'id': 643, 'synset': 'lasagna.n.01', 'synonyms': ['lasagna', 'lasagne'], 'def': 'baked dish of layers of lasagna pasta with sauce and cheese and meat or vegetables', 'name': 'lasagna'}, {'frequency': 'c', 'id': 644, 'synset': 'latch.n.02', 'synonyms': ['latch'], 'def': 'a bar that can be lowered or slid into a groove to fasten a door or gate', 'name': 'latch'}, {'frequency': 'r', 'id': 645, 'synset': 'lawn_mower.n.01', 'synonyms': ['lawn_mower'], 'def': 'garden tool for mowing grass on lawns', 'name': 'lawn_mower'}, {'frequency': 'r', 'id': 646, 'synset': 'leather.n.01', 'synonyms': ['leather'], 'def': 'an animal skin made smooth and flexible by removing the hair and then tanning', 'name': 'leather'}, {'frequency': 'c', 'id': 647, 'synset': 'legging.n.01', 'synonyms': ['legging_(clothing)', 'leging_(clothing)', 'leg_covering'], 'def': 'a garment covering the leg (usually extending from the knee to the ankle)', 'name': 'legging_(clothing)'}, {'frequency': 'c', 'id': 648, 'synset': 'lego.n.01', 'synonyms': ['Lego', 'Lego_set'], 'def': "a child's plastic construction set for making models from blocks", 'name': 'Lego'}, {'frequency': 'f', 'id': 649, 'synset': 'lemon.n.01', 'synonyms': ['lemon'], 'def': 'yellow oval 
fruit with juicy acidic flesh', 'name': 'lemon'}, {'frequency': 'r', 'id': 650, 'synset': 'lemonade.n.01', 'synonyms': ['lemonade'], 'def': 'sweetened beverage of diluted lemon juice', 'name': 'lemonade'}, {'frequency': 'f', 'id': 651, 'synset': 'lettuce.n.02', 'synonyms': ['lettuce'], 'def': 'leafy plant commonly eaten in salad or on sandwiches', 'name': 'lettuce'}, {'frequency': 'f', 'id': 652, 'synset': 'license_plate.n.01', 'synonyms': ['license_plate', 'numberplate'], 'def': "a plate mounted on the front and back of car and bearing the car's registration number", 'name': 'license_plate'}, {'frequency': 'f', 'id': 653, 'synset': 'life_buoy.n.01', 'synonyms': ['life_buoy', 'lifesaver', 'life_belt', 'life_ring'], 'def': 'a ring-shaped life preserver used to prevent drowning (NOT a life-jacket or vest)', 'name': 'life_buoy'}, {'frequency': 'f', 'id': 654, 'synset': 'life_jacket.n.01', 'synonyms': ['life_jacket', 'life_vest'], 'def': 'life preserver consisting of a sleeveless jacket of buoyant or inflatable design', 'name': 'life_jacket'}, {'frequency': 'f', 'id': 655, 'synset': 'light_bulb.n.01', 'synonyms': ['lightbulb'], 'def': 'glass bulb or tube shaped electric device that emits light (DO NOT MARK LAMPS AS A WHOLE)', 'name': 'lightbulb'}, {'frequency': 'r', 'id': 656, 'synset': 'lightning_rod.n.02', 'synonyms': ['lightning_rod', 'lightning_conductor'], 'def': 'a metallic conductor that is attached to a high point and leads to the ground', 'name': 'lightning_rod'}, {'frequency': 'c', 'id': 657, 'synset': 'lime.n.06', 'synonyms': ['lime'], 'def': 'the green acidic fruit of any of various lime trees', 'name': 'lime'}, {'frequency': 'r', 'id': 658, 'synset': 'limousine.n.01', 'synonyms': ['limousine'], 'def': 'long luxurious car; usually driven by a chauffeur', 'name': 'limousine'}, {'frequency': 'r', 'id': 659, 'synset': 'linen.n.02', 'synonyms': ['linen_paper'], 'def': 'a high-quality paper made of linen fibers or with a linen finish', 'name': 'linen_paper'}, {'frequency': 'c', 'id': 660, 'synset': 'lion.n.01', 'synonyms': ['lion'], 'def': 'large gregarious predatory cat of Africa and India', 'name': 'lion'}, {'frequency': 'c', 'id': 661, 'synset': 'lip_balm.n.01', 'synonyms': ['lip_balm'], 'def': 'a balm applied to the lips', 'name': 'lip_balm'}, {'frequency': 'c', 'id': 662, 'synset': 'lipstick.n.01', 'synonyms': ['lipstick', 'lip_rouge'], 'def': 'makeup that is used to color the lips', 'name': 'lipstick'}, {'frequency': 'r', 'id': 663, 'synset': 'liquor.n.01', 'synonyms': ['liquor', 'spirits', 'hard_liquor', 'liqueur', 'cordial'], 'def': 'an alcoholic beverage that is distilled rather than fermented', 'name': 'liquor'}, {'frequency': 'r', 'id': 664, 'synset': 'lizard.n.01', 'synonyms': ['lizard'], 'def': 'a reptile with usually two pairs of legs and a tapering tail', 'name': 'lizard'}, {'frequency': 'r', 'id': 665, 'synset': 'loafer.n.02', 'synonyms': ['Loafer_(type_of_shoe)'], 'def': 'a low leather step-in shoe', 'name': 'Loafer_(type_of_shoe)'}, {'frequency': 'f', 'id': 666, 'synset': 'log.n.01', 'synonyms': ['log'], 'def': 'a segment of the trunk of a tree when stripped of branches', 'name': 'log'}, {'frequency': 'c', 'id': 667, 'synset': 'lollipop.n.02', 'synonyms': ['lollipop'], 'def': 'hard candy on a stick', 'name': 'lollipop'}, {'frequency': 'c', 'id': 668, 'synset': 'lotion.n.01', 'synonyms': ['lotion'], 'def': 'any of various cosmetic preparations that are applied to the skin', 'name': 'lotion'}, {'frequency': 'f', 'id': 669, 'synset': 'loudspeaker.n.01', 'synonyms': 
['speaker_(stero_equipment)'], 'def': 'electronic device that produces sound often as part of a stereo system', 'name': 'speaker_(stero_equipment)'}, {'frequency': 'c', 'id': 670, 'synset': 'love_seat.n.01', 'synonyms': ['loveseat'], 'def': 'small sofa that seats two people', 'name': 'loveseat'}, {'frequency': 'r', 'id': 671, 'synset': 'machine_gun.n.01', 'synonyms': ['machine_gun'], 'def': 'a rapidly firing automatic gun', 'name': 'machine_gun'}, {'frequency': 'f', 'id': 672, 'synset': 'magazine.n.02', 'synonyms': ['magazine'], 'def': 'a paperback periodic publication', 'name': 'magazine'}, {'frequency': 'f', 'id': 673, 'synset': 'magnet.n.01', 'synonyms': ['magnet'], 'def': 'a device that attracts iron and produces a magnetic field', 'name': 'magnet'}, {'frequency': 'r', 'id': 674, 'synset': 'mail_slot.n.01', 'synonyms': ['mail_slot'], 'def': 'a slot (usually in a door) through which mail can be delivered', 'name': 'mail_slot'}, {'frequency': 'c', 'id': 675, 'synset': 'mailbox.n.01', 'synonyms': ['mailbox_(at_home)', 'letter_box_(at_home)'], 'def': 'a private box for delivery of mail', 'name': 'mailbox_(at_home)'}, {'frequency': 'r', 'id': 676, 'synset': 'mallet.n.01', 'synonyms': ['mallet'], 'def': 'a sports implement with a long handle and a hammer-like head used to hit a ball', 'name': 'mallet'}, {'frequency': 'r', 'id': 677, 'synset': 'mammoth.n.01', 'synonyms': ['mammoth'], 'def': 'any of numerous extinct elephants widely distributed in the Pleistocene', 'name': 'mammoth'}, {'frequency': 'c', 'id': 678, 'synset': 'mandarin.n.05', 'synonyms': ['mandarin_orange'], 'def': 'a somewhat flat reddish-orange loose skinned citrus of China', 'name': 'mandarin_orange'}, {'frequency': 'c', 'id': 679, 'synset': 'manger.n.01', 'synonyms': ['manger', 'trough'], 'def': 'a container (usually in a barn or stable) from which cattle or horses feed', 'name': 'manger'}, {'frequency': 'f', 'id': 680, 'synset': 'manhole.n.01', 'synonyms': ['manhole'], 'def': 'a hole (usually with a flush cover) through which a person can gain access to an underground structure', 'name': 'manhole'}, {'frequency': 'c', 'id': 681, 'synset': 'map.n.01', 'synonyms': ['map'], 'def': "a diagrammatic representation of the earth's surface (or part of it)", 'name': 'map'}, {'frequency': 'c', 'id': 682, 'synset': 'marker.n.03', 'synonyms': ['marker'], 'def': 'a writing implement for making a mark', 'name': 'marker'}, {'frequency': 'r', 'id': 683, 'synset': 'martini.n.01', 'synonyms': ['martini'], 'def': 'a cocktail made of gin (or vodka) with dry vermouth', 'name': 'martini'}, {'frequency': 'r', 'id': 684, 'synset': 'mascot.n.01', 'synonyms': ['mascot'], 'def': 'a person or animal that is adopted by a team or other group as a symbolic figure', 'name': 'mascot'}, {'frequency': 'c', 'id': 685, 'synset': 'mashed_potato.n.01', 'synonyms': ['mashed_potato'], 'def': 'potato that has been peeled and boiled and then mashed', 'name': 'mashed_potato'}, {'frequency': 'r', 'id': 686, 'synset': 'masher.n.02', 'synonyms': ['masher'], 'def': 'a kitchen utensil used for mashing (e.g. 
potatoes)', 'name': 'masher'}, {'frequency': 'f', 'id': 687, 'synset': 'mask.n.04', 'synonyms': ['mask', 'facemask'], 'def': 'a protective covering worn over the face', 'name': 'mask'}, {'frequency': 'f', 'id': 688, 'synset': 'mast.n.01', 'synonyms': ['mast'], 'def': 'a vertical spar for supporting sails', 'name': 'mast'}, {'frequency': 'c', 'id': 689, 'synset': 'mat.n.03', 'synonyms': ['mat_(gym_equipment)', 'gym_mat'], 'def': 'sports equipment consisting of a piece of thick padding on the floor for gymnastics', 'name': 'mat_(gym_equipment)'}, {'frequency': 'r', 'id': 690, 'synset': 'matchbox.n.01', 'synonyms': ['matchbox'], 'def': 'a box for holding matches', 'name': 'matchbox'}, {'frequency': 'f', 'id': 691, 'synset': 'mattress.n.01', 'synonyms': ['mattress'], 'def': 'a thick pad filled with resilient material used as a bed or part of a bed', 'name': 'mattress'}, {'frequency': 'c', 'id': 692, 'synset': 'measuring_cup.n.01', 'synonyms': ['measuring_cup'], 'def': 'graduated cup used to measure liquid or granular ingredients', 'name': 'measuring_cup'}, {'frequency': 'c', 'id': 693, 'synset': 'measuring_stick.n.01', 'synonyms': ['measuring_stick', 'ruler_(measuring_stick)', 'measuring_rod'], 'def': 'measuring instrument having a sequence of marks at regular intervals', 'name': 'measuring_stick'}, {'frequency': 'c', 'id': 694, 'synset': 'meatball.n.01', 'synonyms': ['meatball'], 'def': 'ground meat formed into a ball and fried or simmered in broth', 'name': 'meatball'}, {'frequency': 'c', 'id': 695, 'synset': 'medicine.n.02', 'synonyms': ['medicine'], 'def': 'something that treats or prevents or alleviates the symptoms of disease', 'name': 'medicine'}, {'frequency': 'r', 'id': 696, 'synset': 'melon.n.01', 'synonyms': ['melon'], 'def': 'fruit of the gourd family having a hard rind and sweet juicy flesh', 'name': 'melon'}, {'frequency': 'f', 'id': 697, 'synset': 'microphone.n.01', 'synonyms': ['microphone'], 'def': 'device for converting sound waves into electrical energy', 'name': 'microphone'}, {'frequency': 'r', 'id': 698, 'synset': 'microscope.n.01', 'synonyms': ['microscope'], 'def': 'magnifier of the image of small objects', 'name': 'microscope'}, {'frequency': 'f', 'id': 699, 'synset': 'microwave.n.02', 'synonyms': ['microwave_oven'], 'def': 'kitchen appliance that cooks food by passing an electromagnetic wave through it', 'name': 'microwave_oven'}, {'frequency': 'r', 'id': 700, 'synset': 'milestone.n.01', 'synonyms': ['milestone', 'milepost'], 'def': 'stone post at side of a road to show distances', 'name': 'milestone'}, {'frequency': 'c', 'id': 701, 'synset': 'milk.n.01', 'synonyms': ['milk'], 'def': 'a white nutritious liquid secreted by mammals and used as food by human beings', 'name': 'milk'}, {'frequency': 'f', 'id': 702, 'synset': 'minivan.n.01', 'synonyms': ['minivan'], 'def': 'a small box-shaped passenger van', 'name': 'minivan'}, {'frequency': 'r', 'id': 703, 'synset': 'mint.n.05', 'synonyms': ['mint_candy'], 'def': 'a candy that is flavored with a mint oil', 'name': 'mint_candy'}, {'frequency': 'f', 'id': 704, 'synset': 'mirror.n.01', 'synonyms': ['mirror'], 'def': 'polished surface that forms images by reflecting light', 'name': 'mirror'}, {'frequency': 'c', 'id': 705, 'synset': 'mitten.n.01', 'synonyms': ['mitten'], 'def': 'glove that encases the thumb separately and the other four fingers together', 'name': 'mitten'}, {'frequency': 'c', 'id': 706, 'synset': 'mixer.n.04', 'synonyms': ['mixer_(kitchen_tool)', 'stand_mixer'], 'def': 'a kitchen utensil that is used for mixing 
foods', 'name': 'mixer_(kitchen_tool)'}, {'frequency': 'c', 'id': 707, 'synset': 'money.n.03', 'synonyms': ['money'], 'def': 'the official currency issued by a government or national bank', 'name': 'money'}, {'frequency': 'f', 'id': 708, 'synset': 'monitor.n.04', 'synonyms': ['monitor_(computer_equipment) computer_monitor'], 'def': 'a computer monitor', 'name': 'monitor_(computer_equipment) computer_monitor'}, {'frequency': 'c', 'id': 709, 'synset': 'monkey.n.01', 'synonyms': ['monkey'], 'def': 'any of various long-tailed primates', 'name': 'monkey'}, {'frequency': 'f', 'id': 710, 'synset': 'motor.n.01', 'synonyms': ['motor'], 'def': 'machine that converts other forms of energy into mechanical energy and so imparts motion', 'name': 'motor'}, {'frequency': 'f', 'id': 711, 'synset': 'motor_scooter.n.01', 'synonyms': ['motor_scooter', 'scooter'], 'def': 'a wheeled vehicle with small wheels and a low-powered engine', 'name': 'motor_scooter'}, {'frequency': 'r', 'id': 712, 'synset': 'motor_vehicle.n.01', 'synonyms': ['motor_vehicle', 'automotive_vehicle'], 'def': 'a self-propelled wheeled vehicle that does not run on rails', 'name': 'motor_vehicle'}, {'frequency': 'r', 'id': 713, 'synset': 'motorboat.n.01', 'synonyms': ['motorboat', 'powerboat'], 'def': 'a boat propelled by an internal-combustion engine', 'name': 'motorboat'}, {'frequency': 'f', 'id': 714, 'synset': 'motorcycle.n.01', 'synonyms': ['motorcycle'], 'def': 'a motor vehicle with two wheels and a strong frame', 'name': 'motorcycle'}, {'frequency': 'f', 'id': 715, 'synset': 'mound.n.01', 'synonyms': ['mound_(baseball)', "pitcher's_mound"], 'def': '(baseball) the slight elevation on which the pitcher stands', 'name': 'mound_(baseball)'}, {'frequency': 'r', 'id': 716, 'synset': 'mouse.n.01', 'synonyms': ['mouse_(animal_rodent)'], 'def': 'a small rodent with pointed snouts and small ears on elongated bodies with slender usually hairless tails', 'name': 'mouse_(animal_rodent)'}, {'frequency': 'f', 'id': 717, 'synset': 'mouse.n.04', 'synonyms': ['mouse_(computer_equipment)', 'computer_mouse'], 'def': 'a computer input device that controls an on-screen pointer', 'name': 'mouse_(computer_equipment)'}, {'frequency': 'f', 'id': 718, 'synset': 'mousepad.n.01', 'synonyms': ['mousepad'], 'def': 'a small portable pad that provides an operating surface for a computer mouse', 'name': 'mousepad'}, {'frequency': 'c', 'id': 719, 'synset': 'muffin.n.01', 'synonyms': ['muffin'], 'def': 'a sweet quick bread baked in a cup-shaped pan', 'name': 'muffin'}, {'frequency': 'f', 'id': 720, 'synset': 'mug.n.04', 'synonyms': ['mug'], 'def': 'with handle and usually cylindrical', 'name': 'mug'}, {'frequency': 'f', 'id': 721, 'synset': 'mushroom.n.02', 'synonyms': ['mushroom'], 'def': 'a common mushroom', 'name': 'mushroom'}, {'frequency': 'r', 'id': 722, 'synset': 'music_stool.n.01', 'synonyms': ['music_stool', 'piano_stool'], 'def': 'a stool for piano players; usually adjustable in height', 'name': 'music_stool'}, {'frequency': 'r', 'id': 723, 'synset': 'musical_instrument.n.01', 'synonyms': ['musical_instrument', 'instrument_(musical)'], 'def': 'any of various devices or contrivances that can be used to produce musical tones or sounds', 'name': 'musical_instrument'}, {'frequency': 'r', 'id': 724, 'synset': 'nailfile.n.01', 'synonyms': ['nailfile'], 'def': 'a small flat file for shaping the nails', 'name': 'nailfile'}, {'frequency': 'r', 'id': 725, 'synset': 'nameplate.n.01', 'synonyms': ['nameplate'], 'def': 'a plate bearing a name', 'name': 'nameplate'}, 
{'frequency': 'f', 'id': 726, 'synset': 'napkin.n.01', 'synonyms': ['napkin', 'table_napkin', 'serviette'], 'def': 'a small piece of table linen or paper that is used to wipe the mouth and to cover the lap in order to protect clothing', 'name': 'napkin'}, {'frequency': 'r', 'id': 727, 'synset': 'neckerchief.n.01', 'synonyms': ['neckerchief'], 'def': 'a kerchief worn around the neck', 'name': 'neckerchief'}, {'frequency': 'f', 'id': 728, 'synset': 'necklace.n.01', 'synonyms': ['necklace'], 'def': 'jewelry consisting of a cord or chain (often bearing gems) worn about the neck as an ornament', 'name': 'necklace'}, {'frequency': 'f', 'id': 729, 'synset': 'necktie.n.01', 'synonyms': ['necktie', 'tie_(necktie)'], 'def': 'neckwear consisting of a long narrow piece of material worn under a collar and tied in knot at the front', 'name': 'necktie'}, {'frequency': 'r', 'id': 730, 'synset': 'needle.n.03', 'synonyms': ['needle'], 'def': 'a sharp pointed implement (usually metal)', 'name': 'needle'}, {'frequency': 'c', 'id': 731, 'synset': 'nest.n.01', 'synonyms': ['nest'], 'def': 'a structure in which animals lay eggs or give birth to their young', 'name': 'nest'}, {'frequency': 'r', 'id': 732, 'synset': 'newsstand.n.01', 'synonyms': ['newsstand'], 'def': 'a stall where newspapers and other periodicals are sold', 'name': 'newsstand'}, {'frequency': 'c', 'id': 733, 'synset': 'nightwear.n.01', 'synonyms': ['nightshirt', 'nightwear', 'sleepwear', 'nightclothes'], 'def': 'garments designed to be worn in bed', 'name': 'nightshirt'}, {'frequency': 'r', 'id': 734, 'synset': 'nosebag.n.01', 'synonyms': ['nosebag_(for_animals)', 'feedbag'], 'def': 'a canvas bag that is used to feed an animal (such as a horse); covers the muzzle and fastens at the top of the head', 'name': 'nosebag_(for_animals)'}, {'frequency': 'r', 'id': 735, 'synset': 'noseband.n.01', 'synonyms': ['noseband_(for_animals)', 'nosepiece_(for_animals)'], 'def': "a strap that is the part of a bridle that goes over the animal's nose", 'name': 'noseband_(for_animals)'}, {'frequency': 'f', 'id': 736, 'synset': 'notebook.n.01', 'synonyms': ['notebook'], 'def': 'a book with blank pages for recording notes or memoranda', 'name': 'notebook'}, {'frequency': 'c', 'id': 737, 'synset': 'notepad.n.01', 'synonyms': ['notepad'], 'def': 'a pad of paper for keeping notes', 'name': 'notepad'}, {'frequency': 'c', 'id': 738, 'synset': 'nut.n.03', 'synonyms': ['nut'], 'def': 'a small metal block (usually square or hexagonal) with internal screw thread to be fitted onto a bolt', 'name': 'nut'}, {'frequency': 'r', 'id': 739, 'synset': 'nutcracker.n.01', 'synonyms': ['nutcracker'], 'def': 'a hand tool used to crack nuts open', 'name': 'nutcracker'}, {'frequency': 'c', 'id': 740, 'synset': 'oar.n.01', 'synonyms': ['oar'], 'def': 'an implement used to propel or steer a boat', 'name': 'oar'}, {'frequency': 'r', 'id': 741, 'synset': 'octopus.n.01', 'synonyms': ['octopus_(food)'], 'def': 'tentacles of octopus prepared as food', 'name': 'octopus_(food)'}, {'frequency': 'r', 'id': 742, 'synset': 'octopus.n.02', 'synonyms': ['octopus_(animal)'], 'def': 'bottom-living cephalopod having a soft oval body with eight long tentacles', 'name': 'octopus_(animal)'}, {'frequency': 'c', 'id': 743, 'synset': 'oil_lamp.n.01', 'synonyms': ['oil_lamp', 'kerosene_lamp', 'kerosine_lamp'], 'def': 'a lamp that burns oil (as kerosine) for light', 'name': 'oil_lamp'}, {'frequency': 'c', 'id': 744, 'synset': 'olive_oil.n.01', 'synonyms': ['olive_oil'], 'def': 'oil from olives', 'name': 'olive_oil'}, 
{'frequency': 'r', 'id': 745, 'synset': 'omelet.n.01', 'synonyms': ['omelet', 'omelette'], 'def': 'beaten eggs cooked until just set; may be folded around e.g. ham or cheese or jelly', 'name': 'omelet'}, {'frequency': 'f', 'id': 746, 'synset': 'onion.n.01', 'synonyms': ['onion'], 'def': 'the bulb of an onion plant', 'name': 'onion'}, {'frequency': 'f', 'id': 747, 'synset': 'orange.n.01', 'synonyms': ['orange_(fruit)'], 'def': 'orange (FRUIT of an orange tree)', 'name': 'orange_(fruit)'}, {'frequency': 'c', 'id': 748, 'synset': 'orange_juice.n.01', 'synonyms': ['orange_juice'], 'def': 'bottled or freshly squeezed juice of oranges', 'name': 'orange_juice'}, {'frequency': 'r', 'id': 749, 'synset': 'oregano.n.01', 'synonyms': ['oregano', 'marjoram'], 'def': 'aromatic Eurasian perennial herb used in cooking and baking', 'name': 'oregano'}, {'frequency': 'c', 'id': 750, 'synset': 'ostrich.n.02', 'synonyms': ['ostrich'], 'def': 'fast-running African flightless bird with two-toed feet; largest living bird', 'name': 'ostrich'}, {'frequency': 'c', 'id': 751, 'synset': 'ottoman.n.03', 'synonyms': ['ottoman', 'pouf', 'pouffe', 'hassock'], 'def': 'thick cushion used as a seat', 'name': 'ottoman'}, {'frequency': 'c', 'id': 752, 'synset': 'overall.n.01', 'synonyms': ['overalls_(clothing)'], 'def': 'work clothing consisting of denim trousers usually with a bib and shoulder straps', 'name': 'overalls_(clothing)'}, {'frequency': 'c', 'id': 753, 'synset': 'owl.n.01', 'synonyms': ['owl'], 'def': 'nocturnal bird of prey with hawk-like beak and claws and large head with front-facing eyes', 'name': 'owl'}, {'frequency': 'c', 'id': 754, 'synset': 'packet.n.03', 'synonyms': ['packet'], 'def': 'a small package or bundle', 'name': 'packet'}, {'frequency': 'r', 'id': 755, 'synset': 'pad.n.03', 'synonyms': ['inkpad', 'inking_pad', 'stamp_pad'], 'def': 'absorbent material saturated with ink used to transfer ink evenly to a rubber stamp', 'name': 'inkpad'}, {'frequency': 'c', 'id': 756, 'synset': 'pad.n.04', 'synonyms': ['pad'], 'def': 'a flat mass of soft material used for protection, stuffing, or comfort', 'name': 'pad'}, {'frequency': 'c', 'id': 757, 'synset': 'paddle.n.04', 'synonyms': ['paddle', 'boat_paddle'], 'def': 'a short light oar used without an oarlock to propel a canoe or small boat', 'name': 'paddle'}, {'frequency': 'c', 'id': 758, 'synset': 'padlock.n.01', 'synonyms': ['padlock'], 'def': 'a detachable, portable lock', 'name': 'padlock'}, {'frequency': 'r', 'id': 759, 'synset': 'paintbox.n.01', 'synonyms': ['paintbox'], 'def': "a box containing a collection of cubes or tubes of artists' paint", 'name': 'paintbox'}, {'frequency': 'c', 'id': 760, 'synset': 'paintbrush.n.01', 'synonyms': ['paintbrush'], 'def': 'a brush used as an applicator to apply paint', 'name': 'paintbrush'}, {'frequency': 'f', 'id': 761, 'synset': 'painting.n.01', 'synonyms': ['painting'], 'def': 'graphic art consisting of an artistic composition made by applying paints to a surface', 'name': 'painting'}, {'frequency': 'c', 'id': 762, 'synset': 'pajama.n.02', 'synonyms': ['pajamas', 'pyjamas'], 'def': 'loose-fitting nightclothes worn for sleeping or lounging', 'name': 'pajamas'}, {'frequency': 'c', 'id': 763, 'synset': 'palette.n.02', 'synonyms': ['palette', 'pallet'], 'def': 'board that provides a flat surface on which artists mix paints and the range of colors used', 'name': 'palette'}, {'frequency': 'f', 'id': 764, 'synset': 'pan.n.01', 'synonyms': ['pan_(for_cooking)', 'cooking_pan'], 'def': 'cooking utensil consisting of a wide 
metal vessel', 'name': 'pan_(for_cooking)'}, {'frequency': 'r', 'id': 765, 'synset': 'pan.n.03', 'synonyms': ['pan_(metal_container)'], 'def': 'shallow container made of metal', 'name': 'pan_(metal_container)'}, {'frequency': 'c', 'id': 766, 'synset': 'pancake.n.01', 'synonyms': ['pancake'], 'def': 'a flat cake of thin batter fried on both sides on a griddle', 'name': 'pancake'}, {'frequency': 'r', 'id': 767, 'synset': 'pantyhose.n.01', 'synonyms': ['pantyhose'], 'def': "a woman's tights consisting of underpants and stockings", 'name': 'pantyhose'}, {'frequency': 'r', 'id': 768, 'synset': 'papaya.n.02', 'synonyms': ['papaya'], 'def': 'large oval melon-like tropical fruit with yellowish flesh', 'name': 'papaya'}, {'frequency': 'r', 'id': 769, 'synset': 'paper_clip.n.01', 'synonyms': ['paperclip'], 'def': 'a wire or plastic clip for holding sheets of paper together', 'name': 'paperclip'}, {'frequency': 'f', 'id': 770, 'synset': 'paper_plate.n.01', 'synonyms': ['paper_plate'], 'def': 'a disposable plate made of cardboard', 'name': 'paper_plate'}, {'frequency': 'f', 'id': 771, 'synset': 'paper_towel.n.01', 'synonyms': ['paper_towel'], 'def': 'a disposable towel made of absorbent paper', 'name': 'paper_towel'}, {'frequency': 'r', 'id': 772, 'synset': 'paperback_book.n.01', 'synonyms': ['paperback_book', 'paper-back_book', 'softback_book', 'soft-cover_book'], 'def': 'a book with paper covers', 'name': 'paperback_book'}, {'frequency': 'r', 'id': 773, 'synset': 'paperweight.n.01', 'synonyms': ['paperweight'], 'def': 'a weight used to hold down a stack of papers', 'name': 'paperweight'}, {'frequency': 'c', 'id': 774, 'synset': 'parachute.n.01', 'synonyms': ['parachute'], 'def': 'rescue equipment consisting of a device that fills with air and retards your fall', 'name': 'parachute'}, {'frequency': 'r', 'id': 775, 'synset': 'parakeet.n.01', 'synonyms': ['parakeet', 'parrakeet', 'parroket', 'paraquet', 'paroquet', 'parroquet'], 'def': 'any of numerous small slender long-tailed parrots', 'name': 'parakeet'}, {'frequency': 'c', 'id': 776, 'synset': 'parasail.n.01', 'synonyms': ['parasail_(sports)'], 'def': 'parachute that will lift a person up into the air when it is towed by a motorboat or a car', 'name': 'parasail_(sports)'}, {'frequency': 'r', 'id': 777, 'synset': 'parchment.n.01', 'synonyms': ['parchment'], 'def': 'a superior paper resembling sheepskin', 'name': 'parchment'}, {'frequency': 'r', 'id': 778, 'synset': 'parka.n.01', 'synonyms': ['parka', 'anorak'], 'def': "a kind of heavy jacket (`windcheater' is a British term)", 'name': 'parka'}, {'frequency': 'f', 'id': 779, 'synset': 'parking_meter.n.01', 'synonyms': ['parking_meter'], 'def': 'a coin-operated timer located next to a parking space', 'name': 'parking_meter'}, {'frequency': 'c', 'id': 780, 'synset': 'parrot.n.01', 'synonyms': ['parrot'], 'def': 'usually brightly colored tropical birds with short hooked beaks and the ability to mimic sounds', 'name': 'parrot'}, {'frequency': 'c', 'id': 781, 'synset': 'passenger_car.n.01', 'synonyms': ['passenger_car_(part_of_a_train)', 'coach_(part_of_a_train)'], 'def': 'a railcar where passengers ride', 'name': 'passenger_car_(part_of_a_train)'}, {'frequency': 'r', 'id': 782, 'synset': 'passenger_ship.n.01', 'synonyms': ['passenger_ship'], 'def': 'a ship built to carry passengers', 'name': 'passenger_ship'}, {'frequency': 'r', 'id': 783, 'synset': 'passport.n.02', 'synonyms': ['passport'], 'def': 'a document issued by a country to a citizen allowing that person to travel abroad and re-enter the home 
country', 'name': 'passport'}, {'frequency': 'f', 'id': 784, 'synset': 'pastry.n.02', 'synonyms': ['pastry'], 'def': 'any of various baked foods made of dough or batter', 'name': 'pastry'}, {'frequency': 'r', 'id': 785, 'synset': 'patty.n.01', 'synonyms': ['patty_(food)'], 'def': 'small flat mass of chopped food', 'name': 'patty_(food)'}, {'frequency': 'c', 'id': 786, 'synset': 'pea.n.01', 'synonyms': ['pea_(food)'], 'def': 'seed of a pea plant used for food', 'name': 'pea_(food)'}, {'frequency': 'c', 'id': 787, 'synset': 'peach.n.03', 'synonyms': ['peach'], 'def': 'downy juicy fruit with sweet yellowish or whitish flesh', 'name': 'peach'}, {'frequency': 'c', 'id': 788, 'synset': 'peanut_butter.n.01', 'synonyms': ['peanut_butter'], 'def': 'a spread made from ground peanuts', 'name': 'peanut_butter'}, {'frequency': 'c', 'id': 789, 'synset': 'pear.n.01', 'synonyms': ['pear'], 'def': 'sweet juicy gritty-textured fruit available in many varieties', 'name': 'pear'}, {'frequency': 'r', 'id': 790, 'synset': 'peeler.n.03', 'synonyms': ['peeler_(tool_for_fruit_and_vegetables)'], 'def': 'a device for peeling vegetables or fruits', 'name': 'peeler_(tool_for_fruit_and_vegetables)'}, {'frequency': 'r', 'id': 791, 'synset': 'pegboard.n.01', 'synonyms': ['pegboard'], 'def': 'a board perforated with regularly spaced holes into which pegs can be fitted', 'name': 'pegboard'}, {'frequency': 'c', 'id': 792, 'synset': 'pelican.n.01', 'synonyms': ['pelican'], 'def': 'large long-winged warm-water seabird having a large bill with a distensible pouch for fish', 'name': 'pelican'}, {'frequency': 'f', 'id': 793, 'synset': 'pen.n.01', 'synonyms': ['pen'], 'def': 'a writing implement with a point from which ink flows', 'name': 'pen'}, {'frequency': 'c', 'id': 794, 'synset': 'pencil.n.01', 'synonyms': ['pencil'], 'def': 'a thin cylindrical pointed writing implement made of wood and graphite', 'name': 'pencil'}, {'frequency': 'r', 'id': 795, 'synset': 'pencil_box.n.01', 'synonyms': ['pencil_box', 'pencil_case'], 'def': 'a box for holding pencils', 'name': 'pencil_box'}, {'frequency': 'r', 'id': 796, 'synset': 'pencil_sharpener.n.01', 'synonyms': ['pencil_sharpener'], 'def': 'a rotary implement for sharpening the point on pencils', 'name': 'pencil_sharpener'}, {'frequency': 'r', 'id': 797, 'synset': 'pendulum.n.01', 'synonyms': ['pendulum'], 'def': 'an apparatus consisting of an object mounted so that it swings freely under the influence of gravity', 'name': 'pendulum'}, {'frequency': 'c', 'id': 798, 'synset': 'penguin.n.01', 'synonyms': ['penguin'], 'def': 'short-legged flightless birds of cold southern regions having webbed feet and wings modified as flippers', 'name': 'penguin'}, {'frequency': 'r', 'id': 799, 'synset': 'pennant.n.02', 'synonyms': ['pennant'], 'def': 'a flag longer than it is wide (and often tapering)', 'name': 'pennant'}, {'frequency': 'r', 'id': 800, 'synset': 'penny.n.02', 'synonyms': ['penny_(coin)'], 'def': 'a coin worth one-hundredth of the value of the basic unit', 'name': 'penny_(coin)'}, {'frequency': 'c', 'id': 801, 'synset': 'pepper.n.03', 'synonyms': ['pepper', 'peppercorn'], 'def': 'pungent seasoning from the berry of the common pepper plant; whole or ground', 'name': 'pepper'}, {'frequency': 'c', 'id': 802, 'synset': 'pepper_mill.n.01', 'synonyms': ['pepper_mill', 'pepper_grinder'], 'def': 'a mill for grinding pepper', 'name': 'pepper_mill'}, {'frequency': 'c', 'id': 803, 'synset': 'perfume.n.02', 'synonyms': ['perfume'], 'def': 'a toiletry that emits and diffuses a fragrant odor', 
'name': 'perfume'}, {'frequency': 'r', 'id': 804, 'synset': 'persimmon.n.02', 'synonyms': ['persimmon'], 'def': 'orange fruit resembling a plum; edible when fully ripe', 'name': 'persimmon'}, {'frequency': 'f', 'id': 805, 'synset': 'person.n.01', 'synonyms': ['baby', 'child', 'boy', 'girl', 'man', 'woman', 'person', 'human'], 'def': 'a human being', 'name': 'baby'}, {'frequency': 'r', 'id': 806, 'synset': 'pet.n.01', 'synonyms': ['pet'], 'def': 'a domesticated animal kept for companionship or amusement', 'name': 'pet'}, {'frequency': 'r', 'id': 807, 'synset': 'petfood.n.01', 'synonyms': ['petfood', 'pet-food'], 'def': 'food prepared for animal pets', 'name': 'petfood'}, {'frequency': 'r', 'id': 808, 'synset': 'pew.n.01', 'synonyms': ['pew_(church_bench)', 'church_bench'], 'def': 'long bench with backs; used in church by the congregation', 'name': 'pew_(church_bench)'}, {'frequency': 'r', 'id': 809, 'synset': 'phonebook.n.01', 'synonyms': ['phonebook', 'telephone_book', 'telephone_directory'], 'def': 'a directory containing an alphabetical list of telephone subscribers and their telephone numbers', 'name': 'phonebook'}, {'frequency': 'c', 'id': 810, 'synset': 'phonograph_record.n.01', 'synonyms': ['phonograph_record', 'phonograph_recording', 'record_(phonograph_recording)'], 'def': 'sound recording consisting of a typically black disk with a continuous groove', 'name': 'phonograph_record'}, {'frequency': 'c', 'id': 811, 'synset': 'piano.n.01', 'synonyms': ['piano'], 'def': 'a keyboard instrument that is played by depressing keys that cause hammers to strike tuned strings and produce sounds', 'name': 'piano'}, {'frequency': 'f', 'id': 812, 'synset': 'pickle.n.01', 'synonyms': ['pickle'], 'def': 'vegetables (especially cucumbers) preserved in brine or vinegar', 'name': 'pickle'}, {'frequency': 'f', 'id': 813, 'synset': 'pickup.n.01', 'synonyms': ['pickup_truck'], 'def': 'a light truck with an open body and low sides and a tailboard', 'name': 'pickup_truck'}, {'frequency': 'c', 'id': 814, 'synset': 'pie.n.01', 'synonyms': ['pie'], 'def': 'dish baked in pastry-lined pan often with a pastry top', 'name': 'pie'}, {'frequency': 'c', 'id': 815, 'synset': 'pigeon.n.01', 'synonyms': ['pigeon'], 'def': 'wild and domesticated birds having a heavy body and short legs', 'name': 'pigeon'}, {'frequency': 'r', 'id': 816, 'synset': 'piggy_bank.n.01', 'synonyms': ['piggy_bank', 'penny_bank'], 'def': "a child's coin bank (often shaped like a pig)", 'name': 'piggy_bank'}, {'frequency': 'f', 'id': 817, 'synset': 'pillow.n.01', 'synonyms': ['pillow'], 'def': 'a cushion to support the head of a sleeping person', 'name': 'pillow'}, {'frequency': 'r', 'id': 818, 'synset': 'pin.n.09', 'synonyms': ['pin_(non_jewelry)'], 'def': 'a small slender (often pointed) piece of wood or metal used to support or fasten or attach things', 'name': 'pin_(non_jewelry)'}, {'frequency': 'f', 'id': 819, 'synset': 'pineapple.n.02', 'synonyms': ['pineapple'], 'def': 'large sweet fleshy tropical fruit with a tuft of stiff leaves', 'name': 'pineapple'}, {'frequency': 'c', 'id': 820, 'synset': 'pinecone.n.01', 'synonyms': ['pinecone'], 'def': 'the seed-producing cone of a pine tree', 'name': 'pinecone'}, {'frequency': 'r', 'id': 821, 'synset': 'ping-pong_ball.n.01', 'synonyms': ['ping-pong_ball'], 'def': 'light hollow ball used in playing table tennis', 'name': 'ping-pong_ball'}, {'frequency': 'r', 'id': 822, 'synset': 'pinwheel.n.03', 'synonyms': ['pinwheel'], 'def': 'a toy consisting of vanes of colored paper or plastic that is pinned to a 
stick and spins when it is pointed into the wind', 'name': 'pinwheel'}, {'frequency': 'r', 'id': 823, 'synset': 'pipe.n.01', 'synonyms': ['tobacco_pipe'], 'def': 'a tube with a small bowl at one end; used for smoking tobacco', 'name': 'tobacco_pipe'}, {'frequency': 'f', 'id': 824, 'synset': 'pipe.n.02', 'synonyms': ['pipe', 'piping'], 'def': 'a long tube made of metal or plastic that is used to carry water or oil or gas etc.', 'name': 'pipe'}, {'frequency': 'r', 'id': 825, 'synset': 'pistol.n.01', 'synonyms': ['pistol', 'handgun'], 'def': 'a firearm that is held and fired with one hand', 'name': 'pistol'}, {'frequency': 'r', 'id': 826, 'synset': 'pita.n.01', 'synonyms': ['pita_(bread)', 'pocket_bread'], 'def': 'usually small round bread that can open into a pocket for filling', 'name': 'pita_(bread)'}, {'frequency': 'f', 'id': 827, 'synset': 'pitcher.n.02', 'synonyms': ['pitcher_(vessel_for_liquid)', 'ewer'], 'def': 'an open vessel with a handle and a spout for pouring', 'name': 'pitcher_(vessel_for_liquid)'}, {'frequency': 'r', 'id': 828, 'synset': 'pitchfork.n.01', 'synonyms': ['pitchfork'], 'def': 'a long-handled hand tool with sharp widely spaced prongs for lifting and pitching hay', 'name': 'pitchfork'}, {'frequency': 'f', 'id': 829, 'synset': 'pizza.n.01', 'synonyms': ['pizza'], 'def': 'Italian open pie made of thin bread dough spread with a spiced mixture of e.g. tomato sauce and cheese', 'name': 'pizza'}, {'frequency': 'f', 'id': 830, 'synset': 'place_mat.n.01', 'synonyms': ['place_mat'], 'def': 'a mat placed on a table for an individual place setting', 'name': 'place_mat'}, {'frequency': 'f', 'id': 831, 'synset': 'plate.n.04', 'synonyms': ['plate'], 'def': 'dish on which food is served or from which food is eaten', 'name': 'plate'}, {'frequency': 'c', 'id': 832, 'synset': 'platter.n.01', 'synonyms': ['platter'], 'def': 'a large shallow dish used for serving food', 'name': 'platter'}, {'frequency': 'r', 'id': 833, 'synset': 'playing_card.n.01', 'synonyms': ['playing_card'], 'def': 'one of a pack of cards that are used to play card games', 'name': 'playing_card'}, {'frequency': 'r', 'id': 834, 'synset': 'playpen.n.01', 'synonyms': ['playpen'], 'def': 'a portable enclosure in which babies may be left to play', 'name': 'playpen'}, {'frequency': 'c', 'id': 835, 'synset': 'pliers.n.01', 'synonyms': ['pliers', 'plyers'], 'def': 'a gripping hand tool with two hinged arms and (usually) serrated jaws', 'name': 'pliers'}, {'frequency': 'r', 'id': 836, 'synset': 'plow.n.01', 'synonyms': ['plow_(farm_equipment)', 'plough_(farm_equipment)'], 'def': 'a farm tool having one or more heavy blades to break the soil and cut a furrow prior to sowing', 'name': 'plow_(farm_equipment)'}, {'frequency': 'r', 'id': 837, 'synset': 'pocket_watch.n.01', 'synonyms': ['pocket_watch'], 'def': 'a watch that is carried in a small watch pocket', 'name': 'pocket_watch'}, {'frequency': 'c', 'id': 838, 'synset': 'pocketknife.n.01', 'synonyms': ['pocketknife'], 'def': 'a knife with a blade that folds into the handle; suitable for carrying in the pocket', 'name': 'pocketknife'}, {'frequency': 'c', 'id': 839, 'synset': 'poker.n.01', 'synonyms': ['poker_(fire_stirring_tool)', 'stove_poker', 'fire_hook'], 'def': 'fire iron consisting of a metal rod with a handle; used to stir a fire', 'name': 'poker_(fire_stirring_tool)'}, {'frequency': 'f', 'id': 840, 'synset': 'pole.n.01', 'synonyms': ['pole', 'post'], 'def': 'a long (usually round) rod of wood or metal or plastic', 'name': 'pole'}, {'frequency': 'r', 'id': 841, 'synset': 
'police_van.n.01', 'synonyms': ['police_van', 'police_wagon', 'paddy_wagon', 'patrol_wagon'], 'def': 'van used by police to transport prisoners', 'name': 'police_van'}, {'frequency': 'f', 'id': 842, 'synset': 'polo_shirt.n.01', 'synonyms': ['polo_shirt', 'sport_shirt'], 'def': 'a shirt with short sleeves designed for comfort and casual wear', 'name': 'polo_shirt'}, {'frequency': 'r', 'id': 843, 'synset': 'poncho.n.01', 'synonyms': ['poncho'], 'def': 'a blanket-like cloak with a hole in the center for the head', 'name': 'poncho'}, {'frequency': 'c', 'id': 844, 'synset': 'pony.n.05', 'synonyms': ['pony'], 'def': 'any of various breeds of small gentle horses usually less than five feet high at the shoulder', 'name': 'pony'}, {'frequency': 'r', 'id': 845, 'synset': 'pool_table.n.01', 'synonyms': ['pool_table', 'billiard_table', 'snooker_table'], 'def': 'game equipment consisting of a heavy table on which pool is played', 'name': 'pool_table'}, {'frequency': 'f', 'id': 846, 'synset': 'pop.n.02', 'synonyms': ['pop_(soda)', 'soda_(pop)', 'tonic', 'soft_drink'], 'def': 'a sweet drink containing carbonated water and flavoring', 'name': 'pop_(soda)'}, {'frequency': 'r', 'id': 847, 'synset': 'portrait.n.02', 'synonyms': ['portrait', 'portrayal'], 'def': 'any likeness of a person, in any medium', 'name': 'portrait'}, {'frequency': 'c', 'id': 848, 'synset': 'postbox.n.01', 'synonyms': ['postbox_(public)', 'mailbox_(public)'], 'def': 'public box for deposit of mail', 'name': 'postbox_(public)'}, {'frequency': 'c', 'id': 849, 'synset': 'postcard.n.01', 'synonyms': ['postcard', 'postal_card', 'mailing-card'], 'def': 'a card for sending messages by post without an envelope', 'name': 'postcard'}, {'frequency': 'f', 'id': 850, 'synset': 'poster.n.01', 'synonyms': ['poster', 'placard'], 'def': 'a sign posted in a public place as an advertisement', 'name': 'poster'}, {'frequency': 'f', 'id': 851, 'synset': 'pot.n.01', 'synonyms': ['pot'], 'def': 'metal or earthenware cooking vessel that is usually round and deep; often has a handle and lid', 'name': 'pot'}, {'frequency': 'f', 'id': 852, 'synset': 'pot.n.04', 'synonyms': ['flowerpot'], 'def': 'a container in which plants are cultivated', 'name': 'flowerpot'}, {'frequency': 'f', 'id': 853, 'synset': 'potato.n.01', 'synonyms': ['potato'], 'def': 'an edible tuber native to South America', 'name': 'potato'}, {'frequency': 'c', 'id': 854, 'synset': 'potholder.n.01', 'synonyms': ['potholder'], 'def': 'an insulated pad for holding hot pots', 'name': 'potholder'}, {'frequency': 'c', 'id': 855, 'synset': 'pottery.n.01', 'synonyms': ['pottery', 'clayware'], 'def': 'ceramic ware made from clay and baked in a kiln', 'name': 'pottery'}, {'frequency': 'c', 'id': 856, 'synset': 'pouch.n.01', 'synonyms': ['pouch'], 'def': 'a small or medium size container for holding or carrying things', 'name': 'pouch'}, {'frequency': 'r', 'id': 857, 'synset': 'power_shovel.n.01', 'synonyms': ['power_shovel', 'excavator', 'digger'], 'def': 'a machine for excavating', 'name': 'power_shovel'}, {'frequency': 'c', 'id': 858, 'synset': 'prawn.n.01', 'synonyms': ['prawn', 'shrimp'], 'def': 'any of various edible decapod crustaceans', 'name': 'prawn'}, {'frequency': 'f', 'id': 859, 'synset': 'printer.n.03', 'synonyms': ['printer', 'printing_machine'], 'def': 'a machine that prints', 'name': 'printer'}, {'frequency': 'c', 'id': 860, 'synset': 'projectile.n.01', 'synonyms': ['projectile_(weapon)', 'missile'], 'def': 'a weapon that is forcibly thrown or projected at a target', 'name': 
'projectile_(weapon)'}, {'frequency': 'c', 'id': 861, 'synset': 'projector.n.02', 'synonyms': ['projector'], 'def': 'an optical instrument that projects an enlarged image onto a screen', 'name': 'projector'}, {'frequency': 'f', 'id': 862, 'synset': 'propeller.n.01', 'synonyms': ['propeller', 'propellor'], 'def': 'a mechanical device that rotates to push against air or water', 'name': 'propeller'}, {'frequency': 'r', 'id': 863, 'synset': 'prune.n.01', 'synonyms': ['prune'], 'def': 'dried plum', 'name': 'prune'}, {'frequency': 'r', 'id': 864, 'synset': 'pudding.n.01', 'synonyms': ['pudding'], 'def': 'any of various soft thick unsweetened baked dishes', 'name': 'pudding'}, {'frequency': 'r', 'id': 865, 'synset': 'puffer.n.02', 'synonyms': ['puffer_(fish)', 'pufferfish', 'blowfish', 'globefish'], 'def': 'fishes whose elongated spiny body can inflate itself with water or air to form a globe', 'name': 'puffer_(fish)'}, {'frequency': 'r', 'id': 866, 'synset': 'puffin.n.01', 'synonyms': ['puffin'], 'def': 'seabirds having short necks and brightly colored compressed bills', 'name': 'puffin'}, {'frequency': 'r', 'id': 867, 'synset': 'pug.n.01', 'synonyms': ['pug-dog'], 'def': 'small compact smooth-coated breed of Asiatic origin having a tightly curled tail and broad flat wrinkled muzzle', 'name': 'pug-dog'}, {'frequency': 'c', 'id': 868, 'synset': 'pumpkin.n.02', 'synonyms': ['pumpkin'], 'def': 'usually large pulpy deep-yellow round fruit of the squash family maturing in late summer or early autumn', 'name': 'pumpkin'}, {'frequency': 'r', 'id': 869, 'synset': 'punch.n.03', 'synonyms': ['puncher'], 'def': 'a tool for making holes or indentations', 'name': 'puncher'}, {'frequency': 'r', 'id': 870, 'synset': 'puppet.n.01', 'synonyms': ['puppet', 'marionette'], 'def': 'a small figure of a person operated from above with strings by a puppeteer', 'name': 'puppet'}, {'frequency': 'r', 'id': 871, 'synset': 'puppy.n.01', 'synonyms': ['puppy'], 'def': 'a young dog', 'name': 'puppy'}, {'frequency': 'r', 'id': 872, 'synset': 'quesadilla.n.01', 'synonyms': ['quesadilla'], 'def': 'a tortilla that is filled with cheese and heated', 'name': 'quesadilla'}, {'frequency': 'r', 'id': 873, 'synset': 'quiche.n.02', 'synonyms': ['quiche'], 'def': 'a tart filled with rich unsweetened custard; often contains other ingredients (as cheese or ham or seafood or vegetables)', 'name': 'quiche'}, {'frequency': 'f', 'id': 874, 'synset': 'quilt.n.01', 'synonyms': ['quilt', 'comforter'], 'def': 'bedding made of two layers of cloth filled with stuffing and stitched together', 'name': 'quilt'}, {'frequency': 'c', 'id': 875, 'synset': 'rabbit.n.01', 'synonyms': ['rabbit'], 'def': 'any of various burrowing animals of the family Leporidae having long ears and short tails', 'name': 'rabbit'}, {'frequency': 'r', 'id': 876, 'synset': 'racer.n.02', 'synonyms': ['race_car', 'racing_car'], 'def': 'a fast car that competes in races', 'name': 'race_car'}, {'frequency': 'c', 'id': 877, 'synset': 'racket.n.04', 'synonyms': ['racket', 'racquet'], 'def': 'a sports implement used to strike a ball in various games', 'name': 'racket'}, {'frequency': 'r', 'id': 878, 'synset': 'radar.n.01', 'synonyms': ['radar'], 'def': 'measuring instrument in which the echo of a pulse of microwave radiation is used to detect and locate distant objects', 'name': 'radar'}, {'frequency': 'c', 'id': 879, 'synset': 'radiator.n.03', 'synonyms': ['radiator'], 'def': 'a mechanism consisting of a metal honeycomb through which hot fluids circulate', 'name': 'radiator'}, 
{'frequency': 'c', 'id': 880, 'synset': 'radio_receiver.n.01', 'synonyms': ['radio_receiver', 'radio_set', 'radio', 'tuner_(radio)'], 'def': 'an electronic receiver that detects and demodulates and amplifies transmitted radio signals', 'name': 'radio_receiver'}, {'frequency': 'c', 'id': 881, 'synset': 'radish.n.03', 'synonyms': ['radish', 'daikon'], 'def': 'pungent edible root of any of various cultivated radish plants', 'name': 'radish'}, {'frequency': 'c', 'id': 882, 'synset': 'raft.n.01', 'synonyms': ['raft'], 'def': 'a flat float (usually made of logs or planks) that can be used for transport or as a platform for swimmers', 'name': 'raft'}, {'frequency': 'r', 'id': 883, 'synset': 'rag_doll.n.01', 'synonyms': ['rag_doll'], 'def': 'a cloth doll that is stuffed and (usually) painted', 'name': 'rag_doll'}, {'frequency': 'c', 'id': 884, 'synset': 'raincoat.n.01', 'synonyms': ['raincoat', 'waterproof_jacket'], 'def': 'a water-resistant coat', 'name': 'raincoat'}, {'frequency': 'c', 'id': 885, 'synset': 'ram.n.05', 'synonyms': ['ram_(animal)'], 'def': 'uncastrated adult male sheep', 'name': 'ram_(animal)'}, {'frequency': 'c', 'id': 886, 'synset': 'raspberry.n.02', 'synonyms': ['raspberry'], 'def': 'red or black edible aggregate berries usually smaller than the related blackberries', 'name': 'raspberry'}, {'frequency': 'r', 'id': 887, 'synset': 'rat.n.01', 'synonyms': ['rat'], 'def': 'any of various long-tailed rodents similar to but larger than a mouse', 'name': 'rat'}, {'frequency': 'c', 'id': 888, 'synset': 'razorblade.n.01', 'synonyms': ['razorblade'], 'def': 'a blade that has very sharp edge', 'name': 'razorblade'}, {'frequency': 'c', 'id': 889, 'synset': 'reamer.n.01', 'synonyms': ['reamer_(juicer)', 'juicer', 'juice_reamer'], 'def': 'a squeezer with a conical ridged center that is used for squeezing juice from citrus fruit', 'name': 'reamer_(juicer)'}, {'frequency': 'f', 'id': 890, 'synset': 'rearview_mirror.n.01', 'synonyms': ['rearview_mirror'], 'def': 'car mirror that reflects the view out of the rear window', 'name': 'rearview_mirror'}, {'frequency': 'c', 'id': 891, 'synset': 'receipt.n.02', 'synonyms': ['receipt'], 'def': 'an acknowledgment (usually tangible) that payment has been made', 'name': 'receipt'}, {'frequency': 'c', 'id': 892, 'synset': 'recliner.n.01', 'synonyms': ['recliner', 'reclining_chair', 'lounger_(chair)'], 'def': 'an armchair whose back can be lowered and foot can be raised to allow the sitter to recline in it', 'name': 'recliner'}, {'frequency': 'r', 'id': 893, 'synset': 'record_player.n.01', 'synonyms': ['record_player', 'phonograph_(record_player)', 'turntable'], 'def': 'machine in which rotating records cause a stylus to vibrate and the vibrations are amplified acoustically or electronically', 'name': 'record_player'}, {'frequency': 'r', 'id': 894, 'synset': 'red_cabbage.n.02', 'synonyms': ['red_cabbage'], 'def': 'compact head of purplish-red leaves', 'name': 'red_cabbage'}, {'frequency': 'f', 'id': 895, 'synset': 'reflector.n.01', 'synonyms': ['reflector'], 'def': 'device that reflects light, radiation, etc.', 'name': 'reflector'}, {'frequency': 'f', 'id': 896, 'synset': 'remote_control.n.01', 'synonyms': ['remote_control'], 'def': 'a device that can be used to control a machine or apparatus from a distance', 'name': 'remote_control'}, {'frequency': 'c', 'id': 897, 'synset': 'rhinoceros.n.01', 'synonyms': ['rhinoceros'], 'def': 'massive powerful herbivorous odd-toed ungulate of southeast Asia and Africa having very thick skin and one or two horns on the 
snout', 'name': 'rhinoceros'}, {'frequency': 'r', 'id': 898, 'synset': 'rib.n.03', 'synonyms': ['rib_(food)'], 'def': 'cut of meat including one or more ribs', 'name': 'rib_(food)'}, {'frequency': 'r', 'id': 899, 'synset': 'rifle.n.01', 'synonyms': ['rifle'], 'def': 'a shoulder firearm with a long barrel', 'name': 'rifle'}, {'frequency': 'f', 'id': 900, 'synset': 'ring.n.08', 'synonyms': ['ring'], 'def': 'jewelry consisting of a circlet of precious metal (often set with jewels) worn on the finger', 'name': 'ring'}, {'frequency': 'r', 'id': 901, 'synset': 'river_boat.n.01', 'synonyms': ['river_boat'], 'def': 'a boat used on rivers or to ply a river', 'name': 'river_boat'}, {'frequency': 'r', 'id': 902, 'synset': 'road_map.n.02', 'synonyms': ['road_map'], 'def': '(NOT A ROAD) a MAP showing roads (for automobile travel)', 'name': 'road_map'}, {'frequency': 'c', 'id': 903, 'synset': 'robe.n.01', 'synonyms': ['robe'], 'def': 'any loose flowing garment', 'name': 'robe'}, {'frequency': 'c', 'id': 904, 'synset': 'rocking_chair.n.01', 'synonyms': ['rocking_chair'], 'def': 'a chair mounted on rockers', 'name': 'rocking_chair'}, {'frequency': 'r', 'id': 905, 'synset': 'roller_skate.n.01', 'synonyms': ['roller_skate'], 'def': 'a shoe with pairs of rollers (small hard wheels) fixed to the sole', 'name': 'roller_skate'}, {'frequency': 'r', 'id': 906, 'synset': 'rollerblade.n.01', 'synonyms': ['Rollerblade'], 'def': 'an in-line variant of a roller skate', 'name': 'Rollerblade'}, {'frequency': 'c', 'id': 907, 'synset': 'rolling_pin.n.01', 'synonyms': ['rolling_pin'], 'def': 'utensil consisting of a cylinder (usually of wood) with a handle at each end; used to roll out dough', 'name': 'rolling_pin'}, {'frequency': 'r', 'id': 908, 'synset': 'root_beer.n.01', 'synonyms': ['root_beer'], 'def': 'carbonated drink containing extracts of roots and herbs', 'name': 'root_beer'}, {'frequency': 'c', 'id': 909, 'synset': 'router.n.02', 'synonyms': ['router_(computer_equipment)'], 'def': 'a device that forwards data packets between computer networks', 'name': 'router_(computer_equipment)'}, {'frequency': 'f', 'id': 910, 'synset': 'rubber_band.n.01', 'synonyms': ['rubber_band', 'elastic_band'], 'def': 'a narrow band of elastic rubber used to hold things (such as papers) together', 'name': 'rubber_band'}, {'frequency': 'c', 'id': 911, 'synset': 'runner.n.08', 'synonyms': ['runner_(carpet)'], 'def': 'a long narrow carpet', 'name': 'runner_(carpet)'}, {'frequency': 'f', 'id': 912, 'synset': 'sack.n.01', 'synonyms': ['plastic_bag', 'paper_bag'], 'def': "a bag made of paper or plastic for holding customer's purchases", 'name': 'plastic_bag'}, {'frequency': 'f', 'id': 913, 'synset': 'saddle.n.01', 'synonyms': ['saddle_(on_an_animal)'], 'def': 'a seat for the rider of a horse or camel', 'name': 'saddle_(on_an_animal)'}, {'frequency': 'f', 'id': 914, 'synset': 'saddle_blanket.n.01', 'synonyms': ['saddle_blanket', 'saddlecloth', 'horse_blanket'], 'def': 'stable gear consisting of a blanket placed under the saddle', 'name': 'saddle_blanket'}, {'frequency': 'c', 'id': 915, 'synset': 'saddlebag.n.01', 'synonyms': ['saddlebag'], 'def': 'a large bag (or pair of bags) hung over a saddle', 'name': 'saddlebag'}, {'frequency': 'r', 'id': 916, 'synset': 'safety_pin.n.01', 'synonyms': ['safety_pin'], 'def': 'a pin in the form of a clasp; has a guard so the point of the pin will not stick the user', 'name': 'safety_pin'}, {'frequency': 'c', 'id': 917, 'synset': 'sail.n.01', 'synonyms': ['sail'], 'def': 'a large piece of fabric by means of 
which wind is used to propel a sailing vessel', 'name': 'sail'}, {'frequency': 'c', 'id': 918, 'synset': 'salad.n.01', 'synonyms': ['salad'], 'def': 'food mixtures either arranged on a plate or tossed and served with a moist dressing; usually consisting of or including greens', 'name': 'salad'}, {'frequency': 'r', 'id': 919, 'synset': 'salad_plate.n.01', 'synonyms': ['salad_plate', 'salad_bowl'], 'def': 'a plate or bowl for individual servings of salad', 'name': 'salad_plate'}, {'frequency': 'r', 'id': 920, 'synset': 'salami.n.01', 'synonyms': ['salami'], 'def': 'highly seasoned fatty sausage of pork and beef usually dried', 'name': 'salami'}, {'frequency': 'r', 'id': 921, 'synset': 'salmon.n.01', 'synonyms': ['salmon_(fish)'], 'def': 'any of various large food and game fishes of northern waters', 'name': 'salmon_(fish)'}, {'frequency': 'r', 'id': 922, 'synset': 'salmon.n.03', 'synonyms': ['salmon_(food)'], 'def': 'flesh of any of various marine or freshwater fish of the family Salmonidae', 'name': 'salmon_(food)'}, {'frequency': 'r', 'id': 923, 'synset': 'salsa.n.01', 'synonyms': ['salsa'], 'def': 'spicy sauce of tomatoes and onions and chili peppers to accompany Mexican foods', 'name': 'salsa'}, {'frequency': 'f', 'id': 924, 'synset': 'saltshaker.n.01', 'synonyms': ['saltshaker'], 'def': 'a shaker with a perforated top for sprinkling salt', 'name': 'saltshaker'}, {'frequency': 'f', 'id': 925, 'synset': 'sandal.n.01', 'synonyms': ['sandal_(type_of_shoe)'], 'def': 'a shoe consisting of a sole fastened by straps to the foot', 'name': 'sandal_(type_of_shoe)'}, {'frequency': 'f', 'id': 926, 'synset': 'sandwich.n.01', 'synonyms': ['sandwich'], 'def': 'two (or more) slices of bread with a filling between them', 'name': 'sandwich'}, {'frequency': 'r', 'id': 927, 'synset': 'satchel.n.01', 'synonyms': ['satchel'], 'def': 'luggage consisting of a small case with a flat bottom and (usually) a shoulder strap', 'name': 'satchel'}, {'frequency': 'r', 'id': 928, 'synset': 'saucepan.n.01', 'synonyms': ['saucepan'], 'def': 'a deep pan with a handle; used for stewing or boiling', 'name': 'saucepan'}, {'frequency': 'f', 'id': 929, 'synset': 'saucer.n.02', 'synonyms': ['saucer'], 'def': 'a small shallow dish for holding a cup at the table', 'name': 'saucer'}, {'frequency': 'f', 'id': 930, 'synset': 'sausage.n.01', 'synonyms': ['sausage'], 'def': 'highly seasoned minced meat stuffed in casings', 'name': 'sausage'}, {'frequency': 'r', 'id': 931, 'synset': 'sawhorse.n.01', 'synonyms': ['sawhorse', 'sawbuck'], 'def': 'a framework for holding wood that is being sawed', 'name': 'sawhorse'}, {'frequency': 'r', 'id': 932, 'synset': 'sax.n.02', 'synonyms': ['saxophone'], 'def': "a wind instrument with a `J'-shaped form typically made of brass", 'name': 'saxophone'}, {'frequency': 'f', 'id': 933, 'synset': 'scale.n.07', 'synonyms': ['scale_(measuring_instrument)'], 'def': 'a measuring instrument for weighing; shows amount of mass', 'name': 'scale_(measuring_instrument)'}, {'frequency': 'r', 'id': 934, 'synset': 'scarecrow.n.01', 'synonyms': ['scarecrow', 'strawman'], 'def': 'an effigy in the shape of a man to frighten birds away from seeds', 'name': 'scarecrow'}, {'frequency': 'f', 'id': 935, 'synset': 'scarf.n.01', 'synonyms': ['scarf'], 'def': 'a garment worn around the head or neck or shoulders for warmth or decoration', 'name': 'scarf'}, {'frequency': 'c', 'id': 936, 'synset': 'school_bus.n.01', 'synonyms': ['school_bus'], 'def': 'a bus used to transport children to or from school', 'name': 'school_bus'}, 
{'frequency': 'f', 'id': 937, 'synset': 'scissors.n.01', 'synonyms': ['scissors'], 'def': 'a tool having two crossed pivoting blades with looped handles', 'name': 'scissors'}, {'frequency': 'c', 'id': 938, 'synset': 'scoreboard.n.01', 'synonyms': ['scoreboard'], 'def': 'a large board for displaying the score of a contest (and some other information)', 'name': 'scoreboard'}, {'frequency': 'c', 'id': 939, 'synset': 'scrambled_eggs.n.01', 'synonyms': ['scrambled_eggs'], 'def': 'eggs beaten and cooked to a soft firm consistency while stirring', 'name': 'scrambled_eggs'}, {'frequency': 'r', 'id': 940, 'synset': 'scraper.n.01', 'synonyms': ['scraper'], 'def': 'any of various hand tools for scraping', 'name': 'scraper'}, {'frequency': 'r', 'id': 941, 'synset': 'scratcher.n.03', 'synonyms': ['scratcher'], 'def': 'a device used for scratching', 'name': 'scratcher'}, {'frequency': 'c', 'id': 942, 'synset': 'screwdriver.n.01', 'synonyms': ['screwdriver'], 'def': 'a hand tool for driving screws; has a tip that fits into the head of a screw', 'name': 'screwdriver'}, {'frequency': 'c', 'id': 943, 'synset': 'scrub_brush.n.01', 'synonyms': ['scrubbing_brush'], 'def': 'a brush with short stiff bristles for heavy cleaning', 'name': 'scrubbing_brush'}, {'frequency': 'c', 'id': 944, 'synset': 'sculpture.n.01', 'synonyms': ['sculpture'], 'def': 'a three-dimensional work of art', 'name': 'sculpture'}, {'frequency': 'r', 'id': 945, 'synset': 'seabird.n.01', 'synonyms': ['seabird', 'seafowl'], 'def': 'a bird that frequents coastal waters and the open ocean: gulls; pelicans; gannets; cormorants; albatrosses; petrels; etc.', 'name': 'seabird'}, {'frequency': 'r', 'id': 946, 'synset': 'seahorse.n.02', 'synonyms': ['seahorse'], 'def': 'small fish with horse-like heads bent sharply downward and curled tails', 'name': 'seahorse'}, {'frequency': 'r', 'id': 947, 'synset': 'seaplane.n.01', 'synonyms': ['seaplane', 'hydroplane'], 'def': 'an airplane that can land on or take off from water', 'name': 'seaplane'}, {'frequency': 'c', 'id': 948, 'synset': 'seashell.n.01', 'synonyms': ['seashell'], 'def': 'the shell of a marine organism', 'name': 'seashell'}, {'frequency': 'r', 'id': 949, 'synset': 'seedling.n.01', 'synonyms': ['seedling'], 'def': 'young plant or tree grown from a seed', 'name': 'seedling'}, {'frequency': 'c', 'id': 950, 'synset': 'serving_dish.n.01', 'synonyms': ['serving_dish'], 'def': 'a dish used for serving food', 'name': 'serving_dish'}, {'frequency': 'r', 'id': 951, 'synset': 'sewing_machine.n.01', 'synonyms': ['sewing_machine'], 'def': 'a textile machine used as a home appliance for sewing', 'name': 'sewing_machine'}, {'frequency': 'r', 'id': 952, 'synset': 'shaker.n.03', 'synonyms': ['shaker'], 'def': 'a container in which something can be shaken', 'name': 'shaker'}, {'frequency': 'c', 'id': 953, 'synset': 'shampoo.n.01', 'synonyms': ['shampoo'], 'def': 'cleansing agent consisting of soaps or detergents used for washing the hair', 'name': 'shampoo'}, {'frequency': 'r', 'id': 954, 'synset': 'shark.n.01', 'synonyms': ['shark'], 'def': 'typically large carnivorous fishes with sharp teeth', 'name': 'shark'}, {'frequency': 'r', 'id': 955, 'synset': 'sharpener.n.01', 'synonyms': ['sharpener'], 'def': 'any implement that is used to make something (an edge or a point) sharper', 'name': 'sharpener'}, {'frequency': 'r', 'id': 956, 'synset': 'sharpie.n.03', 'synonyms': ['Sharpie'], 'def': 'a pen with indelible ink that will write on any surface', 'name': 'Sharpie'}, {'frequency': 'r', 'id': 957, 'synset': 
'shaver.n.03', 'synonyms': ['shaver_(electric)', 'electric_shaver', 'electric_razor'], 'def': 'a razor powered by an electric motor', 'name': 'shaver_(electric)'}, {'frequency': 'c', 'id': 958, 'synset': 'shaving_cream.n.01', 'synonyms': ['shaving_cream', 'shaving_soap'], 'def': 'toiletry that forms a rich lather for softening the beard before shaving', 'name': 'shaving_cream'}, {'frequency': 'r', 'id': 959, 'synset': 'shawl.n.01', 'synonyms': ['shawl'], 'def': 'cloak consisting of an oblong piece of cloth used to cover the head and shoulders', 'name': 'shawl'}, {'frequency': 'r', 'id': 960, 'synset': 'shears.n.01', 'synonyms': ['shears'], 'def': 'large scissors with strong blades', 'name': 'shears'}, {'frequency': 'f', 'id': 961, 'synset': 'sheep.n.01', 'synonyms': ['sheep'], 'def': 'woolly usually horned ruminant mammal related to the goat', 'name': 'sheep'}, {'frequency': 'r', 'id': 962, 'synset': 'shepherd_dog.n.01', 'synonyms': ['shepherd_dog', 'sheepdog'], 'def': 'any of various usually long-haired breeds of dog reared to herd and guard sheep', 'name': 'shepherd_dog'}, {'frequency': 'r', 'id': 963, 'synset': 'sherbert.n.01', 'synonyms': ['sherbert', 'sherbet'], 'def': 'a frozen dessert made primarily of fruit juice and sugar', 'name': 'sherbert'}, {'frequency': 'r', 'id': 964, 'synset': 'shield.n.02', 'synonyms': ['shield'], 'def': 'armor carried on the arm to intercept blows', 'name': 'shield'}, {'frequency': 'f', 'id': 965, 'synset': 'shirt.n.01', 'synonyms': ['shirt'], 'def': 'a garment worn on the upper half of the body', 'name': 'shirt'}, {'frequency': 'f', 'id': 966, 'synset': 'shoe.n.01', 'synonyms': ['shoe', 'sneaker_(type_of_shoe)', 'tennis_shoe'], 'def': 'common footwear covering the foot', 'name': 'shoe'}, {'frequency': 'c', 'id': 967, 'synset': 'shopping_bag.n.01', 'synonyms': ['shopping_bag'], 'def': 'a bag made of plastic or strong paper (often with handles); used to transport goods after shopping', 'name': 'shopping_bag'}, {'frequency': 'c', 'id': 968, 'synset': 'shopping_cart.n.01', 'synonyms': ['shopping_cart'], 'def': 'a handcart that holds groceries or other goods while shopping', 'name': 'shopping_cart'}, {'frequency': 'f', 'id': 969, 'synset': 'short_pants.n.01', 'synonyms': ['short_pants', 'shorts_(clothing)', 'trunks_(clothing)'], 'def': 'trousers that end at or above the knee', 'name': 'short_pants'}, {'frequency': 'r', 'id': 970, 'synset': 'shot_glass.n.01', 'synonyms': ['shot_glass'], 'def': 'a small glass adequate to hold a single swallow of whiskey', 'name': 'shot_glass'}, {'frequency': 'c', 'id': 971, 'synset': 'shoulder_bag.n.01', 'synonyms': ['shoulder_bag'], 'def': 'a large handbag that can be carried by a strap looped over the shoulder', 'name': 'shoulder_bag'}, {'frequency': 'c', 'id': 972, 'synset': 'shovel.n.01', 'synonyms': ['shovel'], 'def': 'a hand tool for lifting loose material such as snow, dirt, etc.', 'name': 'shovel'}, {'frequency': 'f', 'id': 973, 'synset': 'shower.n.01', 'synonyms': ['shower_head'], 'def': 'a plumbing fixture that sprays water over you', 'name': 'shower_head'}, {'frequency': 'f', 'id': 974, 'synset': 'shower_curtain.n.01', 'synonyms': ['shower_curtain'], 'def': 'a curtain that keeps water from splashing out of the shower area', 'name': 'shower_curtain'}, {'frequency': 'r', 'id': 975, 'synset': 'shredder.n.01', 'synonyms': ['shredder_(for_paper)'], 'def': 'a device that shreds documents', 'name': 'shredder_(for_paper)'}, {'frequency': 'r', 'id': 976, 'synset': 'sieve.n.01', 'synonyms': ['sieve', 
'screen_(sieve)'], 'def': 'a strainer for separating lumps from powdered material or grading particles', 'name': 'sieve'}, {'frequency': 'f', 'id': 977, 'synset': 'signboard.n.01', 'synonyms': ['signboard'], 'def': 'structure displaying a board on which advertisements can be posted', 'name': 'signboard'}, {'frequency': 'c', 'id': 978, 'synset': 'silo.n.01', 'synonyms': ['silo'], 'def': 'a cylindrical tower used for storing goods', 'name': 'silo'}, {'frequency': 'f', 'id': 979, 'synset': 'sink.n.01', 'synonyms': ['sink'], 'def': 'plumbing fixture consisting of a water basin fixed to a wall or floor and having a drainpipe', 'name': 'sink'}, {'frequency': 'f', 'id': 980, 'synset': 'skateboard.n.01', 'synonyms': ['skateboard'], 'def': 'a board with wheels that is ridden in a standing or crouching position and propelled by foot', 'name': 'skateboard'}, {'frequency': 'c', 'id': 981, 'synset': 'skewer.n.01', 'synonyms': ['skewer'], 'def': 'a long pin for holding meat in position while it is being roasted', 'name': 'skewer'}, {'frequency': 'f', 'id': 982, 'synset': 'ski.n.01', 'synonyms': ['ski'], 'def': 'sports equipment for skiing on snow', 'name': 'ski'}, {'frequency': 'f', 'id': 983, 'synset': 'ski_boot.n.01', 'synonyms': ['ski_boot'], 'def': 'a stiff boot that is fastened to a ski with a ski binding', 'name': 'ski_boot'}, {'frequency': 'f', 'id': 984, 'synset': 'ski_parka.n.01', 'synonyms': ['ski_parka', 'ski_jacket'], 'def': 'a parka to be worn while skiing', 'name': 'ski_parka'}, {'frequency': 'f', 'id': 985, 'synset': 'ski_pole.n.01', 'synonyms': ['ski_pole'], 'def': 'a pole with metal points used as an aid in skiing', 'name': 'ski_pole'}, {'frequency': 'f', 'id': 986, 'synset': 'skirt.n.02', 'synonyms': ['skirt'], 'def': 'a garment hanging from the waist; worn mainly by girls and women', 'name': 'skirt'}, {'frequency': 'c', 'id': 987, 'synset': 'sled.n.01', 'synonyms': ['sled', 'sledge', 'sleigh'], 'def': 'a vehicle or flat object for transportation over snow by sliding or pulled by dogs, etc.', 'name': 'sled'}, {'frequency': 'c', 'id': 988, 'synset': 'sleeping_bag.n.01', 'synonyms': ['sleeping_bag'], 'def': 'large padded bag designed to be slept in outdoors', 'name': 'sleeping_bag'}, {'frequency': 'r', 'id': 989, 'synset': 'sling.n.05', 'synonyms': ['sling_(bandage)', 'triangular_bandage'], 'def': 'bandage to support an injured forearm; slung over the shoulder or neck', 'name': 'sling_(bandage)'}, {'frequency': 'c', 'id': 990, 'synset': 'slipper.n.01', 'synonyms': ['slipper_(footwear)', 'carpet_slipper_(footwear)'], 'def': 'low footwear that can be slipped on and off easily; usually worn indoors', 'name': 'slipper_(footwear)'}, {'frequency': 'r', 'id': 991, 'synset': 'smoothie.n.02', 'synonyms': ['smoothie'], 'def': 'a thick smooth drink consisting of fresh fruit pureed with ice cream or yoghurt or milk', 'name': 'smoothie'}, {'frequency': 'r', 'id': 992, 'synset': 'snake.n.01', 'synonyms': ['snake', 'serpent'], 'def': 'limbless scaly elongate reptile; some are venomous', 'name': 'snake'}, {'frequency': 'f', 'id': 993, 'synset': 'snowboard.n.01', 'synonyms': ['snowboard'], 'def': 'a board that resembles a broad ski or a small surfboard; used in a standing position to slide down snow-covered slopes', 'name': 'snowboard'}, {'frequency': 'c', 'id': 994, 'synset': 'snowman.n.01', 'synonyms': ['snowman'], 'def': 'a figure of a person made of packed snow', 'name': 'snowman'}, {'frequency': 'c', 'id': 995, 'synset': 'snowmobile.n.01', 'synonyms': ['snowmobile'], 'def': 'tracked vehicle for 
travel on snow having skis in front', 'name': 'snowmobile'}, {'frequency': 'f', 'id': 996, 'synset': 'soap.n.01', 'synonyms': ['soap'], 'def': 'a cleansing agent made from the salts of vegetable or animal fats', 'name': 'soap'}, {'frequency': 'f', 'id': 997, 'synset': 'soccer_ball.n.01', 'synonyms': ['soccer_ball'], 'def': "an inflated ball used in playing soccer (called `football' outside of the United States)", 'name': 'soccer_ball'}, {'frequency': 'f', 'id': 998, 'synset': 'sock.n.01', 'synonyms': ['sock'], 'def': 'cloth covering for the foot; worn inside the shoe; reaches to between the ankle and the knee', 'name': 'sock'}, {'frequency': 'r', 'id': 999, 'synset': 'soda_fountain.n.02', 'synonyms': ['soda_fountain'], 'def': 'an apparatus for dispensing soda water', 'name': 'soda_fountain'}, {'frequency': 'r', 'id': 1000, 'synset': 'soda_water.n.01', 'synonyms': ['carbonated_water', 'club_soda', 'seltzer', 'sparkling_water'], 'def': 'effervescent beverage artificially charged with carbon dioxide', 'name': 'carbonated_water'}, {'frequency': 'f', 'id': 1001, 'synset': 'sofa.n.01', 'synonyms': ['sofa', 'couch', 'lounge'], 'def': 'an upholstered seat for more than one person', 'name': 'sofa'}, {'frequency': 'r', 'id': 1002, 'synset': 'softball.n.01', 'synonyms': ['softball'], 'def': 'ball used in playing softball', 'name': 'softball'}, {'frequency': 'c', 'id': 1003, 'synset': 'solar_array.n.01', 'synonyms': ['solar_array', 'solar_battery', 'solar_panel'], 'def': 'electrical device consisting of a large array of connected solar cells', 'name': 'solar_array'}, {'frequency': 'r', 'id': 1004, 'synset': 'sombrero.n.02', 'synonyms': ['sombrero'], 'def': 'a straw hat with a tall crown and broad brim; worn in American southwest and in Mexico', 'name': 'sombrero'}, {'frequency': 'c', 'id': 1005, 'synset': 'soup.n.01', 'synonyms': ['soup'], 'def': 'liquid food especially of meat or fish or vegetable stock often containing pieces of solid food', 'name': 'soup'}, {'frequency': 'r', 'id': 1006, 'synset': 'soup_bowl.n.01', 'synonyms': ['soup_bowl'], 'def': 'a bowl for serving soup', 'name': 'soup_bowl'}, {'frequency': 'c', 'id': 1007, 'synset': 'soupspoon.n.01', 'synonyms': ['soupspoon'], 'def': 'a spoon with a rounded bowl for eating soup', 'name': 'soupspoon'}, {'frequency': 'c', 'id': 1008, 'synset': 'sour_cream.n.01', 'synonyms': ['sour_cream', 'soured_cream'], 'def': 'soured light cream', 'name': 'sour_cream'}, {'frequency': 'r', 'id': 1009, 'synset': 'soya_milk.n.01', 'synonyms': ['soya_milk', 'soybean_milk', 'soymilk'], 'def': 'a milk substitute containing soybean flour and water; used in some infant formulas and in making tofu', 'name': 'soya_milk'}, {'frequency': 'r', 'id': 1010, 'synset': 'space_shuttle.n.01', 'synonyms': ['space_shuttle'], 'def': "a reusable spacecraft with wings for a controlled descent through the Earth's atmosphere", 'name': 'space_shuttle'}, {'frequency': 'r', 'id': 1011, 'synset': 'sparkler.n.02', 'synonyms': ['sparkler_(fireworks)'], 'def': 'a firework that burns slowly and throws out a shower of sparks', 'name': 'sparkler_(fireworks)'}, {'frequency': 'f', 'id': 1012, 'synset': 'spatula.n.02', 'synonyms': ['spatula'], 'def': 'a hand tool with a thin flexible blade used to mix or spread soft substances', 'name': 'spatula'}, {'frequency': 'r', 'id': 1013, 'synset': 'spear.n.01', 'synonyms': ['spear', 'lance'], 'def': 'a long pointed rod used as a tool or weapon', 'name': 'spear'}, {'frequency': 'f', 'id': 1014, 'synset': 'spectacles.n.01', 'synonyms': ['spectacles', 'specs', 
'eyeglasses', 'glasses'], 'def': 'optical instrument consisting of a frame that holds a pair of lenses for correcting defective vision', 'name': 'spectacles'}, {'frequency': 'c', 'id': 1015, 'synset': 'spice_rack.n.01', 'synonyms': ['spice_rack'], 'def': 'a rack for displaying containers filled with spices', 'name': 'spice_rack'}, {'frequency': 'r', 'id': 1016, 'synset': 'spider.n.01', 'synonyms': ['spider'], 'def': 'predatory arachnid with eight legs, two poison fangs, two feelers, and usually two silk-spinning organs at the back end of the body', 'name': 'spider'}, {'frequency': 'c', 'id': 1017, 'synset': 'sponge.n.01', 'synonyms': ['sponge'], 'def': 'a porous mass usable to absorb water typically used for cleaning', 'name': 'sponge'}, {'frequency': 'f', 'id': 1018, 'synset': 'spoon.n.01', 'synonyms': ['spoon'], 'def': 'a piece of cutlery with a shallow bowl-shaped container and a handle', 'name': 'spoon'}, {'frequency': 'c', 'id': 1019, 'synset': 'sportswear.n.01', 'synonyms': ['sportswear', 'athletic_wear', 'activewear'], 'def': 'attire worn for sport or for casual wear', 'name': 'sportswear'}, {'frequency': 'c', 'id': 1020, 'synset': 'spotlight.n.02', 'synonyms': ['spotlight'], 'def': 'a lamp that produces a strong beam of light to illuminate a restricted area; used to focus attention of a stage performer', 'name': 'spotlight'}, {'frequency': 'r', 'id': 1021, 'synset': 'squirrel.n.01', 'synonyms': ['squirrel'], 'def': 'a kind of arboreal rodent having a long bushy tail', 'name': 'squirrel'}, {'frequency': 'c', 'id': 1022, 'synset': 'stapler.n.01', 'synonyms': ['stapler_(stapling_machine)'], 'def': 'a machine that inserts staples into sheets of paper in order to fasten them together', 'name': 'stapler_(stapling_machine)'}, {'frequency': 'r', 'id': 1023, 'synset': 'starfish.n.01', 'synonyms': ['starfish', 'sea_star'], 'def': 'echinoderms characterized by five arms extending from a central disk', 'name': 'starfish'}, {'frequency': 'f', 'id': 1024, 'synset': 'statue.n.01', 'synonyms': ['statue_(sculpture)'], 'def': 'a sculpture representing a human or animal', 'name': 'statue_(sculpture)'}, {'frequency': 'c', 'id': 1025, 'synset': 'steak.n.01', 'synonyms': ['steak_(food)'], 'def': 'a slice of meat cut from the fleshy part of an animal or large fish', 'name': 'steak_(food)'}, {'frequency': 'r', 'id': 1026, 'synset': 'steak_knife.n.01', 'synonyms': ['steak_knife'], 'def': 'a sharp table knife used in eating steak', 'name': 'steak_knife'}, {'frequency': 'r', 'id': 1027, 'synset': 'steamer.n.02', 'synonyms': ['steamer_(kitchen_appliance)'], 'def': 'a cooking utensil that can be used to cook food by steaming it', 'name': 'steamer_(kitchen_appliance)'}, {'frequency': 'f', 'id': 1028, 'synset': 'steering_wheel.n.01', 'synonyms': ['steering_wheel'], 'def': 'a handwheel that is used for steering', 'name': 'steering_wheel'}, {'frequency': 'r', 'id': 1029, 'synset': 'stencil.n.01', 'synonyms': ['stencil'], 'def': 'a sheet of material (metal, plastic, etc.) 
that has been perforated with a pattern; ink or paint can pass through the perforations to create the printed pattern on the surface below', 'name': 'stencil'}, {'frequency': 'r', 'id': 1030, 'synset': 'step_ladder.n.01', 'synonyms': ['stepladder'], 'def': 'a folding portable ladder hinged at the top', 'name': 'stepladder'}, {'frequency': 'c', 'id': 1031, 'synset': 'step_stool.n.01', 'synonyms': ['step_stool'], 'def': 'a stool that has one or two steps that fold under the seat', 'name': 'step_stool'}, {'frequency': 'c', 'id': 1032, 'synset': 'stereo.n.01', 'synonyms': ['stereo_(sound_system)'], 'def': 'electronic device for playing audio', 'name': 'stereo_(sound_system)'}, {'frequency': 'r', 'id': 1033, 'synset': 'stew.n.02', 'synonyms': ['stew'], 'def': 'food prepared by stewing especially meat or fish with vegetables', 'name': 'stew'}, {'frequency': 'r', 'id': 1034, 'synset': 'stirrer.n.02', 'synonyms': ['stirrer'], 'def': 'an implement used for stirring', 'name': 'stirrer'}, {'frequency': 'f', 'id': 1035, 'synset': 'stirrup.n.01', 'synonyms': ['stirrup'], 'def': "support consisting of metal loops into which rider's feet go", 'name': 'stirrup'}, {'frequency': 'c', 'id': 1036, 'synset': 'stocking.n.01', 'synonyms': ['stockings_(leg_wear)'], 'def': 'close-fitting hosiery to cover the foot and leg; come in matched pairs', 'name': 'stockings_(leg_wear)'}, {'frequency': 'f', 'id': 1037, 'synset': 'stool.n.01', 'synonyms': ['stool'], 'def': 'a simple seat without a back or arms', 'name': 'stool'}, {'frequency': 'f', 'id': 1038, 'synset': 'stop_sign.n.01', 'synonyms': ['stop_sign'], 'def': 'a traffic sign to notify drivers that they must come to a complete stop', 'name': 'stop_sign'}, {'frequency': 'f', 'id': 1039, 'synset': 'stoplight.n.01', 'synonyms': ['brake_light'], 'def': 'a red light on the rear of a motor vehicle that signals when the brakes are applied', 'name': 'brake_light'}, {'frequency': 'f', 'id': 1040, 'synset': 'stove.n.01', 'synonyms': ['stove', 'kitchen_stove', 'range_(kitchen_appliance)', 'kitchen_range', 'cooking_stove'], 'def': 'a kitchen appliance used for cooking food', 'name': 'stove'}, {'frequency': 'c', 'id': 1041, 'synset': 'strainer.n.01', 'synonyms': ['strainer'], 'def': 'a filter to retain larger pieces while smaller pieces and liquids pass through', 'name': 'strainer'}, {'frequency': 'f', 'id': 1042, 'synset': 'strap.n.01', 'synonyms': ['strap'], 'def': 'an elongated strip of material for binding things together or holding', 'name': 'strap'}, {'frequency': 'f', 'id': 1043, 'synset': 'straw.n.04', 'synonyms': ['straw_(for_drinking)', 'drinking_straw'], 'def': 'a thin paper or plastic tube used to suck liquids into the mouth', 'name': 'straw_(for_drinking)'}, {'frequency': 'f', 'id': 1044, 'synset': 'strawberry.n.01', 'synonyms': ['strawberry'], 'def': 'sweet fleshy red fruit', 'name': 'strawberry'}, {'frequency': 'f', 'id': 1045, 'synset': 'street_sign.n.01', 'synonyms': ['street_sign'], 'def': 'a sign visible from the street', 'name': 'street_sign'}, {'frequency': 'f', 'id': 1046, 'synset': 'streetlight.n.01', 'synonyms': ['streetlight', 'street_lamp'], 'def': 'a lamp supported on a lamppost; for illuminating a street', 'name': 'streetlight'}, {'frequency': 'r', 'id': 1047, 'synset': 'string_cheese.n.01', 'synonyms': ['string_cheese'], 'def': 'cheese formed in long strings twisted together', 'name': 'string_cheese'}, {'frequency': 'r', 'id': 1048, 'synset': 'stylus.n.02', 'synonyms': ['stylus'], 'def': 'a pointed tool for writing or drawing or engraving', 'name': 
'stylus'}, {'frequency': 'r', 'id': 1049, 'synset': 'subwoofer.n.01', 'synonyms': ['subwoofer'], 'def': 'a loudspeaker that is designed to reproduce very low bass frequencies', 'name': 'subwoofer'}, {'frequency': 'r', 'id': 1050, 'synset': 'sugar_bowl.n.01', 'synonyms': ['sugar_bowl'], 'def': 'a dish in which sugar is served', 'name': 'sugar_bowl'}, {'frequency': 'r', 'id': 1051, 'synset': 'sugarcane.n.01', 'synonyms': ['sugarcane_(plant)'], 'def': 'juicy canes whose sap is a source of molasses and commercial sugar; fresh canes are sometimes chewed for the juice', 'name': 'sugarcane_(plant)'}, {'frequency': 'c', 'id': 1052, 'synset': 'suit.n.01', 'synonyms': ['suit_(clothing)'], 'def': 'a set of garments (usually including a jacket and trousers or skirt) for outerwear all of the same fabric and color', 'name': 'suit_(clothing)'}, {'frequency': 'c', 'id': 1053, 'synset': 'sunflower.n.01', 'synonyms': ['sunflower'], 'def': 'any plant of the genus Helianthus having large flower heads with dark disk florets and showy yellow rays', 'name': 'sunflower'}, {'frequency': 'f', 'id': 1054, 'synset': 'sunglasses.n.01', 'synonyms': ['sunglasses'], 'def': 'spectacles that are darkened or polarized to protect the eyes from the glare of the sun', 'name': 'sunglasses'}, {'frequency': 'c', 'id': 1055, 'synset': 'sunhat.n.01', 'synonyms': ['sunhat'], 'def': 'a hat with a broad brim that protects the face from direct exposure to the sun', 'name': 'sunhat'}, {'frequency': 'r', 'id': 1056, 'synset': 'sunscreen.n.01', 'synonyms': ['sunscreen', 'sunblock'], 'def': 'a cream spread on the skin; contains a chemical to filter out ultraviolet light and so protect from sunburn', 'name': 'sunscreen'}, {'frequency': 'f', 'id': 1057, 'synset': 'surfboard.n.01', 'synonyms': ['surfboard'], 'def': 'a narrow buoyant board for riding surf', 'name': 'surfboard'}, {'frequency': 'c', 'id': 1058, 'synset': 'sushi.n.01', 'synonyms': ['sushi'], 'def': 'rice (with raw fish) wrapped in seaweed', 'name': 'sushi'}, {'frequency': 'c', 'id': 1059, 'synset': 'swab.n.02', 'synonyms': ['mop'], 'def': 'cleaning implement consisting of absorbent material fastened to a handle; for cleaning floors', 'name': 'mop'}, {'frequency': 'c', 'id': 1060, 'synset': 'sweat_pants.n.01', 'synonyms': ['sweat_pants'], 'def': 'loose-fitting trousers with elastic cuffs; worn by athletes', 'name': 'sweat_pants'}, {'frequency': 'c', 'id': 1061, 'synset': 'sweatband.n.02', 'synonyms': ['sweatband'], 'def': 'a band of material tied around the forehead or wrist to absorb sweat', 'name': 'sweatband'}, {'frequency': 'f', 'id': 1062, 'synset': 'sweater.n.01', 'synonyms': ['sweater'], 'def': 'a crocheted or knitted garment covering the upper part of the body', 'name': 'sweater'}, {'frequency': 'f', 'id': 1063, 'synset': 'sweatshirt.n.01', 'synonyms': ['sweatshirt'], 'def': 'cotton knit pullover with long sleeves worn during athletic activity', 'name': 'sweatshirt'}, {'frequency': 'c', 'id': 1064, 'synset': 'sweet_potato.n.02', 'synonyms': ['sweet_potato'], 'def': 'the edible tuberous root of the sweet potato vine', 'name': 'sweet_potato'}, {'frequency': 'f', 'id': 1065, 'synset': 'swimsuit.n.01', 'synonyms': ['swimsuit', 'swimwear', 'bathing_suit', 'swimming_costume', 'bathing_costume', 'swimming_trunks', 'bathing_trunks'], 'def': 'garment worn for swimming', 'name': 'swimsuit'}, {'frequency': 'c', 'id': 1066, 'synset': 'sword.n.01', 'synonyms': ['sword'], 'def': 'a cutting or thrusting weapon that has a long metal blade', 'name': 'sword'}, {'frequency': 'r', 'id': 1067, 
'synset': 'syringe.n.01', 'synonyms': ['syringe'], 'def': 'a medical instrument used to inject or withdraw fluids', 'name': 'syringe'}, {'frequency': 'r', 'id': 1068, 'synset': 'tabasco.n.02', 'synonyms': ['Tabasco_sauce'], 'def': 'very spicy sauce (trade name Tabasco) made from fully-aged red peppers', 'name': 'Tabasco_sauce'}, {'frequency': 'r', 'id': 1069, 'synset': 'table-tennis_table.n.01', 'synonyms': ['table-tennis_table', 'ping-pong_table'], 'def': 'a table used for playing table tennis', 'name': 'table-tennis_table'}, {'frequency': 'f', 'id': 1070, 'synset': 'table.n.02', 'synonyms': ['table'], 'def': 'a piece of furniture having a smooth flat top that is usually supported by one or more vertical legs', 'name': 'table'}, {'frequency': 'c', 'id': 1071, 'synset': 'table_lamp.n.01', 'synonyms': ['table_lamp'], 'def': 'a lamp that sits on a table', 'name': 'table_lamp'}, {'frequency': 'f', 'id': 1072, 'synset': 'tablecloth.n.01', 'synonyms': ['tablecloth'], 'def': 'a covering spread over a dining table', 'name': 'tablecloth'}, {'frequency': 'r', 'id': 1073, 'synset': 'tachometer.n.01', 'synonyms': ['tachometer'], 'def': 'measuring instrument for indicating speed of rotation', 'name': 'tachometer'}, {'frequency': 'r', 'id': 1074, 'synset': 'taco.n.02', 'synonyms': ['taco'], 'def': 'a small tortilla cupped around a filling', 'name': 'taco'}, {'frequency': 'f', 'id': 1075, 'synset': 'tag.n.02', 'synonyms': ['tag'], 'def': 'a label associated with something for the purpose of identification or information', 'name': 'tag'}, {'frequency': 'f', 'id': 1076, 'synset': 'taillight.n.01', 'synonyms': ['taillight', 'rear_light'], 'def': 'lamp (usually red) mounted at the rear of a motor vehicle', 'name': 'taillight'}, {'frequency': 'r', 'id': 1077, 'synset': 'tambourine.n.01', 'synonyms': ['tambourine'], 'def': 'a shallow drum with a single drumhead and with metallic disks in the sides', 'name': 'tambourine'}, {'frequency': 'r', 'id': 1078, 'synset': 'tank.n.01', 'synonyms': ['army_tank', 'armored_combat_vehicle', 'armoured_combat_vehicle'], 'def': 'an enclosed armored military vehicle; has a cannon and moves on caterpillar treads', 'name': 'army_tank'}, {'frequency': 'c', 'id': 1079, 'synset': 'tank.n.02', 'synonyms': ['tank_(storage_vessel)', 'storage_tank'], 'def': 'a large (usually metallic) vessel for holding gases or liquids', 'name': 'tank_(storage_vessel)'}, {'frequency': 'f', 'id': 1080, 'synset': 'tank_top.n.01', 'synonyms': ['tank_top_(clothing)'], 'def': 'a tight-fitting sleeveless shirt with wide shoulder straps and low neck and no front opening', 'name': 'tank_top_(clothing)'}, {'frequency': 'c', 'id': 1081, 'synset': 'tape.n.01', 'synonyms': ['tape_(sticky_cloth_or_paper)'], 'def': 'a long thin piece of cloth or paper as used for binding or fastening', 'name': 'tape_(sticky_cloth_or_paper)'}, {'frequency': 'c', 'id': 1082, 'synset': 'tape.n.04', 'synonyms': ['tape_measure', 'measuring_tape'], 'def': 'measuring instrument consisting of a narrow strip (cloth or metal) marked in inches or centimeters and used for measuring lengths', 'name': 'tape_measure'}, {'frequency': 'c', 'id': 1083, 'synset': 'tapestry.n.02', 'synonyms': ['tapestry'], 'def': 'a heavy textile with a woven design; used for curtains and upholstery', 'name': 'tapestry'}, {'frequency': 'f', 'id': 1084, 'synset': 'tarpaulin.n.01', 'synonyms': ['tarp'], 'def': 'waterproofed canvas', 'name': 'tarp'}, {'frequency': 'c', 'id': 1085, 'synset': 'tartan.n.01', 'synonyms': ['tartan', 'plaid'], 'def': 'a cloth having a 
crisscross design', 'name': 'tartan'}, {'frequency': 'c', 'id': 1086, 'synset': 'tassel.n.01', 'synonyms': ['tassel'], 'def': 'adornment consisting of a bunch of cords fastened at one end', 'name': 'tassel'}, {'frequency': 'r', 'id': 1087, 'synset': 'tea_bag.n.01', 'synonyms': ['tea_bag'], 'def': 'a measured amount of tea in a bag for an individual serving of tea', 'name': 'tea_bag'}, {'frequency': 'c', 'id': 1088, 'synset': 'teacup.n.02', 'synonyms': ['teacup'], 'def': 'a cup from which tea is drunk', 'name': 'teacup'}, {'frequency': 'c', 'id': 1089, 'synset': 'teakettle.n.01', 'synonyms': ['teakettle'], 'def': 'kettle for boiling water to make tea', 'name': 'teakettle'}, {'frequency': 'c', 'id': 1090, 'synset': 'teapot.n.01', 'synonyms': ['teapot'], 'def': 'pot for brewing tea; usually has a spout and handle', 'name': 'teapot'}, {'frequency': 'f', 'id': 1091, 'synset': 'teddy.n.01', 'synonyms': ['teddy_bear'], 'def': "plaything consisting of a child's toy bear (usually plush and stuffed with soft materials)", 'name': 'teddy_bear'}, {'frequency': 'f', 'id': 1092, 'synset': 'telephone.n.01', 'synonyms': ['telephone', 'phone', 'telephone_set'], 'def': 'electronic device for communicating by voice over long distances', 'name': 'telephone'}, {'frequency': 'c', 'id': 1093, 'synset': 'telephone_booth.n.01', 'synonyms': ['telephone_booth', 'phone_booth', 'call_box', 'telephone_box', 'telephone_kiosk'], 'def': 'booth for using a telephone', 'name': 'telephone_booth'}, {'frequency': 'f', 'id': 1094, 'synset': 'telephone_pole.n.01', 'synonyms': ['telephone_pole', 'telegraph_pole', 'telegraph_post'], 'def': 'tall pole supporting telephone wires', 'name': 'telephone_pole'}, {'frequency': 'r', 'id': 1095, 'synset': 'telephoto_lens.n.01', 'synonyms': ['telephoto_lens', 'zoom_lens'], 'def': 'a camera lens that magnifies the image', 'name': 'telephoto_lens'}, {'frequency': 'c', 'id': 1096, 'synset': 'television_camera.n.01', 'synonyms': ['television_camera', 'tv_camera'], 'def': 'television equipment for capturing and recording video', 'name': 'television_camera'}, {'frequency': 'f', 'id': 1097, 'synset': 'television_receiver.n.01', 'synonyms': ['television_set', 'tv', 'tv_set'], 'def': 'an electronic device that receives television signals and displays them on a screen', 'name': 'television_set'}, {'frequency': 'f', 'id': 1098, 'synset': 'tennis_ball.n.01', 'synonyms': ['tennis_ball'], 'def': 'ball about the size of a fist used in playing tennis', 'name': 'tennis_ball'}, {'frequency': 'f', 'id': 1099, 'synset': 'tennis_racket.n.01', 'synonyms': ['tennis_racket'], 'def': 'a racket used to play tennis', 'name': 'tennis_racket'}, {'frequency': 'r', 'id': 1100, 'synset': 'tequila.n.01', 'synonyms': ['tequila'], 'def': 'Mexican liquor made from fermented juices of an agave plant', 'name': 'tequila'}, {'frequency': 'c', 'id': 1101, 'synset': 'thermometer.n.01', 'synonyms': ['thermometer'], 'def': 'measuring instrument for measuring temperature', 'name': 'thermometer'}, {'frequency': 'c', 'id': 1102, 'synset': 'thermos.n.01', 'synonyms': ['thermos_bottle'], 'def': 'vacuum flask that preserves temperature of hot or cold drinks', 'name': 'thermos_bottle'}, {'frequency': 'c', 'id': 1103, 'synset': 'thermostat.n.01', 'synonyms': ['thermostat'], 'def': 'a regulator for automatically regulating temperature by starting or stopping the supply of heat', 'name': 'thermostat'}, {'frequency': 'r', 'id': 1104, 'synset': 'thimble.n.02', 'synonyms': ['thimble'], 'def': 'a small metal cap to protect the finger while sewing; 
can be used as a small container', 'name': 'thimble'}, {'frequency': 'c', 'id': 1105, 'synset': 'thread.n.01', 'synonyms': ['thread', 'yarn'], 'def': 'a fine cord of twisted fibers (of cotton or silk or wool or nylon etc.) used in sewing and weaving', 'name': 'thread'}, {'frequency': 'c', 'id': 1106, 'synset': 'thumbtack.n.01', 'synonyms': ['thumbtack', 'drawing_pin', 'pushpin'], 'def': 'a tack for attaching papers to a bulletin board or drawing board', 'name': 'thumbtack'}, {'frequency': 'c', 'id': 1107, 'synset': 'tiara.n.01', 'synonyms': ['tiara'], 'def': 'a jeweled headdress worn by women on formal occasions', 'name': 'tiara'}, {'frequency': 'c', 'id': 1108, 'synset': 'tiger.n.02', 'synonyms': ['tiger'], 'def': 'large feline of forests in most of Asia having a tawny coat with black stripes', 'name': 'tiger'}, {'frequency': 'c', 'id': 1109, 'synset': 'tights.n.01', 'synonyms': ['tights_(clothing)', 'leotards'], 'def': 'skintight knit hose covering the body from the waist to the feet worn by acrobats and dancers and as stockings by women and girls', 'name': 'tights_(clothing)'}, {'frequency': 'c', 'id': 1110, 'synset': 'timer.n.01', 'synonyms': ['timer', 'stopwatch'], 'def': 'a timepiece that measures a time interval and signals its end', 'name': 'timer'}, {'frequency': 'f', 'id': 1111, 'synset': 'tinfoil.n.01', 'synonyms': ['tinfoil'], 'def': 'foil made of tin or an alloy of tin and lead', 'name': 'tinfoil'}, {'frequency': 'r', 'id': 1112, 'synset': 'tinsel.n.01', 'synonyms': ['tinsel'], 'def': 'a showy decoration that is basically valueless', 'name': 'tinsel'}, {'frequency': 'f', 'id': 1113, 'synset': 'tissue.n.02', 'synonyms': ['tissue_paper'], 'def': 'a soft thin (usually translucent) paper', 'name': 'tissue_paper'}, {'frequency': 'c', 'id': 1114, 'synset': 'toast.n.01', 'synonyms': ['toast_(food)'], 'def': 'slice of bread that has been toasted', 'name': 'toast_(food)'}, {'frequency': 'f', 'id': 1115, 'synset': 'toaster.n.02', 'synonyms': ['toaster'], 'def': 'a kitchen appliance (usually electric) for toasting bread', 'name': 'toaster'}, {'frequency': 'c', 'id': 1116, 'synset': 'toaster_oven.n.01', 'synonyms': ['toaster_oven'], 'def': 'kitchen appliance consisting of a small electric oven for toasting or warming food', 'name': 'toaster_oven'}, {'frequency': 'f', 'id': 1117, 'synset': 'toilet.n.02', 'synonyms': ['toilet'], 'def': 'a plumbing fixture for defecation and urination', 'name': 'toilet'}, {'frequency': 'f', 'id': 1118, 'synset': 'toilet_tissue.n.01', 'synonyms': ['toilet_tissue', 'toilet_paper', 'bathroom_tissue'], 'def': 'a soft thin absorbent paper for use in toilets', 'name': 'toilet_tissue'}, {'frequency': 'f', 'id': 1119, 'synset': 'tomato.n.01', 'synonyms': ['tomato'], 'def': 'mildly acid red or yellow pulpy fruit eaten as a vegetable', 'name': 'tomato'}, {'frequency': 'c', 'id': 1120, 'synset': 'tongs.n.01', 'synonyms': ['tongs'], 'def': 'any of various devices for taking hold of objects; usually have two hinged legs with handles above and pointed hooks below', 'name': 'tongs'}, {'frequency': 'c', 'id': 1121, 'synset': 'toolbox.n.01', 'synonyms': ['toolbox'], 'def': 'a box or chest or cabinet for holding hand tools', 'name': 'toolbox'}, {'frequency': 'f', 'id': 1122, 'synset': 'toothbrush.n.01', 'synonyms': ['toothbrush'], 'def': 'small brush; has long handle; used to clean teeth', 'name': 'toothbrush'}, {'frequency': 'f', 'id': 1123, 'synset': 'toothpaste.n.01', 'synonyms': ['toothpaste'], 'def': 'a dentifrice in the form of a paste', 'name': 'toothpaste'}, 
{'frequency': 'c', 'id': 1124, 'synset': 'toothpick.n.01', 'synonyms': ['toothpick'], 'def': 'pick consisting of a small strip of wood or plastic; used to pick food from between the teeth', 'name': 'toothpick'}, {'frequency': 'c', 'id': 1125, 'synset': 'top.n.09', 'synonyms': ['cover'], 'def': 'covering for a hole (especially a hole in the top of a container)', 'name': 'cover'}, {'frequency': 'c', 'id': 1126, 'synset': 'tortilla.n.01', 'synonyms': ['tortilla'], 'def': 'thin unleavened pancake made from cornmeal or wheat flour', 'name': 'tortilla'}, {'frequency': 'c', 'id': 1127, 'synset': 'tow_truck.n.01', 'synonyms': ['tow_truck'], 'def': 'a truck equipped to hoist and pull wrecked cars (or to remove cars from no-parking zones)', 'name': 'tow_truck'}, {'frequency': 'f', 'id': 1128, 'synset': 'towel.n.01', 'synonyms': ['towel'], 'def': 'a rectangular piece of absorbent cloth (or paper) for drying or wiping', 'name': 'towel'}, {'frequency': 'f', 'id': 1129, 'synset': 'towel_rack.n.01', 'synonyms': ['towel_rack', 'towel_rail', 'towel_bar'], 'def': 'a rack consisting of one or more bars on which towels can be hung', 'name': 'towel_rack'}, {'frequency': 'f', 'id': 1130, 'synset': 'toy.n.03', 'synonyms': ['toy'], 'def': 'a device regarded as providing amusement', 'name': 'toy'}, {'frequency': 'c', 'id': 1131, 'synset': 'tractor.n.01', 'synonyms': ['tractor_(farm_equipment)'], 'def': 'a wheeled vehicle with large wheels; used in farming and other applications', 'name': 'tractor_(farm_equipment)'}, {'frequency': 'f', 'id': 1132, 'synset': 'traffic_light.n.01', 'synonyms': ['traffic_light'], 'def': 'a device to control vehicle traffic often consisting of three or more lights', 'name': 'traffic_light'}, {'frequency': 'r', 'id': 1133, 'synset': 'trail_bike.n.01', 'synonyms': ['dirt_bike'], 'def': 'a lightweight motorcycle equipped with rugged tires and suspension for off-road use', 'name': 'dirt_bike'}, {'frequency': 'c', 'id': 1134, 'synset': 'trailer_truck.n.01', 'synonyms': ['trailer_truck', 'tractor_trailer', 'trucking_rig', 'articulated_lorry', 'semi_truck'], 'def': 'a truck consisting of a tractor and trailer together', 'name': 'trailer_truck'}, {'frequency': 'f', 'id': 1135, 'synset': 'train.n.01', 'synonyms': ['train_(railroad_vehicle)', 'railroad_train'], 'def': 'public or private transport provided by a line of railway cars coupled together and drawn by a locomotive', 'name': 'train_(railroad_vehicle)'}, {'frequency': 'r', 'id': 1136, 'synset': 'trampoline.n.01', 'synonyms': ['trampoline'], 'def': 'gymnastic apparatus consisting of a strong canvas sheet attached with springs to a metal frame', 'name': 'trampoline'}, {'frequency': 'f', 'id': 1137, 'synset': 'tray.n.01', 'synonyms': ['tray'], 'def': 'an open receptacle for holding or displaying or serving articles or food', 'name': 'tray'}, {'frequency': 'r', 'id': 1138, 'synset': 'tree_house.n.01', 'synonyms': ['tree_house'], 'def': '(NOT A TREE) a PLAYHOUSE built in the branches of a tree', 'name': 'tree_house'}, {'frequency': 'r', 'id': 1139, 'synset': 'trench_coat.n.01', 'synonyms': ['trench_coat'], 'def': 'a military style raincoat; belted with deep pockets', 'name': 'trench_coat'}, {'frequency': 'r', 'id': 1140, 'synset': 'triangle.n.05', 'synonyms': ['triangle_(musical_instrument)'], 'def': 'a percussion instrument consisting of a metal bar bent in the shape of an open triangle', 'name': 'triangle_(musical_instrument)'}, {'frequency': 'r', 'id': 1141, 'synset': 'tricycle.n.01', 'synonyms': ['tricycle'], 'def': 'a vehicle with three 
wheels that is moved by foot pedals', 'name': 'tricycle'}, {'frequency': 'c', 'id': 1142, 'synset': 'tripod.n.01', 'synonyms': ['tripod'], 'def': 'a three-legged rack used for support', 'name': 'tripod'}, {'frequency': 'f', 'id': 1143, 'synset': 'trouser.n.01', 'synonyms': ['trousers', 'pants_(clothing)'], 'def': 'a garment extending from the waist to the knee or ankle, covering each leg separately', 'name': 'trousers'}, {'frequency': 'f', 'id': 1144, 'synset': 'truck.n.01', 'synonyms': ['truck'], 'def': 'an automotive vehicle suitable for hauling', 'name': 'truck'}, {'frequency': 'r', 'id': 1145, 'synset': 'truffle.n.03', 'synonyms': ['truffle_(chocolate)', 'chocolate_truffle'], 'def': 'creamy chocolate candy', 'name': 'truffle_(chocolate)'}, {'frequency': 'c', 'id': 1146, 'synset': 'trunk.n.02', 'synonyms': ['trunk'], 'def': 'luggage consisting of a large strong case used when traveling or for storage', 'name': 'trunk'}, {'frequency': 'r', 'id': 1147, 'synset': 'tub.n.02', 'synonyms': ['vat'], 'def': 'a large open vessel for holding or storing liquids', 'name': 'vat'}, {'frequency': 'c', 'id': 1148, 'synset': 'turban.n.01', 'synonyms': ['turban'], 'def': 'a traditional headdress consisting of a long scarf wrapped around the head', 'name': 'turban'}, {'frequency': 'r', 'id': 1149, 'synset': 'turkey.n.01', 'synonyms': ['turkey_(bird)'], 'def': 'large gallinaceous bird with fan-shaped tail; widely domesticated for food', 'name': 'turkey_(bird)'}, {'frequency': 'c', 'id': 1150, 'synset': 'turkey.n.04', 'synonyms': ['turkey_(food)'], 'def': 'flesh of large domesticated fowl usually roasted', 'name': 'turkey_(food)'}, {'frequency': 'r', 'id': 1151, 'synset': 'turnip.n.01', 'synonyms': ['turnip'], 'def': 'widely cultivated plant having a large fleshy edible white or yellow root', 'name': 'turnip'}, {'frequency': 'c', 'id': 1152, 'synset': 'turtle.n.02', 'synonyms': ['turtle'], 'def': 'any of various aquatic and land reptiles having a bony shell and flipper-like limbs for swimming', 'name': 'turtle'}, {'frequency': 'r', 'id': 1153, 'synset': 'turtleneck.n.01', 'synonyms': ['turtleneck_(clothing)', 'polo-neck'], 'def': 'a sweater or jersey with a high close-fitting collar', 'name': 'turtleneck_(clothing)'}, {'frequency': 'r', 'id': 1154, 'synset': 'typewriter.n.01', 'synonyms': ['typewriter'], 'def': 'hand-operated character printer for printing written messages one character at a time', 'name': 'typewriter'}, {'frequency': 'f', 'id': 1155, 'synset': 'umbrella.n.01', 'synonyms': ['umbrella'], 'def': 'a lightweight handheld collapsible canopy', 'name': 'umbrella'}, {'frequency': 'c', 'id': 1156, 'synset': 'underwear.n.01', 'synonyms': ['underwear', 'underclothes', 'underclothing', 'underpants'], 'def': 'undergarment worn next to the skin and under the outer garments', 'name': 'underwear'}, {'frequency': 'r', 'id': 1157, 'synset': 'unicycle.n.01', 'synonyms': ['unicycle'], 'def': 'a vehicle with a single wheel that is driven by pedals', 'name': 'unicycle'}, {'frequency': 'c', 'id': 1158, 'synset': 'urinal.n.01', 'synonyms': ['urinal'], 'def': 'a plumbing fixture (usually attached to the wall) used by men to urinate', 'name': 'urinal'}, {'frequency': 'r', 'id': 1159, 'synset': 'urn.n.01', 'synonyms': ['urn'], 'def': 'a large vase that usually has a pedestal or feet', 'name': 'urn'}, {'frequency': 'c', 'id': 1160, 'synset': 'vacuum.n.04', 'synonyms': ['vacuum_cleaner'], 'def': 'an electrical home appliance that cleans by suction', 'name': 'vacuum_cleaner'}, {'frequency': 'c', 'id': 1161, 'synset': 
'valve.n.03', 'synonyms': ['valve'], 'def': 'control consisting of a mechanical device for controlling the flow of a fluid', 'name': 'valve'}, {'frequency': 'f', 'id': 1162, 'synset': 'vase.n.01', 'synonyms': ['vase'], 'def': 'an open jar of glass or porcelain used as an ornament or to hold flowers', 'name': 'vase'}, {'frequency': 'c', 'id': 1163, 'synset': 'vending_machine.n.01', 'synonyms': ['vending_machine'], 'def': 'a slot machine for selling goods', 'name': 'vending_machine'}, {'frequency': 'f', 'id': 1164, 'synset': 'vent.n.01', 'synonyms': ['vent', 'blowhole', 'air_vent'], 'def': 'a hole for the escape of gas or air', 'name': 'vent'}, {'frequency': 'c', 'id': 1165, 'synset': 'videotape.n.01', 'synonyms': ['videotape'], 'def': 'a video recording made on magnetic tape', 'name': 'videotape'}, {'frequency': 'r', 'id': 1166, 'synset': 'vinegar.n.01', 'synonyms': ['vinegar'], 'def': 'sour-tasting liquid produced usually by oxidation of the alcohol in wine or cider and used as a condiment or food preservative', 'name': 'vinegar'}, {'frequency': 'r', 'id': 1167, 'synset': 'violin.n.01', 'synonyms': ['violin', 'fiddle'], 'def': 'bowed stringed instrument that is the highest member of the violin family', 'name': 'violin'}, {'frequency': 'r', 'id': 1168, 'synset': 'vodka.n.01', 'synonyms': ['vodka'], 'def': 'unaged colorless liquor originating in Russia', 'name': 'vodka'}, {'frequency': 'r', 'id': 1169, 'synset': 'volleyball.n.02', 'synonyms': ['volleyball'], 'def': 'an inflated ball used in playing volleyball', 'name': 'volleyball'}, {'frequency': 'r', 'id': 1170, 'synset': 'vulture.n.01', 'synonyms': ['vulture'], 'def': 'any of various large birds of prey having naked heads and weak claws and feeding chiefly on carrion', 'name': 'vulture'}, {'frequency': 'c', 'id': 1171, 'synset': 'waffle.n.01', 'synonyms': ['waffle'], 'def': 'pancake batter baked in a waffle iron', 'name': 'waffle'}, {'frequency': 'r', 'id': 1172, 'synset': 'waffle_iron.n.01', 'synonyms': ['waffle_iron'], 'def': 'a kitchen appliance for baking waffles', 'name': 'waffle_iron'}, {'frequency': 'c', 'id': 1173, 'synset': 'wagon.n.01', 'synonyms': ['wagon'], 'def': 'any of various kinds of wheeled vehicles drawn by an animal or a tractor', 'name': 'wagon'}, {'frequency': 'c', 'id': 1174, 'synset': 'wagon_wheel.n.01', 'synonyms': ['wagon_wheel'], 'def': 'a wheel of a wagon', 'name': 'wagon_wheel'}, {'frequency': 'c', 'id': 1175, 'synset': 'walking_stick.n.01', 'synonyms': ['walking_stick'], 'def': 'a stick carried in the hand for support in walking', 'name': 'walking_stick'}, {'frequency': 'c', 'id': 1176, 'synset': 'wall_clock.n.01', 'synonyms': ['wall_clock'], 'def': 'a clock mounted on a wall', 'name': 'wall_clock'}, {'frequency': 'f', 'id': 1177, 'synset': 'wall_socket.n.01', 'synonyms': ['wall_socket', 'wall_plug', 'electric_outlet', 'electrical_outlet', 'outlet', 'electric_receptacle'], 'def': 'receptacle providing a place in a wiring system where current can be taken to run electrical devices', 'name': 'wall_socket'}, {'frequency': 'c', 'id': 1178, 'synset': 'wallet.n.01', 'synonyms': ['wallet', 'billfold'], 'def': 'a pocket-size case for holding papers and paper money', 'name': 'wallet'}, {'frequency': 'r', 'id': 1179, 'synset': 'walrus.n.01', 'synonyms': ['walrus'], 'def': 'either of two large northern marine mammals having ivory tusks and tough hide over thick blubber', 'name': 'walrus'}, {'frequency': 'r', 'id': 1180, 'synset': 'wardrobe.n.01', 'synonyms': ['wardrobe'], 'def': 'a tall piece of furniture that provides 
storage space for clothes; has a door and rails or hooks for hanging clothes', 'name': 'wardrobe'}, {'frequency': 'r', 'id': 1181, 'synset': 'wasabi.n.02', 'synonyms': ['wasabi'], 'def': 'the thick green root of the wasabi plant that the Japanese use in cooking and that tastes like strong horseradish', 'name': 'wasabi'}, {'frequency': 'c', 'id': 1182, 'synset': 'washer.n.03', 'synonyms': ['automatic_washer', 'washing_machine'], 'def': 'a home appliance for washing clothes and linens automatically', 'name': 'automatic_washer'}, {'frequency': 'f', 'id': 1183, 'synset': 'watch.n.01', 'synonyms': ['watch', 'wristwatch'], 'def': 'a small, portable timepiece', 'name': 'watch'}, {'frequency': 'f', 'id': 1184, 'synset': 'water_bottle.n.01', 'synonyms': ['water_bottle'], 'def': 'a bottle for holding water', 'name': 'water_bottle'}, {'frequency': 'c', 'id': 1185, 'synset': 'water_cooler.n.01', 'synonyms': ['water_cooler'], 'def': 'a device for cooling and dispensing drinking water', 'name': 'water_cooler'}, {'frequency': 'c', 'id': 1186, 'synset': 'water_faucet.n.01', 'synonyms': ['water_faucet', 'water_tap', 'tap_(water_faucet)'], 'def': 'a faucet for drawing water from a pipe or cask', 'name': 'water_faucet'}, {'frequency': 'r', 'id': 1187, 'synset': 'water_filter.n.01', 'synonyms': ['water_filter'], 'def': 'a filter to remove impurities from the water supply', 'name': 'water_filter'}, {'frequency': 'r', 'id': 1188, 'synset': 'water_heater.n.01', 'synonyms': ['water_heater', 'hot-water_heater'], 'def': 'a heater and storage tank to supply heated water', 'name': 'water_heater'}, {'frequency': 'r', 'id': 1189, 'synset': 'water_jug.n.01', 'synonyms': ['water_jug'], 'def': 'a jug that holds water', 'name': 'water_jug'}, {'frequency': 'r', 'id': 1190, 'synset': 'water_pistol.n.01', 'synonyms': ['water_gun', 'squirt_gun'], 'def': 'plaything consisting of a toy pistol that squirts water', 'name': 'water_gun'}, {'frequency': 'c', 'id': 1191, 'synset': 'water_scooter.n.01', 'synonyms': ['water_scooter', 'sea_scooter', 'jet_ski'], 'def': 'a motorboat resembling a motor scooter (NOT A SURFBOARD OR WATER SKI)', 'name': 'water_scooter'}, {'frequency': 'c', 'id': 1192, 'synset': 'water_ski.n.01', 'synonyms': ['water_ski'], 'def': 'broad ski for skimming over water towed by a speedboat (DO NOT MARK WATER)', 'name': 'water_ski'}, {'frequency': 'c', 'id': 1193, 'synset': 'water_tower.n.01', 'synonyms': ['water_tower'], 'def': 'a large reservoir for water', 'name': 'water_tower'}, {'frequency': 'c', 'id': 1194, 'synset': 'watering_can.n.01', 'synonyms': ['watering_can'], 'def': 'a container with a handle and a spout with a perforated nozzle; used to sprinkle water over plants', 'name': 'watering_can'}, {'frequency': 'c', 'id': 1195, 'synset': 'watermelon.n.02', 'synonyms': ['watermelon'], 'def': 'large oblong or roundish melon with a hard green rind and sweet watery red or occasionally yellowish pulp', 'name': 'watermelon'}, {'frequency': 'f', 'id': 1196, 'synset': 'weathervane.n.01', 'synonyms': ['weathervane', 'vane_(weathervane)', 'wind_vane'], 'def': 'mechanical device attached to an elevated structure; rotates freely to show the direction of the wind', 'name': 'weathervane'}, {'frequency': 'c', 'id': 1197, 'synset': 'webcam.n.01', 'synonyms': ['webcam'], 'def': 'a digital camera designed to take digital photographs and transmit them over the internet', 'name': 'webcam'}, {'frequency': 'c', 'id': 1198, 'synset': 'wedding_cake.n.01', 'synonyms': ['wedding_cake', 'bridecake'], 'def': 'a rich cake with two or more 
tiers and covered with frosting and decorations; served at a wedding reception', 'name': 'wedding_cake'}, {'frequency': 'c', 'id': 1199, 'synset': 'wedding_ring.n.01', 'synonyms': ['wedding_ring', 'wedding_band'], 'def': 'a ring given to the bride and/or groom at the wedding', 'name': 'wedding_ring'}, {'frequency': 'f', 'id': 1200, 'synset': 'wet_suit.n.01', 'synonyms': ['wet_suit'], 'def': 'a close-fitting garment made of a permeable material; worn in cold water to retain body heat', 'name': 'wet_suit'}, {'frequency': 'f', 'id': 1201, 'synset': 'wheel.n.01', 'synonyms': ['wheel'], 'def': 'a circular frame with spokes (or a solid disc) that can rotate on a shaft or axle', 'name': 'wheel'}, {'frequency': 'c', 'id': 1202, 'synset': 'wheelchair.n.01', 'synonyms': ['wheelchair'], 'def': 'a movable chair mounted on large wheels', 'name': 'wheelchair'}, {'frequency': 'c', 'id': 1203, 'synset': 'whipped_cream.n.01', 'synonyms': ['whipped_cream'], 'def': 'cream that has been beaten until light and fluffy', 'name': 'whipped_cream'}, {'frequency': 'r', 'id': 1204, 'synset': 'whiskey.n.01', 'synonyms': ['whiskey'], 'def': 'a liquor made from fermented mash of grain', 'name': 'whiskey'}, {'frequency': 'r', 'id': 1205, 'synset': 'whistle.n.03', 'synonyms': ['whistle'], 'def': 'a small wind instrument that produces a whistling sound by blowing into it', 'name': 'whistle'}, {'frequency': 'r', 'id': 1206, 'synset': 'wick.n.02', 'synonyms': ['wick'], 'def': 'a loosely woven cord in a candle or oil lamp that is lit on fire', 'name': 'wick'}, {'frequency': 'c', 'id': 1207, 'synset': 'wig.n.01', 'synonyms': ['wig'], 'def': 'hairpiece covering the head and made of real or synthetic hair', 'name': 'wig'}, {'frequency': 'c', 'id': 1208, 'synset': 'wind_chime.n.01', 'synonyms': ['wind_chime'], 'def': 'a decorative arrangement of pieces of metal or glass or pottery that hang together loosely so the wind can cause them to tinkle', 'name': 'wind_chime'}, {'frequency': 'c', 'id': 1209, 'synset': 'windmill.n.01', 'synonyms': ['windmill'], 'def': 'a mill that is powered by the wind', 'name': 'windmill'}, {'frequency': 'c', 'id': 1210, 'synset': 'window_box.n.01', 'synonyms': ['window_box_(for_plants)'], 'def': 'a container for growing plants on a windowsill', 'name': 'window_box_(for_plants)'}, {'frequency': 'f', 'id': 1211, 'synset': 'windshield_wiper.n.01', 'synonyms': ['windshield_wiper', 'windscreen_wiper', 'wiper_(for_windshield/screen)'], 'def': 'a mechanical device that cleans the windshield', 'name': 'windshield_wiper'}, {'frequency': 'c', 'id': 1212, 'synset': 'windsock.n.01', 'synonyms': ['windsock', 'air_sock', 'air-sleeve', 'wind_sleeve', 'wind_cone'], 'def': 'a truncated cloth cone mounted on a mast/pole; shows wind direction', 'name': 'windsock'}, {'frequency': 'f', 'id': 1213, 'synset': 'wine_bottle.n.01', 'synonyms': ['wine_bottle'], 'def': 'a bottle for holding wine', 'name': 'wine_bottle'}, {'frequency': 'r', 'id': 1214, 'synset': 'wine_bucket.n.01', 'synonyms': ['wine_bucket', 'wine_cooler'], 'def': 'a bucket of ice used to chill a bottle of wine', 'name': 'wine_bucket'}, {'frequency': 'f', 'id': 1215, 'synset': 'wineglass.n.01', 'synonyms': ['wineglass'], 'def': 'a glass that has a stem and in which wine is served', 'name': 'wineglass'}, {'frequency': 'r', 'id': 1216, 'synset': 'wing_chair.n.01', 'synonyms': ['wing_chair'], 'def': 'easy chair having wings on each side of a high back', 'name': 'wing_chair'}, {'frequency': 'c', 'id': 1217, 'synset': 'winker.n.02', 'synonyms': ['blinder_(for_horses)'], 
'def': 'blinds that prevent a horse from seeing something on either side', 'name': 'blinder_(for_horses)'}, {'frequency': 'c', 'id': 1218, 'synset': 'wok.n.01', 'synonyms': ['wok'], 'def': 'pan with a convex bottom; used for frying in Chinese cooking', 'name': 'wok'}, {'frequency': 'r', 'id': 1219, 'synset': 'wolf.n.01', 'synonyms': ['wolf'], 'def': 'a wild carnivorous mammal of the dog family, living and hunting in packs', 'name': 'wolf'}, {'frequency': 'c', 'id': 1220, 'synset': 'wooden_spoon.n.02', 'synonyms': ['wooden_spoon'], 'def': 'a spoon made of wood', 'name': 'wooden_spoon'}, {'frequency': 'c', 'id': 1221, 'synset': 'wreath.n.01', 'synonyms': ['wreath'], 'def': 'an arrangement of flowers, leaves, or stems fastened in a ring', 'name': 'wreath'}, {'frequency': 'c', 'id': 1222, 'synset': 'wrench.n.03', 'synonyms': ['wrench', 'spanner'], 'def': 'a hand tool that is used to hold or twist a nut or bolt', 'name': 'wrench'}, {'frequency': 'c', 'id': 1223, 'synset': 'wristband.n.01', 'synonyms': ['wristband'], 'def': 'band consisting of a part of a sleeve that covers the wrist', 'name': 'wristband'}, {'frequency': 'f', 'id': 1224, 'synset': 'wristlet.n.01', 'synonyms': ['wristlet', 'wrist_band'], 'def': 'a band or bracelet worn around the wrist', 'name': 'wristlet'}, {'frequency': 'r', 'id': 1225, 'synset': 'yacht.n.01', 'synonyms': ['yacht'], 'def': 'an expensive vessel propelled by sail or power and used for cruising or racing', 'name': 'yacht'}, {'frequency': 'r', 'id': 1226, 'synset': 'yak.n.02', 'synonyms': ['yak'], 'def': 'large long-haired wild ox of Tibet often domesticated', 'name': 'yak'}, {'frequency': 'c', 'id': 1227, 'synset': 'yogurt.n.01', 'synonyms': ['yogurt', 'yoghurt', 'yoghourt'], 'def': 'a custard-like food made from curdled milk', 'name': 'yogurt'}, {'frequency': 'r', 'id': 1228, 'synset': 'yoke.n.07', 'synonyms': ['yoke_(animal_equipment)'], 'def': 'gear joining two animals at the neck; NOT egg yolk', 'name': 'yoke_(animal_equipment)'}, {'frequency': 'f', 'id': 1229, 'synset': 'zebra.n.01', 'synonyms': ['zebra'], 'def': 'any of several fleet black-and-white striped African equines', 'name': 'zebra'}, {'frequency': 'c', 'id': 1230, 'synset': 'zucchini.n.02', 'synonyms': ['zucchini', 'courgette'], 'def': 'small cucumber-shaped vegetable marrow; typically dark green', 'name': 'zucchini'}] # noqa -# fmt: on diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/pascal_voc.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/pascal_voc.py deleted file mode 100644 index 5872d96..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/pascal_voc.py +++ /dev/null @@ -1,80 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import numpy as np -import os -import xml.etree.ElementTree as ET -from fvcore.common.file_io import PathManager - -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.structures import BoxMode - -__all__ = ["register_pascal_voc"] - - -# fmt: off -CLASS_NAMES = [ - "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", - "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", - "pottedplant", "sheep", "sofa", "train", "tvmonitor", -] -# fmt: on - - -def load_voc_instances(dirname: str, split: str): - """ - Load Pascal VOC detection annotations to Detectron2 format. 
- - Args: - dirname: Contain "Annotations", "ImageSets", "JPEGImages" - split (str): one of "train", "test", "val", "trainval" - """ - with PathManager.open(os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f: - fileids = np.loadtxt(f, dtype=np.str) - - # Needs to read many small annotation files. Makes sense at local - annotation_dirname = PathManager.get_local_path(os.path.join(dirname, "Annotations/")) - dicts = [] - for fileid in fileids: - anno_file = os.path.join(annotation_dirname, fileid + ".xml") - jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ".jpg") - - with PathManager.open(anno_file) as f: - tree = ET.parse(f) - - r = { - "file_name": jpeg_file, - "image_id": fileid, - "height": int(tree.findall("./size/height")[0].text), - "width": int(tree.findall("./size/width")[0].text), - } - instances = [] - - for obj in tree.findall("object"): - cls = obj.find("name").text - # We include "difficult" samples in training. - # Based on limited experiments, they don't hurt accuracy. - # difficult = int(obj.find("difficult").text) - # if difficult == 1: - # continue - bbox = obj.find("bndbox") - bbox = [float(bbox.find(x).text) for x in ["xmin", "ymin", "xmax", "ymax"]] - # Original annotations are integers in the range [1, W or H] - # Assuming they mean 1-based pixel indices (inclusive), - # a box with annotation (xmin=1, xmax=W) covers the whole image. - # In coordinate space this is represented by (xmin=0, xmax=W) - bbox[0] -= 1.0 - bbox[1] -= 1.0 - instances.append( - {"category_id": CLASS_NAMES.index(cls), "bbox": bbox, "bbox_mode": BoxMode.XYXY_ABS} - ) - r["annotations"] = instances - dicts.append(r) - return dicts - - -def register_pascal_voc(name, dirname, split, year): - DatasetCatalog.register(name, lambda: load_voc_instances(dirname, split)) - MetadataCatalog.get(name).set( - thing_classes=CLASS_NAMES, dirname=dirname, year=year, split=split - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/register_coco.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/register_coco.py deleted file mode 100644 index a0a4db6..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/register_coco.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import copy -import os - -from detectron2.data import DatasetCatalog, MetadataCatalog - -from .coco import load_coco_json, load_sem_seg - -""" -This file contains functions to register a COCO-format dataset to the DatasetCatalog. -""" - -__all__ = ["register_coco_instances", "register_coco_panoptic_separated"] - - -def register_coco_instances(name, metadata, json_file, image_root): - """ - Register a dataset in COCO's json annotation format for - instance detection, instance segmentation and keypoint detection. - (i.e., Type 1 and 2 in http://cocodataset.org/#format-data. - `instances*.json` and `person_keypoints*.json` in the dataset). - - This is an example of how to register a new dataset. - You can do something similar to this function, to register new data. - - Args: - name (str): the name that identifies a dataset, e.g. "coco_2014_train". - metadata (dict): extra metadata associated with this dataset. You can - leave it as an empty dict. - json_file (str): path to the json instance annotation file. - image_root (str or path-like): directory which contains all the images. 
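For context, a minimal sketch of how this registration entry point is typically called (the dataset name and paths below are placeholders, assuming the upstream detectron2 package layout):

    from detectron2.data import DatasetCatalog
    from detectron2.data.datasets import register_coco_instances

    # Register a custom COCO-format dataset; the name and paths are hypothetical.
    register_coco_instances(
        "my_dataset_train",                       # dataset name referenced in configs
        {},                                       # extra metadata, may be empty
        "datasets/my_dataset/annotations.json",   # COCO-style instance annotations
        "datasets/my_dataset/images",             # directory containing the images
    )

    # The registered loader can then be materialized (requires the files to exist):
    dicts = DatasetCatalog.get("my_dataset_train")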
- """ - assert isinstance(name, str), name - assert isinstance(json_file, (str, os.PathLike)), json_file - assert isinstance(image_root, (str, os.PathLike)), image_root - # 1. register a function which returns dicts - DatasetCatalog.register(name, lambda: load_coco_json(json_file, image_root, name)) - - # 2. Optionally, add metadata about this dataset, - # since they might be useful in evaluation, visualization or logging - MetadataCatalog.get(name).set( - json_file=json_file, image_root=image_root, evaluator_type="coco", **metadata - ) - - -def register_coco_panoptic_separated( - name, metadata, image_root, panoptic_root, panoptic_json, sem_seg_root, instances_json -): - """ - Register a COCO panoptic segmentation dataset named `name`. - The annotations in this registered dataset will contain both instance annotations and - semantic annotations, each with its own contiguous ids. Hence it's called "separated". - - It follows the setting used by the PanopticFPN paper: - - 1. The instance annotations directly come from polygons in the COCO - instances annotation task, rather than from the masks in the COCO panoptic annotations. - - The two format have small differences: - Polygons in the instance annotations may have overlaps. - The mask annotations are produced by labeling the overlapped polygons - with depth ordering. - - 2. The semantic annotations are converted from panoptic annotations, where - all "things" are assigned a semantic id of 0. - All semantic categories will therefore have ids in contiguous - range [1, #stuff_categories]. - - This function will also register a pure semantic segmentation dataset - named ``name + '_stuffonly'``. - - Args: - name (str): the name that identifies a dataset, - e.g. "coco_2017_train_panoptic" - metadata (dict): extra metadata associated with this dataset. - image_root (str): directory which contains all the images - panoptic_root (str): directory which contains panoptic annotation images - panoptic_json (str): path to the json panoptic annotation file - sem_seg_root (str): directory which contains all the ground truth segmentation annotations. - instances_json (str): path to the json instance annotation file - """ - panoptic_name = name + "_separated" - DatasetCatalog.register( - panoptic_name, - lambda: merge_to_panoptic( - load_coco_json(instances_json, image_root, panoptic_name), - load_sem_seg(sem_seg_root, image_root), - ), - ) - MetadataCatalog.get(panoptic_name).set( - panoptic_root=panoptic_root, - image_root=image_root, - panoptic_json=panoptic_json, - sem_seg_root=sem_seg_root, - json_file=instances_json, # TODO rename - evaluator_type="coco_panoptic_seg", - **metadata - ) - - semantic_name = name + "_stuffonly" - DatasetCatalog.register(semantic_name, lambda: load_sem_seg(sem_seg_root, image_root)) - MetadataCatalog.get(semantic_name).set( - sem_seg_root=sem_seg_root, image_root=image_root, evaluator_type="sem_seg", **metadata - ) - - -def merge_to_panoptic(detection_dicts, sem_seg_dicts): - """ - Create dataset dicts for panoptic segmentation, by - merging two dicts using "file_name" field to match their entries. - - Args: - detection_dicts (list[dict]): lists of dicts for object detection or instance segmentation. - sem_seg_dicts (list[dict]): lists of dicts for semantic segmentation. - - Returns: - list[dict] (one per input image): Each dict contains all (key, value) pairs from dicts in - both detection_dicts and sem_seg_dicts that correspond to the same image. 
- The function assumes that the same key in different dicts has the same value. - """ - results = [] - sem_seg_file_to_entry = {x["file_name"]: x for x in sem_seg_dicts} - assert len(sem_seg_file_to_entry) > 0 - - for det_dict in detection_dicts: - dic = copy.copy(det_dict) - dic.update(sem_seg_file_to_entry[dic["file_name"]]) - results.append(dic) - return results diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/detection_utils.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/detection_utils.py deleted file mode 100644 index e19c7e2..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/detection_utils.py +++ /dev/null @@ -1,516 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -""" -Common data processing utilities that are used in a -typical object detection data pipeline. -""" -import logging -import numpy as np -import pycocotools.mask as mask_util -import torch -from fvcore.common.file_io import PathManager -from PIL import Image, ImageOps - -from detectron2.structures import ( - BitMasks, - Boxes, - BoxMode, - Instances, - Keypoints, - PolygonMasks, - RotatedBoxes, - polygons_to_bitmask, -) - -from . import transforms as T -from .catalog import MetadataCatalog - - -class SizeMismatchError(ValueError): - """ - When loaded image has difference width/height compared with annotation. - """ - - -# https://en.wikipedia.org/wiki/YUV#SDTV_with_BT.601 -_M_RGB2YUV = [[0.299, 0.587, 0.114], [-0.14713, -0.28886, 0.436], [0.615, -0.51499, -0.10001]] -_M_YUV2RGB = [[1.0, 0.0, 1.13983], [1.0, -0.39465, -0.58060], [1.0, 2.03211, 0.0]] - - -def convert_PIL_to_numpy(image, format): - """ - Convert PIL image to numpy array of target format. - - Args: - image (PIL.Image): a PIL image - format (str): the format of output image - - Returns: - (np.ndarray): also see `read_image` - """ - if format is not None: - # PIL only supports RGB, so convert to RGB and flip channels over below - conversion_format = format - if format in ["BGR", "YUV-BT.601"]: - conversion_format = "RGB" - image = image.convert(conversion_format) - image = np.asarray(image) - # PIL squeezes out the channel dimension for "L", so make it HWC - if format == "L": - image = np.expand_dims(image, -1) - - # handle formats not supported by PIL - elif format == "BGR": - # flip channels if needed - image = image[:, :, ::-1] - elif format == "YUV-BT.601": - image = image / 255.0 - image = np.dot(image, np.array(_M_RGB2YUV).T) - - return image - - -def convert_image_to_rgb(image, format): - """ - Convert numpy image from given format to RGB. - - Args: - image (np.ndarray): a numpy image - format (str): the format of input image, also see `read_image` - - Returns: - (np.ndarray): HWC RGB image in 0-255 range, can be either float or uint8 - """ - if format == "BGR": - image = image[:, :, [2, 1, 0]] - elif format == "YUV-BT.601": - image = np.dot(image, np.array(_M_YUV2RGB).T) - image = image * 255.0 - else: - if format == "L": - image = image[:, :, 0] - image = image.astype(np.uint8) - image = np.asarray(Image.fromarray(image, mode=format).convert("RGB")) - return image - - -def read_image(file_name, format=None): - """ - Read an image into the given format. - Will apply rotation and flipping if the image has such exif information. 
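A short usage sketch for this image reader (the path is a placeholder; import path as in upstream detectron2):

    from detectron2.data.detection_utils import read_image

    # Returns an HWC uint8 array in BGR channel order, with EXIF rotation applied.
    img = read_image("path/to/image.jpg", format="BGR")
    print(img.shape, img.dtype)   # (H, W, 3) uint8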
- - Args: - file_name (str): image file path - format (str): one of the supported image modes in PIL, or "BGR" or "YUV-BT.601" - - Returns: - image (np.ndarray): an HWC image in the given format, which is 0-255, uint8 for - supported image modes in PIL or "BGR"; float (0-1 for Y) for YUV-BT.601. - """ - with PathManager.open(file_name, "rb") as f: - image = Image.open(f) - - # capture and ignore this bug: https://github.com/python-pillow/Pillow/issues/3973 - try: - image = ImageOps.exif_transpose(image) - except Exception: - pass - - return convert_PIL_to_numpy(image, format) - - -def check_image_size(dataset_dict, image): - """ - Raise an error if the image does not match the size specified in the dict. - """ - if "width" in dataset_dict or "height" in dataset_dict: - image_wh = (image.shape[1], image.shape[0]) - expected_wh = (dataset_dict["width"], dataset_dict["height"]) - if not image_wh == expected_wh: - raise SizeMismatchError( - "Mismatched (W,H){}, got {}, expect {}".format( - " for image " + dataset_dict["file_name"] - if "file_name" in dataset_dict - else "", - image_wh, - expected_wh, - ) - ) - - # To ensure bbox always remap to original image size - if "width" not in dataset_dict: - dataset_dict["width"] = image.shape[1] - if "height" not in dataset_dict: - dataset_dict["height"] = image.shape[0] - - -def transform_proposals(dataset_dict, image_shape, transforms, min_box_side_len, proposal_topk): - """ - Apply transformations to the proposals in dataset_dict, if any. - - Args: - dataset_dict (dict): a dict read from the dataset, possibly - contains fields "proposal_boxes", "proposal_objectness_logits", "proposal_bbox_mode" - image_shape (tuple): height, width - transforms (TransformList): - min_box_side_len (int): keep proposals with at least this size - proposal_topk (int): only keep top-K scoring proposals - - The input dict is modified in-place, with abovementioned keys removed. A new - key "proposals" will be added. Its value is an `Instances` - object which contains the transformed proposals in its field - "proposal_boxes" and "objectness_logits". - """ - if "proposal_boxes" in dataset_dict: - # Transform proposal boxes - boxes = transforms.apply_box( - BoxMode.convert( - dataset_dict.pop("proposal_boxes"), - dataset_dict.pop("proposal_bbox_mode"), - BoxMode.XYXY_ABS, - ) - ) - boxes = Boxes(boxes) - objectness_logits = torch.as_tensor( - dataset_dict.pop("proposal_objectness_logits").astype("float32") - ) - - boxes.clip(image_shape) - keep = boxes.nonempty(threshold=min_box_side_len) - boxes = boxes[keep] - objectness_logits = objectness_logits[keep] - - proposals = Instances(image_shape) - proposals.proposal_boxes = boxes[:proposal_topk] - proposals.objectness_logits = objectness_logits[:proposal_topk] - dataset_dict["proposals"] = proposals - - -def transform_instance_annotations( - annotation, transforms, image_size, *, keypoint_hflip_indices=None -): - """ - Apply transforms to box, segmentation and keypoints annotations of a single instance. - - It will use `transforms.apply_box` for the box, and - `transforms.apply_coords` for segmentation polygons & keypoints. - If you need anything more specially designed for each data structure, - you'll need to implement your own version of this function or the transforms. - - Args: - annotation (dict): dict of instance annotations for a single instance. - It will be modified in-place. 
- transforms (TransformList): - image_size (tuple): the height, width of the transformed image - keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`. - - Returns: - dict: - the same input dict with fields "bbox", "segmentation", "keypoints" - transformed according to `transforms`. - The "bbox_mode" field will be set to XYXY_ABS. - """ - bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS) - # Note that bbox is 1d (per-instance bounding box) - annotation["bbox"] = transforms.apply_box([bbox])[0] - annotation["bbox_mode"] = BoxMode.XYXY_ABS - - if "segmentation" in annotation: - # each instance contains 1 or more polygons - segm = annotation["segmentation"] - if isinstance(segm, list): - # polygons - polygons = [np.asarray(p).reshape(-1, 2) for p in segm] - annotation["segmentation"] = [ - p.reshape(-1) for p in transforms.apply_polygons(polygons) - ] - elif isinstance(segm, dict): - # RLE - mask = mask_util.decode(segm) - mask = transforms.apply_segmentation(mask) - assert tuple(mask.shape[:2]) == image_size - annotation["segmentation"] = mask - else: - raise ValueError( - "Cannot transform segmentation of type '{}'!" - "Supported types are: polygons as list[list[float] or ndarray]," - " COCO-style RLE as a dict.".format(type(segm)) - ) - - if "keypoints" in annotation: - keypoints = transform_keypoint_annotations( - annotation["keypoints"], transforms, image_size, keypoint_hflip_indices - ) - annotation["keypoints"] = keypoints - - return annotation - - -def transform_keypoint_annotations(keypoints, transforms, image_size, keypoint_hflip_indices=None): - """ - Transform keypoint annotations of an image. - - Args: - keypoints (list[float]): Nx3 float in Detectron2 Dataset format. - transforms (TransformList): - image_size (tuple): the height, width of the transformed image - keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`. - """ - # (N*3,) -> (N, 3) - keypoints = np.asarray(keypoints, dtype="float64").reshape(-1, 3) - keypoints[:, :2] = transforms.apply_coords(keypoints[:, :2]) - - # This assumes that HorizFlipTransform is the only one that does flip - do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1 - - # Alternative way: check if probe points was horizontally flipped. - # probe = np.asarray([[0.0, 0.0], [image_width, 0.0]]) - # probe_aug = transforms.apply_coords(probe.copy()) - # do_hflip = np.sign(probe[1][0] - probe[0][0]) != np.sign(probe_aug[1][0] - probe_aug[0][0]) # noqa - - # If flipped, swap each keypoint with its opposite-handed equivalent - if do_hflip: - assert keypoint_hflip_indices is not None - keypoints = keypoints[keypoint_hflip_indices, :] - - # Maintain COCO convention that if visibility == 0, then x, y = 0 - # TODO may need to reset visibility for cropped keypoints, - # but it does not matter for our existing algorithms - keypoints[keypoints[:, 2] == 0] = 0 - return keypoints - - -def annotations_to_instances(annos, image_size, mask_format="polygon"): - """ - Create an :class:`Instances` object used by the models, - from instance annotations in the dataset dict. - - Args: - annos (list[dict]): a list of instance annotations in one image, each - element for one instance. - image_size (tuple): height, width - - Returns: - Instances: - It will contain fields "gt_boxes", "gt_classes", - "gt_masks", "gt_keypoints", if they can be obtained from `annos`. - This is the format that builtin models expect. 
- """ - boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos] - target = Instances(image_size) - boxes = target.gt_boxes = Boxes(boxes) - boxes.clip(image_size) - - classes = [obj["category_id"] for obj in annos] - classes = torch.tensor(classes, dtype=torch.int64) - target.gt_classes = classes - - if len(annos) and "segmentation" in annos[0]: - segms = [obj["segmentation"] for obj in annos] - if mask_format == "polygon": - masks = PolygonMasks(segms) - else: - assert mask_format == "bitmask", mask_format - masks = [] - for segm in segms: - if isinstance(segm, list): - # polygon - masks.append(polygons_to_bitmask(segm, *image_size)) - elif isinstance(segm, dict): - # COCO RLE - masks.append(mask_util.decode(segm)) - elif isinstance(segm, np.ndarray): - assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format( - segm.ndim - ) - # mask array - masks.append(segm) - else: - raise ValueError( - "Cannot convert segmentation of type '{}' to BitMasks!" - "Supported types are: polygons as list[list[float] or ndarray]," - " COCO-style RLE as a dict, or a full-image segmentation mask " - "as a 2D ndarray.".format(type(segm)) - ) - # torch.from_numpy does not support array with negative stride. - masks = BitMasks( - torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks]) - ) - target.gt_masks = masks - - if len(annos) and "keypoints" in annos[0]: - kpts = [obj.get("keypoints", []) for obj in annos] - target.gt_keypoints = Keypoints(kpts) - - return target - - -def annotations_to_instances_rotated(annos, image_size): - """ - Create an :class:`Instances` object used by the models, - from instance annotations in the dataset dict. - Compared to `annotations_to_instances`, this function is for rotated boxes only - - Args: - annos (list[dict]): a list of instance annotations in one image, each - element for one instance. - image_size (tuple): height, width - - Returns: - Instances: - Containing fields "gt_boxes", "gt_classes", - if they can be obtained from `annos`. - This is the format that builtin models expect. - """ - boxes = [obj["bbox"] for obj in annos] - target = Instances(image_size) - boxes = target.gt_boxes = RotatedBoxes(boxes) - boxes.clip(image_size) - - classes = [obj["category_id"] for obj in annos] - classes = torch.tensor(classes, dtype=torch.int64) - target.gt_classes = classes - - return target - - -def filter_empty_instances(instances, by_box=True, by_mask=True, box_threshold=1e-5): - """ - Filter out empty instances in an `Instances` object. - - Args: - instances (Instances): - by_box (bool): whether to filter out instances with empty boxes - by_mask (bool): whether to filter out instances with empty masks - box_threshold (float): minimum width and height to be considered non-empty - - Returns: - Instances: the filtered instances. - """ - assert by_box or by_mask - r = [] - if by_box: - r.append(instances.gt_boxes.nonempty(threshold=box_threshold)) - if instances.has("gt_masks") and by_mask: - r.append(instances.gt_masks.nonempty()) - - # TODO: can also filter visible keypoints - - if not r: - return instances - m = r[0] - for x in r[1:]: - m = m & x - return instances[m] - - -def create_keypoint_hflip_indices(dataset_names): - """ - Args: - dataset_names (list[str]): list of dataset names - Returns: - ndarray[int]: a vector of size=#keypoints, storing the - horizontally-flipped keypoint indices. 
- """ - - check_metadata_consistency("keypoint_names", dataset_names) - check_metadata_consistency("keypoint_flip_map", dataset_names) - - meta = MetadataCatalog.get(dataset_names[0]) - names = meta.keypoint_names - # TODO flip -> hflip - flip_map = dict(meta.keypoint_flip_map) - flip_map.update({v: k for k, v in flip_map.items()}) - flipped_names = [i if i not in flip_map else flip_map[i] for i in names] - flip_indices = [names.index(i) for i in flipped_names] - return np.asarray(flip_indices) - - -def gen_crop_transform_with_instance(crop_size, image_size, instance): - """ - Generate a CropTransform so that the cropping region contains - the center of the given instance. - - Args: - crop_size (tuple): h, w in pixels - image_size (tuple): h, w - instance (dict): an annotation dict of one instance, in Detectron2's - dataset format. - """ - crop_size = np.asarray(crop_size, dtype=np.int32) - bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS) - center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5 - assert ( - image_size[0] >= center_yx[0] and image_size[1] >= center_yx[1] - ), "The annotation bounding box is outside of the image!" - assert ( - image_size[0] >= crop_size[0] and image_size[1] >= crop_size[1] - ), "Crop size is larger than image size!" - - min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0) - max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0) - max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32)) - - y0 = np.random.randint(min_yx[0], max_yx[0] + 1) - x0 = np.random.randint(min_yx[1], max_yx[1] + 1) - return T.CropTransform(x0, y0, crop_size[1], crop_size[0]) - - -def check_metadata_consistency(key, dataset_names): - """ - Check that the data have consistent metadata. - - Args: - key (str): a metadata key - dataset_names (list[str]): a list of dataset names - - Raises: - AttributeError: if the key does not exist in the metadata - ValueError: if the given data do not have the same metadata values defined by key - """ - if len(dataset_names) == 0: - return - logger = logging.getLogger(__name__) - entries_per_dataset = [getattr(MetadataCatalog.get(d), key) for d in dataset_names] - for idx, entry in enumerate(entries_per_dataset): - if entry != entries_per_dataset[0]: - logger.error( - "Metadata '{}' for dataset '{}' is '{}'".format(key, dataset_names[idx], str(entry)) - ) - logger.error( - "Metadata '{}' for dataset '{}' is '{}'".format( - key, dataset_names[0], str(entries_per_dataset[0]) - ) - ) - raise ValueError("Datasets have different metadata '{}'!".format(key)) - - -def build_transform_gen(cfg, is_train): - """ - Create a list of :class:`TransformGen` from config. - Now it includes resizing and flipping. 
- - Returns: - list[TransformGen] - """ - if is_train: - min_size = cfg.INPUT.MIN_SIZE_TRAIN - max_size = cfg.INPUT.MAX_SIZE_TRAIN - sample_style = cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING - else: - min_size = cfg.INPUT.MIN_SIZE_TEST - max_size = cfg.INPUT.MAX_SIZE_TEST - sample_style = "choice" - if sample_style == "range": - assert len(min_size) == 2, "more than 2 ({}) min_size(s) are provided for ranges".format( - len(min_size) - ) - - logger = logging.getLogger(__name__) - tfm_gens = [] - tfm_gens.append(T.ResizeShortestEdge(min_size, max_size, sample_style)) - if is_train: - tfm_gens.append(T.RandomFlip()) - logger.info("TransformGens used in training: " + str(tfm_gens)) - return tfm_gens diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/samplers/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/samplers/__init__.py deleted file mode 100644 index 9cfa8a6..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/samplers/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .distributed_sampler import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler -from .grouped_batch_sampler import GroupedBatchSampler - -__all__ = [ - "GroupedBatchSampler", - "TrainingSampler", - "InferenceSampler", - "RepeatFactorTrainingSampler", -] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/samplers/distributed_sampler.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/samplers/distributed_sampler.py deleted file mode 100644 index 4ac57bb..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/samplers/distributed_sampler.py +++ /dev/null @@ -1,199 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import itertools -import math -from collections import defaultdict -from typing import Optional -import torch -from torch.utils.data.sampler import Sampler - -from detectron2.utils import comm - - -class TrainingSampler(Sampler): - """ - In training, we only care about the "infinite stream" of training data. - So this sampler produces an infinite stream of indices and - all workers cooperate to correctly shuffle the indices and sample different indices. - - The samplers in each worker effectively produces `indices[worker_id::num_workers]` - where `indices` is an infinite stream of indices consisting of - `shuffle(range(size)) + shuffle(range(size)) + ...` (if shuffle is True) - or `range(size) + range(size) + ...` (if shuffle is False) - """ - - def __init__(self, size: int, shuffle: bool = True, seed: Optional[int] = None): - """ - Args: - size (int): the total number of data of the underlying dataset to sample from - shuffle (bool): whether to shuffle the indices or not - seed (int): the initial seed of the shuffle. Must be the same - across all workers. If None, will use a random seed shared - among workers (require synchronization among all workers). 
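The sharding described in the class docstring can be illustrated standalone (sizes and seed invented, no detectron2 needed): each worker takes every world_size-th element of the same shuffled infinite stream, so together the workers cover each pass over the data exactly once.

    import itertools
    import torch

    def infinite_indices(size, seed=0):
        g = torch.Generator()
        g.manual_seed(seed)
        while True:
            yield from torch.randperm(size, generator=g).tolist()

    world_size, size = 2, 5
    shards = [
        list(itertools.islice(infinite_indices(size), rank, 10, world_size))
        for rank in range(world_size)
    ]
    print(shards)   # disjoint slices of the same shuffled stream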
- """ - self._size = size - assert size > 0 - self._shuffle = shuffle - if seed is None: - seed = comm.shared_random_seed() - self._seed = int(seed) - - self._rank = comm.get_rank() - self._world_size = comm.get_world_size() - - def __iter__(self): - start = self._rank - yield from itertools.islice(self._infinite_indices(), start, None, self._world_size) - - def _infinite_indices(self): - g = torch.Generator() - g.manual_seed(self._seed) - while True: - if self._shuffle: - yield from torch.randperm(self._size, generator=g) - else: - yield from torch.arange(self._size) - - -class RepeatFactorTrainingSampler(Sampler): - """ - Similar to TrainingSampler, but suitable for training on class imbalanced data - like LVIS. In each epoch, an image may appear multiple times based on its "repeat - factor". The repeat factor for an image is a function of the frequency the rarest - category labeled in that image. The "frequency of category c" in [0, 1] is defined - as the fraction of images in the training set (without repeats) in which category c - appears. - - See :paper:`lvis` (>= v2) Appendix B.2. - """ - - def __init__(self, dataset_dicts, repeat_thresh, shuffle=True, seed=None): - """ - Args: - dataset_dicts (list[dict]): annotations in Detectron2 dataset format. - repeat_thresh (float): frequency threshold below which data is repeated. - shuffle (bool): whether to shuffle the indices or not - seed (int): the initial seed of the shuffle. Must be the same - across all workers. If None, will use a random seed shared - among workers (require synchronization among all workers). - """ - self._shuffle = shuffle - if seed is None: - seed = comm.shared_random_seed() - self._seed = int(seed) - - self._rank = comm.get_rank() - self._world_size = comm.get_world_size() - - # Get fractional repeat factors and split into whole number (_int_part) - # and fractional (_frac_part) parts. - rep_factors = self._get_repeat_factors(dataset_dicts, repeat_thresh) - self._int_part = torch.trunc(rep_factors) - self._frac_part = rep_factors - self._int_part - - def _get_repeat_factors(self, dataset_dicts, repeat_thresh): - """ - Compute (fractional) per-image repeat factors. - - Args: - See __init__. - - Returns: - torch.Tensor: the i-th element is the repeat factor for the dataset image - at index i. - """ - # 1. For each category c, compute the fraction of images that contain it: f(c) - category_freq = defaultdict(int) - for dataset_dict in dataset_dicts: # For each image (without repeats) - cat_ids = {ann["category_id"] for ann in dataset_dict["annotations"]} - for cat_id in cat_ids: - category_freq[cat_id] += 1 - num_images = len(dataset_dicts) - for k, v in category_freq.items(): - category_freq[k] = v / num_images - - # 2. For each category c, compute the category-level repeat factor: - # r(c) = max(1, sqrt(t / f(c))) - category_rep = { - cat_id: max(1.0, math.sqrt(repeat_thresh / cat_freq)) - for cat_id, cat_freq in category_freq.items() - } - - # 3. For each image I, compute the image-level repeat factor: - # r(I) = max_{c in I} r(c) - rep_factors = [] - for dataset_dict in dataset_dicts: - cat_ids = {ann["category_id"] for ann in dataset_dict["annotations"]} - rep_factor = max({category_rep[cat_id] for cat_id in cat_ids}) - rep_factors.append(rep_factor) - - return torch.tensor(rep_factors, dtype=torch.float32) - - def _get_epoch_indices(self, generator): - """ - Create a list of dataset indices (with repeats) to use for one epoch. 
- - Args: - generator (torch.Generator): pseudo random number generator used for - stochastic rounding. - - Returns: - torch.Tensor: list of dataset indices to use in one epoch. Each index - is repeated based on its calculated repeat factor. - """ - # Since repeat factors are fractional, we use stochastic rounding so - # that the target repeat factor is achieved in expectation over the - # course of training - rands = torch.rand(len(self._frac_part), generator=generator) - rep_factors = self._int_part + (rands < self._frac_part).float() - # Construct a list of indices in which we repeat images as specified - indices = [] - for dataset_index, rep_factor in enumerate(rep_factors): - indices.extend([dataset_index] * int(rep_factor.item())) - return torch.tensor(indices, dtype=torch.int64) - - def __iter__(self): - start = self._rank - yield from itertools.islice(self._infinite_indices(), start, None, self._world_size) - - def _infinite_indices(self): - g = torch.Generator() - g.manual_seed(self._seed) - while True: - # Sample indices with repeats determined by stochastic rounding; each - # "epoch" may have a slightly different size due to the rounding. - indices = self._get_epoch_indices(g) - if self._shuffle: - randperm = torch.randperm(len(indices), generator=g) - yield from indices[randperm] - else: - yield from indices - - -class InferenceSampler(Sampler): - """ - Produce indices for inference. - Inference needs to run on the __exact__ set of samples, - therefore when the total number of samples is not divisible by the number of workers, - this sampler produces different number of samples on different workers. - """ - - def __init__(self, size: int): - """ - Args: - size (int): the total number of data of the underlying dataset to sample from - """ - self._size = size - assert size > 0 - self._rank = comm.get_rank() - self._world_size = comm.get_world_size() - - shard_size = (self._size - 1) // self._world_size + 1 - begin = shard_size * self._rank - end = min(shard_size * (self._rank + 1), self._size) - self._local_indices = range(begin, end) - - def __iter__(self): - yield from self._local_indices - - def __len__(self): - return len(self._local_indices) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/samplers/grouped_batch_sampler.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/samplers/grouped_batch_sampler.py deleted file mode 100644 index 138e106..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/samplers/grouped_batch_sampler.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import numpy as np -from torch.utils.data.sampler import BatchSampler, Sampler - - -class GroupedBatchSampler(BatchSampler): - """ - Wraps another sampler to yield a mini-batch of indices. - It enforces that the batch only contain elements from the same group. - It also tries to provide mini-batches which follows an ordering which is - as close as possible to the ordering from the original sampler. - """ - - def __init__(self, sampler, group_ids, batch_size): - """ - Args: - sampler (Sampler): Base sampler. - group_ids (list[int]): If the sampler produces indices in range [0, N), - `group_ids` must be a list of `N` ints which contains the group id of each sample. - The group ids must be a set of integers in the range [0, num_groups). - batch_size (int): Size of mini-batch. 
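A small usage sketch for the grouping behaviour described above (group ids and batch size invented; the import path follows the samplers __init__ shown earlier in this diff):

    from torch.utils.data.sampler import SequentialSampler
    from detectron2.data.samplers import GroupedBatchSampler

    # Six samples grouped, e.g., by aspect-ratio bucket (0 = wide, 1 = tall).
    group_ids = [0, 1, 0, 1, 1, 0]
    sampler = SequentialSampler(range(len(group_ids)))
    batch_sampler = GroupedBatchSampler(sampler, group_ids, batch_size=2)

    for batch in batch_sampler:
        print(batch)   # [0, 2] then [1, 3]: each batch stays within one group
    # Indices 4 and 5 never fill a batch of their group and are dropped.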
- """ - if not isinstance(sampler, Sampler): - raise ValueError( - "sampler should be an instance of " - "torch.utils.data.Sampler, but got sampler={}".format(sampler) - ) - self.sampler = sampler - self.group_ids = np.asarray(group_ids) - assert self.group_ids.ndim == 1 - self.batch_size = batch_size - groups = np.unique(self.group_ids).tolist() - - # buffer the indices of each group until batch size is reached - self.buffer_per_group = {k: [] for k in groups} - - def __iter__(self): - for idx in self.sampler: - group_id = self.group_ids[idx] - group_buffer = self.buffer_per_group[group_id] - group_buffer.append(idx) - if len(group_buffer) == self.batch_size: - yield group_buffer[:] # yield a copy of the list - del group_buffer[:] - - def __len__(self): - raise NotImplementedError("len() of GroupedBatchSampler is not well-defined.") diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/transforms/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/transforms/__init__.py deleted file mode 100644 index f7638bb..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/transforms/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .transform import * -from fvcore.transforms.transform import * -from .transform_gen import * - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/transforms/transform.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/transforms/transform.py deleted file mode 100644 index bd93753..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/transforms/transform.py +++ /dev/null @@ -1,241 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -# File: transform.py - -import numpy as np -import torch -import torch.nn.functional as F -from fvcore.transforms.transform import HFlipTransform, NoOpTransform, Transform -from PIL import Image - -try: - import cv2 # noqa -except ImportError: - # OpenCV is an optional dependency at the moment - pass - -__all__ = ["ExtentTransform", "ResizeTransform", "RotationTransform"] - - -class ExtentTransform(Transform): - """ - Extracts a subregion from the source image and scales it to the output size. - - The fill color is used to map pixels from the source rect that fall outside - the source image. - - See: https://pillow.readthedocs.io/en/latest/PIL.html#PIL.ImageTransform.ExtentTransform - """ - - def __init__(self, src_rect, output_size, interp=Image.LINEAR, fill=0): - """ - Args: - src_rect (x0, y0, x1, y1): src coordinates - output_size (h, w): dst image size - interp: PIL interpolation methods - fill: Fill color used when src_rect extends outside image - """ - super().__init__() - self._set_attributes(locals()) - - def apply_image(self, img, interp=None): - h, w = self.output_size - ret = Image.fromarray(img).transform( - size=(w, h), - method=Image.EXTENT, - data=self.src_rect, - resample=interp if interp else self.interp, - fill=self.fill, - ) - return np.asarray(ret) - - def apply_coords(self, coords): - # Transform image center from source coordinates into output coordinates - # and then map the new origin to the corner of the output image. 
- h, w = self.output_size - x0, y0, x1, y1 = self.src_rect - new_coords = coords.astype(np.float32) - new_coords[:, 0] -= 0.5 * (x0 + x1) - new_coords[:, 1] -= 0.5 * (y0 + y1) - new_coords[:, 0] *= w / (x1 - x0) - new_coords[:, 1] *= h / (y1 - y0) - new_coords[:, 0] += 0.5 * w - new_coords[:, 1] += 0.5 * h - return new_coords - - def apply_segmentation(self, segmentation): - segmentation = self.apply_image(segmentation, interp=Image.NEAREST) - return segmentation - - -class ResizeTransform(Transform): - """ - Resize the image to a target size. - """ - - def __init__(self, h, w, new_h, new_w, interp=None): - """ - Args: - h, w (int): original image size - new_h, new_w (int): new image size - interp: PIL interpolation methods, defaults to bilinear. - """ - # TODO decide on PIL vs opencv - super().__init__() - if interp is None: - interp = Image.BILINEAR - self._set_attributes(locals()) - - def apply_image(self, img, interp=None): - assert img.shape[:2] == (self.h, self.w) - assert len(img.shape) <= 4 - - if img.dtype == np.uint8: - pil_image = Image.fromarray(img) - interp_method = interp if interp is not None else self.interp - pil_image = pil_image.resize((self.new_w, self.new_h), interp_method) - ret = np.asarray(pil_image) - else: - # PIL only supports uint8 - img = torch.from_numpy(img) - shape = list(img.shape) - shape_4d = shape[:2] + [1] * (4 - len(shape)) + shape[2:] - img = img.view(shape_4d).permute(2, 3, 0, 1) # hw(c) -> nchw - _PIL_RESIZE_TO_INTERPOLATE_MODE = {Image.BILINEAR: "bilinear", Image.BICUBIC: "bicubic"} - mode = _PIL_RESIZE_TO_INTERPOLATE_MODE[self.interp] - img = F.interpolate(img, (self.new_h, self.new_w), mode=mode, align_corners=False) - shape[:2] = (self.new_h, self.new_w) - ret = img.permute(2, 3, 0, 1).view(shape).numpy() # nchw -> hw(c) - - return ret - - def apply_coords(self, coords): - coords[:, 0] = coords[:, 0] * (self.new_w * 1.0 / self.w) - coords[:, 1] = coords[:, 1] * (self.new_h * 1.0 / self.h) - return coords - - def apply_segmentation(self, segmentation): - segmentation = self.apply_image(segmentation, interp=Image.NEAREST) - return segmentation - - def inverse(self): - return ResizeTransform(self.new_h, self.new_w, self.h, self.w, self.interp) - - -class RotationTransform(Transform): - """ - This method returns a copy of this image, rotated the given - number of degrees counter clockwise around its center. 
- """ - - def __init__(self, h, w, angle, expand=True, center=None, interp=None): - """ - Args: - h, w (int): original image size - angle (float): degrees for rotation - expand (bool): choose if the image should be resized to fit the whole - rotated image (default), or simply cropped - center (tuple (width, height)): coordinates of the rotation center - if left to None, the center will be fit to the center of each image - center has no effect if expand=True because it only affects shifting - interp: cv2 interpolation method, default cv2.INTER_LINEAR - """ - super().__init__() - image_center = np.array((w / 2, h / 2)) - if center is None: - center = image_center - if interp is None: - interp = cv2.INTER_LINEAR - abs_cos, abs_sin = abs(np.cos(np.deg2rad(angle))), abs(np.sin(np.deg2rad(angle))) - if expand: - # find the new width and height bounds - bound_w, bound_h = np.rint( - [h * abs_sin + w * abs_cos, h * abs_cos + w * abs_sin] - ).astype(int) - else: - bound_w, bound_h = w, h - - self._set_attributes(locals()) - self.rm_coords = self.create_rotation_matrix() - # Needed because of this problem https://github.com/opencv/opencv/issues/11784 - self.rm_image = self.create_rotation_matrix(offset=-0.5) - - def apply_image(self, img, interp=None): - """ - demo should be a numpy array, formatted as Height * Width * Nchannels - """ - if len(img) == 0 or self.angle % 360 == 0: - return img - assert img.shape[:2] == (self.h, self.w) - interp = interp if interp is not None else self.interp - return cv2.warpAffine(img, self.rm_image, (self.bound_w, self.bound_h), flags=interp) - - def apply_coords(self, coords): - """ - coords should be a N * 2 array-like, containing N couples of (x, y) points - """ - coords = np.asarray(coords, dtype=float) - if len(coords) == 0 or self.angle % 360 == 0: - return coords - return cv2.transform(coords[:, np.newaxis, :], self.rm_coords)[:, 0, :] - - def apply_segmentation(self, segmentation): - segmentation = self.apply_image(segmentation, interp=cv2.INTER_NEAREST) - return segmentation - - def create_rotation_matrix(self, offset=0): - center = (self.center[0] + offset, self.center[1] + offset) - rm = cv2.getRotationMatrix2D(tuple(center), self.angle, 1) - if self.expand: - # Find the coordinates of the center of rotation in the new image - # The only point for which we know the future coordinates is the center of the image - rot_im_center = cv2.transform(self.image_center[None, None, :] + offset, rm)[0, 0, :] - new_center = np.array([self.bound_w / 2, self.bound_h / 2]) + offset - rot_im_center - # shift the rotation center to the new coordinates - rm[:, 2] += new_center - return rm - - -def HFlip_rotated_box(transform, rotated_boxes): - """ - Apply the horizontal flip transform on rotated boxes. - - Args: - rotated_boxes (ndarray): Nx5 floating point array of - (x_center, y_center, width, height, angle_degrees) format - in absolute coordinates. - """ - # Transform x_center - rotated_boxes[:, 0] = transform.width - rotated_boxes[:, 0] - # Transform angle - rotated_boxes[:, 4] = -rotated_boxes[:, 4] - return rotated_boxes - - -def Resize_rotated_box(transform, rotated_boxes): - """ - Apply the resizing transform on rotated boxes. For details of how these (approximation) - formulas are derived, please refer to :meth:`RotatedBoxes.scale`. - - Args: - rotated_boxes (ndarray): Nx5 floating point array of - (x_center, y_center, width, height, angle_degrees) format - in absolute coordinates. 
- """ - scale_factor_x = transform.new_w * 1.0 / transform.w - scale_factor_y = transform.new_h * 1.0 / transform.h - rotated_boxes[:, 0] *= scale_factor_x - rotated_boxes[:, 1] *= scale_factor_y - theta = rotated_boxes[:, 4] * np.pi / 180.0 - c = np.cos(theta) - s = np.sin(theta) - rotated_boxes[:, 2] *= np.sqrt(np.square(scale_factor_x * c) + np.square(scale_factor_y * s)) - rotated_boxes[:, 3] *= np.sqrt(np.square(scale_factor_x * s) + np.square(scale_factor_y * c)) - rotated_boxes[:, 4] = np.arctan2(scale_factor_x * s, scale_factor_y * c) * 180 / np.pi - - return rotated_boxes - - -HFlipTransform.register_type("rotated_box", HFlip_rotated_box) -NoOpTransform.register_type("rotated_box", lambda t, x: x) -ResizeTransform.register_type("rotated_box", Resize_rotated_box) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/transforms/transform_gen.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/transforms/transform_gen.py deleted file mode 100644 index 197a0eb..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/transforms/transform_gen.py +++ /dev/null @@ -1,534 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -# File: transformer.py - -import inspect -import numpy as np -import pprint -import sys -from abc import ABCMeta, abstractmethod -from fvcore.transforms.transform import ( - BlendTransform, - CropTransform, - HFlipTransform, - NoOpTransform, - Transform, - TransformList, - VFlipTransform, -) -from PIL import Image - -from .transform import ExtentTransform, ResizeTransform, RotationTransform - -__all__ = [ - "RandomApply", - "RandomBrightness", - "RandomContrast", - "RandomCrop", - "RandomExtent", - "RandomFlip", - "RandomSaturation", - "RandomLighting", - "RandomRotation", - "Resize", - "ResizeShortestEdge", - "TransformGen", - "apply_transform_gens", -] - - -def check_dtype(img): - assert isinstance(img, np.ndarray), "[TransformGen] Needs an numpy array, but got a {}!".format( - type(img) - ) - assert not isinstance(img.dtype, np.integer) or ( - img.dtype == np.uint8 - ), "[TransformGen] Got image of type {}, use uint8 or floating points instead!".format( - img.dtype - ) - assert img.ndim in [2, 3], img.ndim - - -class TransformGen(metaclass=ABCMeta): - """ - TransformGen takes an image of type uint8 in range [0, 255], or - floating point in range [0, 1] or [0, 255] as input. - - It creates a :class:`Transform` based on the given image, sometimes with randomness. - The transform can then be used to transform images - or other data (boxes, points, annotations, etc.) associated with it. - - The assumption made in this class - is that the image itself is sufficient to instantiate a transform. - When this assumption is not true, you need to create the transforms by your own. - - A list of `TransformGen` can be applied with :func:`apply_transform_gens`. - """ - - def _init(self, params=None): - if params: - for k, v in params.items(): - if k != "self" and not k.startswith("_"): - setattr(self, k, v) - - @abstractmethod - def get_transform(self, img): - pass - - def _rand_range(self, low=1.0, high=None, size=None): - """ - Uniform float random number between low and high. 
- """ - if high is None: - low, high = 0, low - if size is None: - size = [] - return np.random.uniform(low, high, size) - - def __repr__(self): - """ - Produce something like: - "MyTransformGen(field1={self.field1}, field2={self.field2})" - """ - try: - sig = inspect.signature(self.__init__) - classname = type(self).__name__ - argstr = [] - for name, param in sig.parameters.items(): - assert ( - param.kind != param.VAR_POSITIONAL and param.kind != param.VAR_KEYWORD - ), "The default __repr__ doesn't support *args or **kwargs" - assert hasattr(self, name), ( - "Attribute {} not found! " - "Default __repr__ only works if attributes match the constructor.".format(name) - ) - attr = getattr(self, name) - default = param.default - if default is attr: - continue - argstr.append("{}={}".format(name, pprint.pformat(attr))) - return "{}({})".format(classname, ", ".join(argstr)) - except AssertionError: - return super().__repr__() - - __str__ = __repr__ - - -class RandomApply(TransformGen): - """ - Randomly apply the wrapper transformation with a given probability. - """ - - def __init__(self, transform, prob=0.5): - """ - Args: - transform (Transform, TransformGen): the transform to be wrapped - by the `RandomApply`. The `transform` can either be a - `Transform` or `TransformGen` instance. - prob (float): probability between 0.0 and 1.0 that - the wrapper transformation is applied - """ - super().__init__() - assert isinstance(transform, (Transform, TransformGen)), ( - f"The given transform must either be a Transform or TransformGen instance. " - f"Not {type(transform)}" - ) - assert 0.0 <= prob <= 1.0, f"Probablity must be between 0.0 and 1.0 (given: {prob})" - self.prob = prob - self.transform = transform - - def get_transform(self, img): - do = self._rand_range() < self.prob - if do: - if isinstance(self.transform, TransformGen): - return self.transform.get_transform(img) - else: - return self.transform - else: - return NoOpTransform() - - -class RandomFlip(TransformGen): - """ - Flip the image horizontally or vertically with the given probability. - """ - - def __init__(self, prob=0.5, *, horizontal=True, vertical=False): - """ - Args: - prob (float): probability of flip. - horizontal (boolean): whether to apply horizontal flipping - vertical (boolean): whether to apply vertical flipping - """ - super().__init__() - - if horizontal and vertical: - raise ValueError("Cannot do both horiz and vert. Please use two Flip instead.") - if not horizontal and not vertical: - raise ValueError("At least one of horiz or vert has to be True!") - self._init(locals()) - - def get_transform(self, img): - h, w = img.shape[:2] - do = self._rand_range() < self.prob - if do: - if self.horizontal: - return HFlipTransform(w) - elif self.vertical: - return VFlipTransform(h) - else: - return NoOpTransform() - - -class Resize(TransformGen): - """ Resize image to a target size""" - - def __init__(self, shape, interp=Image.BILINEAR): - """ - Args: - shape: (h, w) tuple or a int - interp: PIL interpolation method - """ - if isinstance(shape, int): - shape = (shape, shape) - shape = tuple(shape) - self._init(locals()) - - def get_transform(self, img): - return ResizeTransform( - img.shape[0], img.shape[1], self.shape[0], self.shape[1], self.interp - ) - - -class ResizeShortestEdge(TransformGen): - """ - Scale the shorter edge to the given size, with a limit of `max_size` on the longer edge. - If `max_size` is reached, then downscale so that the longer edge does not exceed max_size. 
- """ - - def __init__( - self, short_edge_length, max_size=sys.maxsize, sample_style="range", interp=Image.BILINEAR - ): - """ - Args: - short_edge_length (list[int]): If ``sample_style=="range"``, - a [min, max] interval from which to sample the shortest edge length. - If ``sample_style=="choice"``, a list of shortest edge lengths to sample from. - max_size (int): maximum allowed longest edge length. - sample_style (str): either "range" or "choice". - """ - super().__init__() - assert sample_style in ["range", "choice"], sample_style - - self.is_range = sample_style == "range" - if isinstance(short_edge_length, int): - short_edge_length = (short_edge_length, short_edge_length) - self._init(locals()) - - def get_transform(self, img): - h, w = img.shape[:2] - - if self.is_range: - size = np.random.randint(self.short_edge_length[0], self.short_edge_length[1] + 1) - else: - size = np.random.choice(self.short_edge_length) - if size == 0: - return NoOpTransform() - - scale = size * 1.0 / min(h, w) - if h < w: - newh, neww = size, scale * w - else: - newh, neww = scale * h, size - if max(newh, neww) > self.max_size: - scale = self.max_size * 1.0 / max(newh, neww) - newh = newh * scale - neww = neww * scale - neww = int(neww + 0.5) - newh = int(newh + 0.5) - return ResizeTransform(h, w, newh, neww, self.interp) - - -class RandomRotation(TransformGen): - """ - This method returns a copy of this image, rotated the given - number of degrees counter clockwise around the given center. - """ - - def __init__(self, angle, expand=True, center=None, sample_style="range", interp=None): - """ - Args: - angle (list[float]): If ``sample_style=="range"``, - a [min, max] interval from which to sample the angle (in degrees). - If ``sample_style=="choice"``, a list of angles to sample from - expand (bool): choose if the image should be resized to fit the whole - rotated image (default), or simply cropped - center (list[[float, float]]): If ``sample_style=="range"``, - a [[minx, miny], [maxx, maxy]] relative interval from which to sample the center, - [0, 0] being the top left of the image and [1, 1] the bottom right. - If ``sample_style=="choice"``, a list of centers to sample from - Default: None, which means that the center of rotation is the center of the image - center has no effect if expand=True because it only affects shifting - """ - super().__init__() - assert sample_style in ["range", "choice"], sample_style - self.is_range = sample_style == "range" - if isinstance(angle, (float, int)): - angle = (angle, angle) - if center is not None and isinstance(center[0], (float, int)): - center = (center, center) - self._init(locals()) - - def get_transform(self, img): - h, w = img.shape[:2] - center = None - if self.is_range: - angle = np.random.uniform(self.angle[0], self.angle[1]) - if self.center is not None: - center = ( - np.random.uniform(self.center[0][0], self.center[1][0]), - np.random.uniform(self.center[0][1], self.center[1][1]), - ) - else: - angle = np.random.choice(self.angle) - if self.center is not None: - center = np.random.choice(self.center) - - if center is not None: - center = (w * center[0], h * center[1]) # Convert to absolute coordinates - - return RotationTransform(h, w, angle, expand=self.expand, center=center, interp=self.interp) - - -class RandomCrop(TransformGen): - """ - Randomly crop a subimage out of an image. - """ - - def __init__(self, crop_type: str, crop_size): - """ - Args: - crop_type (str): one of "relative_range", "relative", "absolute". 
- See `config/defaults.py` for explanation. - crop_size (tuple[float]): the relative ratio or absolute pixels of - height and width - """ - super().__init__() - assert crop_type in ["relative_range", "relative", "absolute"] - self._init(locals()) - - def get_transform(self, img): - h, w = img.shape[:2] - croph, cropw = self.get_crop_size((h, w)) - assert h >= croph and w >= cropw, "Shape computation in {} has bugs.".format(self) - h0 = np.random.randint(h - croph + 1) - w0 = np.random.randint(w - cropw + 1) - return CropTransform(w0, h0, cropw, croph) - - def get_crop_size(self, image_size): - """ - Args: - image_size (tuple): height, width - - Returns: - crop_size (tuple): height, width in absolute pixels - """ - h, w = image_size - if self.crop_type == "relative": - ch, cw = self.crop_size - return int(h * ch + 0.5), int(w * cw + 0.5) - elif self.crop_type == "relative_range": - crop_size = np.asarray(self.crop_size, dtype=np.float32) - ch, cw = crop_size + np.random.rand(2) * (1 - crop_size) - return int(h * ch + 0.5), int(w * cw + 0.5) - elif self.crop_type == "absolute": - return (min(self.crop_size[0], h), min(self.crop_size[1], w)) - else: - NotImplementedError("Unknown crop type {}".format(self.crop_type)) - - -class RandomExtent(TransformGen): - """ - Outputs an image by cropping a random "subrect" of the source image. - - The subrect can be parameterized to include pixels outside the source image, - in which case they will be set to zeros (i.e. black). The size of the output - image will vary with the size of the random subrect. - """ - - def __init__(self, scale_range, shift_range): - """ - Args: - output_size (h, w): Dimensions of output image - scale_range (l, h): Range of input-to-output size scaling factor - shift_range (x, y): Range of shifts of the cropped subrect. The rect - is shifted by [w / 2 * Uniform(-x, x), h / 2 * Uniform(-y, y)], - where (w, h) is the (width, height) of the input image. Set each - component to zero to crop at the image's center. - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img): - img_h, img_w = img.shape[:2] - - # Initialize src_rect to fit the input image. - src_rect = np.array([-0.5 * img_w, -0.5 * img_h, 0.5 * img_w, 0.5 * img_h]) - - # Apply a random scaling to the src_rect. - src_rect *= np.random.uniform(self.scale_range[0], self.scale_range[1]) - - # Apply a random shift to the coordinates origin. - src_rect[0::2] += self.shift_range[0] * img_w * (np.random.rand() - 0.5) - src_rect[1::2] += self.shift_range[1] * img_h * (np.random.rand() - 0.5) - - # Map src_rect coordinates into image coordinates (center at corner). - src_rect[0::2] += 0.5 * img_w - src_rect[1::2] += 0.5 * img_h - - return ExtentTransform( - src_rect=(src_rect[0], src_rect[1], src_rect[2], src_rect[3]), - output_size=(int(src_rect[3] - src_rect[1]), int(src_rect[2] - src_rect[0])), - ) - - -class RandomContrast(TransformGen): - """ - Randomly transforms image contrast. - - Contrast intensity is uniformly sampled in (intensity_min, intensity_max). 
- - intensity < 1 will reduce contrast - - intensity = 1 will preserve the input image - - intensity > 1 will increase contrast - - See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html - """ - - def __init__(self, intensity_min, intensity_max): - """ - Args: - intensity_min (float): Minimum augmentation - intensity_max (float): Maximum augmentation - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img): - w = np.random.uniform(self.intensity_min, self.intensity_max) - return BlendTransform(src_image=img.mean(), src_weight=1 - w, dst_weight=w) - - -class RandomBrightness(TransformGen): - """ - Randomly transforms image brightness. - - Brightness intensity is uniformly sampled in (intensity_min, intensity_max). - - intensity < 1 will reduce brightness - - intensity = 1 will preserve the input image - - intensity > 1 will increase brightness - - See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html - """ - - def __init__(self, intensity_min, intensity_max): - """ - Args: - intensity_min (float): Minimum augmentation - intensity_max (float): Maximum augmentation - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img): - w = np.random.uniform(self.intensity_min, self.intensity_max) - return BlendTransform(src_image=0, src_weight=1 - w, dst_weight=w) - - -class RandomSaturation(TransformGen): - """ - Randomly transforms image saturation. - - Saturation intensity is uniformly sampled in (intensity_min, intensity_max). - - intensity < 1 will reduce saturation (make the image more grayscale) - - intensity = 1 will preserve the input image - - intensity > 1 will increase saturation - - See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html - """ - - def __init__(self, intensity_min, intensity_max): - """ - Args: - intensity_min (float): Minimum augmentation (1 preserves input). - intensity_max (float): Maximum augmentation (1 preserves input). - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img): - assert img.shape[-1] == 3, "Saturation only works on RGB images" - w = np.random.uniform(self.intensity_min, self.intensity_max) - grayscale = img.dot([0.299, 0.587, 0.114])[:, :, np.newaxis] - return BlendTransform(src_image=grayscale, src_weight=1 - w, dst_weight=w) - - -class RandomLighting(TransformGen): - """ - Randomly transforms image color using fixed PCA over ImageNet. - - The degree of color jittering is randomly sampled via a normal distribution, - with standard deviation given by the scale parameter. - """ - - def __init__(self, scale): - """ - Args: - scale (float): Standard deviation of principal component weighting. - """ - super().__init__() - self._init(locals()) - self.eigen_vecs = np.array( - [[-0.5675, 0.7192, 0.4009], [-0.5808, -0.0045, -0.8140], [-0.5836, -0.6948, 0.4203]] - ) - self.eigen_vals = np.array([0.2175, 0.0188, 0.0045]) - - def get_transform(self, img): - assert img.shape[-1] == 3, "Saturation only works on RGB images" - weights = np.random.normal(scale=self.scale, size=3) - return BlendTransform( - src_image=self.eigen_vecs.dot(weights * self.eigen_vals), src_weight=1.0, dst_weight=1.0 - ) - - -def apply_transform_gens(transform_gens, img): - """ - Apply a list of :class:`TransformGen` or :class:`Transform` on the input image, and - returns the transformed image and a list of transforms. 
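# A sketch of the composition pattern that apply_transform_gens implements, assuming the
# generators defined in this file are in scope; sizes and intensity ranges are
# illustrative only.
import numpy as np

tfm_gens = [
    ResizeShortestEdge(short_edge_length=(640, 800), max_size=1333, sample_style="range"),
    RandomFlip(prob=0.5),
    RandomContrast(intensity_min=0.9, intensity_max=1.1),
]
img = np.random.randint(0, 256, size=(480, 640, 3), dtype=np.uint8)
img_aug, tfms = apply_transform_gens(tfm_gens, img)
# `tfms` is a TransformList; the same transforms can later be replayed on boxes,
# points, or masks, e.g. tfms.apply_coords(points) for an N x 2 array.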
- - We cannot simply create and return all transforms without - applying it to the image, because a subsequent transform may - need the output of the previous one. - - Args: - transform_gens (list): list of :class:`TransformGen` or :class:`Transform` instance to - be applied. - img (ndarray): uint8 or floating point images with 1 or 3 channels. - - Returns: - ndarray: the transformed image - TransformList: contain the transforms that's used. - """ - for g in transform_gens: - assert isinstance(g, (Transform, TransformGen)), g - - check_dtype(img) - - tfms = [] - for g in transform_gens: - tfm = g.get_transform(img) if isinstance(g, TransformGen) else g - assert isinstance( - tfm, Transform - ), "TransformGen {} must return an instance of Transform! Got {} instead".format(g, tfm) - img = tfm.apply_image(img) - tfms.append(tfm) - return img, TransformList(tfms) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/__init__.py deleted file mode 100644 index 6a4538d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -from .launch import * -from .train_loop import * - -__all__ = [k for k in globals().keys() if not k.startswith("_")] - - -# prefer to let hooks and defaults live in separate namespaces (therefore not in __all__) -# but still make them available here -from .hooks import * -from .defaults import * diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/defaults.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/defaults.py deleted file mode 100644 index db9ab68..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/defaults.py +++ /dev/null @@ -1,531 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -""" -This file contains components with some default boilerplate logic user may need -in training / testing. They will not work for everyone, but many users may find them useful. - -The behavior of functions/classes in this file is subject to change, -since they are meant to represent the "common default behavior" people need in their projects. -""" - -import argparse -import logging -import os -import sys -from collections import OrderedDict -import torch -from fvcore.common.file_io import PathManager -from fvcore.nn.precise_bn import get_bn_modules -from torch.nn.parallel import DistributedDataParallel - -import detectron2.data.transforms as T -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.data import ( - MetadataCatalog, - build_detection_test_loader, - build_detection_train_loader, -) -from detectron2.evaluation import ( - DatasetEvaluator, - inference_on_dataset, - print_csv_format, - verify_results, -) -from detectron2.modeling import build_model -from detectron2.solver import build_lr_scheduler, build_optimizer -from detectron2.utils import comm -from detectron2.utils.collect_env import collect_env_info -from detectron2.utils.env import seed_all_rng -from detectron2.utils.events import CommonMetricPrinter, JSONWriter, TensorboardXWriter -from detectron2.utils.logger import setup_logger - -from . 
import hooks -from .train_loop import SimpleTrainer - -__all__ = ["default_argument_parser", "default_setup", "DefaultPredictor", "DefaultTrainer"] - - -def default_argument_parser(epilog=None): - """ - Create a parser with some common arguments used by detectron2 users. - - Args: - epilog (str): epilog passed to ArgumentParser describing the usage. - - Returns: - argparse.ArgumentParser: - """ - parser = argparse.ArgumentParser( - epilog=epilog - or f""" -Examples: - -Run on single machine: - $ {sys.argv[0]} --num-gpus 8 --config-file cfg.yaml MODEL.WEIGHTS /path/to/weight.pth - -Run on multiple machines: - (machine0)$ {sys.argv[0]} --machine-rank 0 --num-machines 2 --dist-url [--other-flags] - (machine1)$ {sys.argv[0]} --machine-rank 1 --num-machines 2 --dist-url [--other-flags] -""", - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file") - parser.add_argument( - "--resume", - action="store_true", - help="whether to attempt to resume from the checkpoint directory", - ) - parser.add_argument("--eval-only", action="store_true", help="perform evaluation only") - parser.add_argument("--num-gpus", type=int, default=1, help="number of gpus *per machine*") - parser.add_argument("--num-machines", type=int, default=1, help="total number of machines") - parser.add_argument( - "--machine-rank", type=int, default=0, help="the rank of this machine (unique per machine)" - ) - - # PyTorch still may leave orphan processes in multi-gpu training. - # Therefore we use a deterministic way to obtain port, - # so that users are aware of orphan processes by seeing the port occupied. - port = 2 ** 15 + 2 ** 14 + hash(os.getuid() if sys.platform != "win32" else 1) % 2 ** 14 - parser.add_argument( - "--dist-url", - default="tcp://127.0.0.1:{}".format(port), - help="initialization URL for pytorch distributed backend. See " - "https://pytorch.org/docs/stable/distributed.html for details.", - ) - parser.add_argument( - "opts", - help="Modify config options using the command-line", - default=None, - nargs=argparse.REMAINDER, - ) - return parser - - -def default_setup(cfg, args): - """ - Perform some basic common setups at the beginning of a job, including: - - 1. Set up the detectron2 logger - 2. Log basic information about environment, cmdline arguments, and config - 3. Backup the config to the output directory - - Args: - cfg (CfgNode): the full config to be used - args (argparse.NameSpace): the command line arguments to be logged - """ - output_dir = cfg.OUTPUT_DIR - if comm.is_main_process() and output_dir: - PathManager.mkdirs(output_dir) - - rank = comm.get_rank() - setup_logger(output_dir, distributed_rank=rank, name="fvcore") - logger = setup_logger(output_dir, distributed_rank=rank) - - logger.info("Rank of current process: {}. 
World size: {}".format(rank, comm.get_world_size())) - logger.info("Environment info:\n" + collect_env_info()) - - logger.info("Command line arguments: " + str(args)) - if hasattr(args, "config_file") and args.config_file != "": - logger.info( - "Contents of args.config_file={}:\n{}".format( - args.config_file, PathManager.open(args.config_file, "r").read() - ) - ) - - logger.info("Running with full config:\n{}".format(cfg)) - if comm.is_main_process() and output_dir: - # Note: some of our scripts may expect the existence of - # config.yaml in output directory - path = os.path.join(output_dir, "config.yaml") - with PathManager.open(path, "w") as f: - f.write(cfg.dump()) - logger.info("Full config saved to {}".format(path)) - - # make sure each worker has a different, yet deterministic seed if specified - seed_all_rng(None if cfg.SEED < 0 else cfg.SEED + rank) - - # cudnn benchmark has large overhead. It shouldn't be used considering the small size of - # typical validation set. - if not (hasattr(args, "eval_only") and args.eval_only): - torch.backends.cudnn.benchmark = cfg.CUDNN_BENCHMARK - - -class DefaultPredictor: - """ - Create a simple end-to-end predictor with the given config that runs on - single device for a single input image. - - Compared to using the model directly, this class does the following additions: - - 1. Load checkpoint from `cfg.MODEL.WEIGHTS`. - 2. Always take BGR image as the input and apply conversion defined by `cfg.INPUT.FORMAT`. - 3. Apply resizing defined by `cfg.INPUT.{MIN,MAX}_SIZE_TEST`. - 4. Take one input image and produce a single output, instead of a batch. - - If you'd like to do anything more fancy, please refer to its source code - as examples to build and use the model manually. - - Attributes: - metadata (Metadata): the metadata of the underlying dataset, obtained from - cfg.DATASETS.TEST. - - Examples: - - .. code-block:: python - - pred = DefaultPredictor(cfg) - inputs = cv2.imread("input.jpg") - outputs = pred(inputs) - """ - - def __init__(self, cfg): - self.cfg = cfg.clone() # cfg can be modified by model - self.model = build_model(self.cfg) - self.model.eval() - self.metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0]) - - checkpointer = DetectionCheckpointer(self.model) - checkpointer.load(cfg.MODEL.WEIGHTS) - - self.transform_gen = T.ResizeShortestEdge( - [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST - ) - - self.input_format = cfg.INPUT.FORMAT - assert self.input_format in ["RGB", "BGR"], self.input_format - - def __call__(self, original_image): - """ - Args: - original_image (np.ndarray): an image of shape (H, W, C) (in BGR order). - - Returns: - predictions (dict): - the output of the model for one image only. - See :doc:`/tutorials/models` for details about the format. - """ - with torch.no_grad(): # https://github.com/sphinx-doc/sphinx/issues/4258 - # Apply pre-processing to image. - if self.input_format == "RGB": - # whether the model expects BGR inputs or RGB - original_image = original_image[:, :, ::-1] - height, width = original_image.shape[:2] - image = self.transform_gen.get_transform(original_image).apply_image(original_image) - image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)) - - inputs = {"image": image, "height": height, "width": width} - predictions = self.model([inputs])[0] - return predictions - - -class DefaultTrainer(SimpleTrainer): - """ - A trainer with default training logic. Compared to `SimpleTrainer`, it - contains the following logic in addition: - - 1. 
Create model, optimizer, scheduler, dataloader from the given config. - 2. Load a checkpoint or `cfg.MODEL.WEIGHTS`, if exists, when - `resume_or_load` is called. - 3. Register a few common hooks. - - It is created to simplify the **standard model training workflow** and reduce code boilerplate - for users who only need the standard training workflow, with standard features. - It means this class makes *many assumptions* about your training logic that - may easily become invalid in a new research. In fact, any assumptions beyond those made in the - :class:`SimpleTrainer` are too much for research. - - The code of this class has been annotated about restrictive assumptions it mades. - When they do not work for you, you're encouraged to: - - 1. Overwrite methods of this class, OR: - 2. Use :class:`SimpleTrainer`, which only does minimal SGD training and - nothing else. You can then add your own hooks if needed. OR: - 3. Write your own training loop similar to `tools/plain_train_net.py`. - - Also note that the behavior of this class, like other functions/classes in - this file, is not stable, since it is meant to represent the "common default behavior". - It is only guaranteed to work well with the standard models and training workflow in detectron2. - To obtain more stable behavior, write your own training logic with other public APIs. - - Examples: - - .. code-block:: python - - trainer = DefaultTrainer(cfg) - trainer.resume_or_load() # load last checkpoint or MODEL.WEIGHTS - trainer.train() - - Attributes: - scheduler: - checkpointer (DetectionCheckpointer): - cfg (CfgNode): - """ - - def __init__(self, cfg): - """ - Args: - cfg (CfgNode): - """ - logger = logging.getLogger("detectron2") - if not logger.isEnabledFor(logging.INFO): # setup_logger is not called for d2 - setup_logger() - # Assume these objects must be constructed in this order. - model = self.build_model(cfg) - optimizer = self.build_optimizer(cfg, model) - data_loader = self.build_train_loader(cfg) - - # For training, wrap with DDP. But don't need this for inference. - if comm.get_world_size() > 1: - model = DistributedDataParallel( - model, device_ids=[comm.get_local_rank()], broadcast_buffers=False - ) - super().__init__(model, data_loader, optimizer) - - self.scheduler = self.build_lr_scheduler(cfg, optimizer) - # Assume no other objects need to be checkpointed. - # We can later make it checkpoint the stateful hooks - self.checkpointer = DetectionCheckpointer( - # Assume you want to save checkpoints together with logs/statistics - model, - cfg.OUTPUT_DIR, - optimizer=optimizer, - scheduler=self.scheduler, - ) - self.start_iter = 0 - self.max_iter = cfg.SOLVER.MAX_ITER - self.cfg = cfg - - self.register_hooks(self.build_hooks()) - - def resume_or_load(self, resume=True): - """ - If `resume==True`, and last checkpoint exists, resume from it and load all - checkpointables (eg. optimizer and scheduler). - - Otherwise, load the model specified by the config (skip all checkpointables). - - Args: - resume (bool): whether to do resume or not - """ - checkpoint = self.checkpointer.resume_or_load(self.cfg.MODEL.WEIGHTS, resume=resume) - self.start_iter = checkpoint.get("iteration", -1) if resume else -1 - # The checkpoint stores the training iteration that just finished, thus we start - # at the next iteration (or iter zero if there's no checkpoint). - self.start_iter += 1 - - def build_hooks(self): - """ - Build a list of default hooks, including timing, evaluation, - checkpointing, lr scheduling, precise BN, writing events. 
- - Returns: - list[HookBase]: - """ - cfg = self.cfg.clone() - cfg.defrost() - cfg.DATALOADER.NUM_WORKERS = 0 # save some memory and time for PreciseBN - - ret = [ - hooks.IterationTimer(), - hooks.LRScheduler(self.optimizer, self.scheduler), - hooks.PreciseBN( - # Run at the same freq as (but before) evaluation. - cfg.TEST.EVAL_PERIOD, - self.model, - # Build a new data loader to not affect training - self.build_train_loader(cfg), - cfg.TEST.PRECISE_BN.NUM_ITER, - ) - if cfg.TEST.PRECISE_BN.ENABLED and get_bn_modules(self.model) - else None, - ] - - # Do PreciseBN before checkpointer, because it updates the model and need to - # be saved by checkpointer. - # This is not always the best: if checkpointing has a different frequency, - # some checkpoints may have more precise statistics than others. - if comm.is_main_process(): - ret.append(hooks.PeriodicCheckpointer(self.checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD)) - - def test_and_save_results(): - self._last_eval_results = self.test(self.cfg, self.model) - return self._last_eval_results - - # Do evaluation after checkpointer, because then if it fails, - # we can use the saved checkpoint to debug. - ret.append(hooks.EvalHook(cfg.TEST.EVAL_PERIOD, test_and_save_results)) - - if comm.is_main_process(): - # run writers in the end, so that evaluation metrics are written - ret.append(hooks.PeriodicWriter(self.build_writers(), period=20)) - return ret - - def build_writers(self): - """ - Build a list of writers to be used. By default it contains - writers that write metrics to the screen, - a json file, and a tensorboard event file respectively. - If you'd like a different list of writers, you can overwrite it in - your trainer. - - Returns: - list[EventWriter]: a list of :class:`EventWriter` objects. - - It is now implemented by: - - .. code-block:: python - - return [ - CommonMetricPrinter(self.max_iter), - JSONWriter(os.path.join(self.cfg.OUTPUT_DIR, "metrics.json")), - TensorboardXWriter(self.cfg.OUTPUT_DIR), - ] - - """ - # Here the default print/log frequency of each writer is used. - return [ - # It may not always print what you want to see, since it prints "common" metrics only. - CommonMetricPrinter(self.max_iter), - JSONWriter(os.path.join(self.cfg.OUTPUT_DIR, "metrics.json")), - TensorboardXWriter(self.cfg.OUTPUT_DIR), - ] - - def train(self): - """ - Run training. - - Returns: - OrderedDict of results, if evaluation is enabled. Otherwise None. - """ - super().train(self.start_iter, self.max_iter) - if len(self.cfg.TEST.EXPECTED_RESULTS) and comm.is_main_process(): - assert hasattr( - self, "_last_eval_results" - ), "No evaluation results obtained during training!" - verify_results(self.cfg, self._last_eval_results) - return self._last_eval_results - - @classmethod - def build_model(cls, cfg): - """ - Returns: - torch.nn.Module: - - It now calls :func:`detectron2.modeling.build_model`. - Overwrite it if you'd like a different model. - """ - model = build_model(cfg) - logger = logging.getLogger(__name__) - logger.info("Model:\n{}".format(model)) - return model - - @classmethod - def build_optimizer(cls, cfg, model): - """ - Returns: - torch.optim.Optimizer: - - It now calls :func:`detectron2.solver.build_optimizer`. - Overwrite it if you'd like a different optimizer. - """ - return build_optimizer(cfg, model) - - @classmethod - def build_lr_scheduler(cls, cfg, optimizer): - """ - It now calls :func:`detectron2.solver.build_lr_scheduler`. - Overwrite it if you'd like a different scheduler. 
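# A sketch of the customization pattern these build_* classmethods are designed for:
# subclass DefaultTrainer (defined above) and override only what you need. `MyTrainer`
# and the COCO-format evaluator choice are illustrative, not part of the original code.
from detectron2.data import build_detection_train_loader
from detectron2.evaluation import COCOEvaluator


class MyTrainer(DefaultTrainer):
    @classmethod
    def build_evaluator(cls, cfg, dataset_name):
        # assumes the test set is in COCO format; other evaluators plug in the same way
        return COCOEvaluator(dataset_name, cfg, distributed=True, output_dir=cfg.OUTPUT_DIR)

    @classmethod
    def build_train_loader(cls, cfg):
        # override this instead if a custom mapper or sampler is needed
        return build_detection_train_loader(cfg)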
- """ - return build_lr_scheduler(cfg, optimizer) - - @classmethod - def build_train_loader(cls, cfg): - """ - Returns: - iterable - - It now calls :func:`detectron2.data.build_detection_train_loader`. - Overwrite it if you'd like a different data loader. - """ - return build_detection_train_loader(cfg) - - @classmethod - def build_test_loader(cls, cfg, dataset_name): - """ - Returns: - iterable - - It now calls :func:`detectron2.data.build_detection_test_loader`. - Overwrite it if you'd like a different data loader. - """ - return build_detection_test_loader(cfg, dataset_name) - - @classmethod - def build_evaluator(cls, cfg, dataset_name): - """ - Returns: - DatasetEvaluator or None - - It is not implemented by default. - """ - raise NotImplementedError( - """ -If you want DefaultTrainer to automatically run evaluation, -please implement `build_evaluator()` in subclasses (see train_net.py for example). -Alternatively, you can call evaluation functions yourself (see Colab balloon tutorial for example). -""" - ) - - @classmethod - def test(cls, cfg, model, evaluators=None): - """ - Args: - cfg (CfgNode): - model (nn.Module): - evaluators (list[DatasetEvaluator] or None): if None, will call - :meth:`build_evaluator`. Otherwise, must have the same length as - `cfg.DATASETS.TEST`. - - Returns: - dict: a dict of result metrics - """ - logger = logging.getLogger(__name__) - if isinstance(evaluators, DatasetEvaluator): - evaluators = [evaluators] - if evaluators is not None: - assert len(cfg.DATASETS.TEST) == len(evaluators), "{} != {}".format( - len(cfg.DATASETS.TEST), len(evaluators) - ) - - results = OrderedDict() - for idx, dataset_name in enumerate(cfg.DATASETS.TEST): - data_loader = cls.build_test_loader(cfg, dataset_name) - # When evaluators are passed in as arguments, - # implicitly assume that evaluators can be created before data_loader. - if evaluators is not None: - evaluator = evaluators[idx] - else: - try: - evaluator = cls.build_evaluator(cfg, dataset_name) - except NotImplementedError: - logger.warn( - "No evaluator found. Use `DefaultTrainer.test(evaluators=)`, " - "or implement its `build_evaluator` method." - ) - results[dataset_name] = {} - continue - results_i = inference_on_dataset(model, data_loader, evaluator) - results[dataset_name] = results_i - if comm.is_main_process(): - assert isinstance( - results_i, dict - ), "Evaluator must return a dict on the main process. Got {} instead.".format( - results_i - ) - logger.info("Evaluation results for {} in csv format:".format(dataset_name)) - print_csv_format(results_i) - - if len(results) == 1: - results = list(results.values())[0] - return results diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/hooks.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/hooks.py deleted file mode 100644 index e5085b4..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/hooks.py +++ /dev/null @@ -1,427 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -import datetime -import itertools -import logging -import os -import tempfile -import time -from collections import Counter -import torch -from fvcore.common.checkpoint import PeriodicCheckpointer as _PeriodicCheckpointer -from fvcore.common.file_io import PathManager -from fvcore.common.timer import Timer -from fvcore.nn.precise_bn import get_bn_modules, update_bn_stats - -import detectron2.utils.comm as comm -from detectron2.evaluation.testing import flatten_results_dict -from detectron2.utils.events import EventStorage, EventWriter - -from .train_loop import HookBase - -__all__ = [ - "CallbackHook", - "IterationTimer", - "PeriodicWriter", - "PeriodicCheckpointer", - "LRScheduler", - "AutogradProfiler", - "EvalHook", - "PreciseBN", -] - - -""" -Implement some common hooks. -""" - - -class CallbackHook(HookBase): - """ - Create a hook using callback functions provided by the user. - """ - - def __init__(self, *, before_train=None, after_train=None, before_step=None, after_step=None): - """ - Each argument is a function that takes one argument: the trainer. - """ - self._before_train = before_train - self._before_step = before_step - self._after_step = after_step - self._after_train = after_train - - def before_train(self): - if self._before_train: - self._before_train(self.trainer) - - def after_train(self): - if self._after_train: - self._after_train(self.trainer) - # The functions may be closures that hold reference to the trainer - # Therefore, delete them to avoid circular reference. - del self._before_train, self._after_train - del self._before_step, self._after_step - - def before_step(self): - if self._before_step: - self._before_step(self.trainer) - - def after_step(self): - if self._after_step: - self._after_step(self.trainer) - - -class IterationTimer(HookBase): - """ - Track the time spent for each iteration (each run_step call in the trainer). - Print a summary in the end of training. - - This hook uses the time between the call to its :meth:`before_step` - and :meth:`after_step` methods. - Under the convention that :meth:`before_step` of all hooks should only - take negligible amount of time, the :class:`IterationTimer` hook should be - placed at the beginning of the list of hooks to obtain accurate timing. - """ - - def __init__(self, warmup_iter=3): - """ - Args: - warmup_iter (int): the number of iterations at the beginning to exclude - from timing. 
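# A quick sketch of CallbackHook above: attach ad-hoc callbacks without writing a full
# HookBase subclass. The logging interval and the `trainer` variable are illustrative.
def _print_progress(trainer):
    if trainer.iter % 100 == 0:
        print("reached iteration", trainer.iter)

progress_hook = CallbackHook(after_step=_print_progress)
# later: trainer.register_hooks([progress_hook]) on any TrainerBase instance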
- """ - self._warmup_iter = warmup_iter - self._step_timer = Timer() - self._start_time = time.perf_counter() - self._total_timer = Timer() - - def before_train(self): - self._start_time = time.perf_counter() - self._total_timer.reset() - self._total_timer.pause() - - def after_train(self): - logger = logging.getLogger(__name__) - total_time = time.perf_counter() - self._start_time - total_time_minus_hooks = self._total_timer.seconds() - hook_time = total_time - total_time_minus_hooks - - num_iter = self.trainer.iter + 1 - self.trainer.start_iter - self._warmup_iter - - if num_iter > 0 and total_time_minus_hooks > 0: - # Speed is meaningful only after warmup - # NOTE this format is parsed by grep in some scripts - logger.info( - "Overall training speed: {} iterations in {} ({:.4f} s / it)".format( - num_iter, - str(datetime.timedelta(seconds=int(total_time_minus_hooks))), - total_time_minus_hooks / num_iter, - ) - ) - - logger.info( - "Total training time: {} ({} on hooks)".format( - str(datetime.timedelta(seconds=int(total_time))), - str(datetime.timedelta(seconds=int(hook_time))), - ) - ) - - def before_step(self): - self._step_timer.reset() - self._total_timer.resume() - - def after_step(self): - # +1 because we're in after_step - iter_done = self.trainer.iter - self.trainer.start_iter + 1 - if iter_done >= self._warmup_iter: - sec = self._step_timer.seconds() - self.trainer.storage.put_scalars(time=sec) - else: - self._start_time = time.perf_counter() - self._total_timer.reset() - - self._total_timer.pause() - - -class PeriodicWriter(HookBase): - """ - Write events to EventStorage periodically. - - It is executed every ``period`` iterations and after the last iteration. - """ - - def __init__(self, writers, period=20): - """ - Args: - writers (list[EventWriter]): a list of EventWriter objects - period (int): - """ - self._writers = writers - for w in writers: - assert isinstance(w, EventWriter), w - self._period = period - - def after_step(self): - if (self.trainer.iter + 1) % self._period == 0 or ( - self.trainer.iter == self.trainer.max_iter - 1 - ): - for writer in self._writers: - writer.write() - - def after_train(self): - for writer in self._writers: - writer.close() - - -class PeriodicCheckpointer(_PeriodicCheckpointer, HookBase): - """ - Same as :class:`detectron2.checkpoint.PeriodicCheckpointer`, but as a hook. - - Note that when used as a hook, - it is unable to save additional data other than what's defined - by the given `checkpointer`. - - It is executed every ``period`` iterations and after the last iteration. - """ - - def before_train(self): - self.max_iter = self.trainer.max_iter - - def after_step(self): - # No way to use **kwargs - self.step(self.trainer.iter) - - -class LRScheduler(HookBase): - """ - A hook which executes a torch builtin LR scheduler and summarizes the LR. - It is executed after every iteration. 
- """ - - def __init__(self, optimizer, scheduler): - """ - Args: - optimizer (torch.optim.Optimizer): - scheduler (torch.optim._LRScheduler) - """ - self._optimizer = optimizer - self._scheduler = scheduler - - # NOTE: some heuristics on what LR to summarize - # summarize the param group with most parameters - largest_group = max(len(g["params"]) for g in optimizer.param_groups) - - if largest_group == 1: - # If all groups have one parameter, - # then find the most common initial LR, and use it for summary - lr_count = Counter([g["lr"] for g in optimizer.param_groups]) - lr = lr_count.most_common()[0][0] - for i, g in enumerate(optimizer.param_groups): - if g["lr"] == lr: - self._best_param_group_id = i - break - else: - for i, g in enumerate(optimizer.param_groups): - if len(g["params"]) == largest_group: - self._best_param_group_id = i - break - - def after_step(self): - lr = self._optimizer.param_groups[self._best_param_group_id]["lr"] - self.trainer.storage.put_scalar("lr", lr, smoothing_hint=False) - self._scheduler.step() - - -class AutogradProfiler(HookBase): - """ - A hook which runs `torch.autograd.profiler.profile`. - - Examples: - - .. code-block:: python - - hooks.AutogradProfiler( - lambda trainer: trainer.iter > 10 and trainer.iter < 20, self.cfg.OUTPUT_DIR - ) - - The above example will run the profiler for iteration 10~20 and dump - results to ``OUTPUT_DIR``. We did not profile the first few iterations - because they are typically slower than the rest. - The result files can be loaded in the ``chrome://tracing`` page in chrome browser. - - Note: - When used together with NCCL on older version of GPUs, - autograd profiler may cause deadlock because it unnecessarily allocates - memory on every device it sees. The memory management calls, if - interleaved with NCCL calls, lead to deadlock on GPUs that do not - support `cudaLaunchCooperativeKernelMultiDevice`. - """ - - def __init__(self, enable_predicate, output_dir, *, use_cuda=True): - """ - Args: - enable_predicate (callable[trainer -> bool]): a function which takes a trainer, - and returns whether to enable the profiler. - It will be called once every step, and can be used to select which steps to profile. - output_dir (str): the output directory to dump tracing files. - use_cuda (bool): same as in `torch.autograd.profiler.profile`. - """ - self._enable_predicate = enable_predicate - self._use_cuda = use_cuda - self._output_dir = output_dir - - def before_step(self): - if self._enable_predicate(self.trainer): - self._profiler = torch.autograd.profiler.profile(use_cuda=self._use_cuda) - self._profiler.__enter__() - else: - self._profiler = None - - def after_step(self): - if self._profiler is None: - return - self._profiler.__exit__(None, None, None) - PathManager.mkdirs(self._output_dir) - out_file = os.path.join( - self._output_dir, "profiler-trace-iter{}.json".format(self.trainer.iter) - ) - if "://" not in out_file: - self._profiler.export_chrome_trace(out_file) - else: - # Support non-posix filesystems - with tempfile.TemporaryDirectory(prefix="detectron2_profiler") as d: - tmp_file = os.path.join(d, "tmp.json") - self._profiler.export_chrome_trace(tmp_file) - with open(tmp_file) as f: - content = f.read() - with PathManager.open(out_file, "w") as f: - f.write(content) - - -class EvalHook(HookBase): - """ - Run an evaluation function periodically, and at the end of training. - - It is executed every ``eval_period`` iterations and after the last iteration. 
- """ - - def __init__(self, eval_period, eval_function): - """ - Args: - eval_period (int): the period to run `eval_function`. - eval_function (callable): a function which takes no arguments, and - returns a nested dict of evaluation metrics. - - Note: - This hook must be enabled in all or none workers. - If you would like only certain workers to perform evaluation, - give other workers a no-op function (`eval_function=lambda: None`). - """ - self._period = eval_period - self._func = eval_function - - def _do_eval(self): - results = self._func() - - if results: - assert isinstance( - results, dict - ), "Eval function must return a dict. Got {} instead.".format(results) - - flattened_results = flatten_results_dict(results) - for k, v in flattened_results.items(): - try: - v = float(v) - except Exception: - raise ValueError( - "[EvalHook] eval_function should return a nested dict of float. " - "Got '{}: {}' instead.".format(k, v) - ) - self.trainer.storage.put_scalars(**flattened_results, smoothing_hint=False) - - # Evaluation may take different time among workers. - # A barrier make them start the next iteration together. - comm.synchronize() - - def after_step(self): - next_iter = self.trainer.iter + 1 - is_final = next_iter == self.trainer.max_iter - if is_final or (self._period > 0 and next_iter % self._period == 0): - self._do_eval() - - def after_train(self): - # func is likely a closure that holds reference to the trainer - # therefore we clean it to avoid circular reference in the end - del self._func - - -class PreciseBN(HookBase): - """ - The standard implementation of BatchNorm uses EMA in inference, which is - sometimes suboptimal. - This class computes the true average of statistics rather than the moving average, - and put true averages to every BN layer in the given model. - - It is executed every ``period`` iterations and after the last iteration. - """ - - def __init__(self, period, model, data_loader, num_iter): - """ - Args: - period (int): the period this hook is run, or 0 to not run during training. - The hook will always run in the end of training. - model (nn.Module): a module whose all BN layers in training mode will be - updated by precise BN. - Note that user is responsible for ensuring the BN layers to be - updated are in training mode when this hook is triggered. - data_loader (iterable): it will produce data to be run by `model(data)`. - num_iter (int): number of iterations used to compute the precise - statistics. - """ - self._logger = logging.getLogger(__name__) - if len(get_bn_modules(model)) == 0: - self._logger.info( - "PreciseBN is disabled because model does not contain BN layers in training mode." - ) - self._disabled = True - return - - self._model = model - self._data_loader = data_loader - self._num_iter = num_iter - self._period = period - self._disabled = False - - self._data_iter = None - - def after_step(self): - next_iter = self.trainer.iter + 1 - is_final = next_iter == self.trainer.max_iter - if is_final or (self._period > 0 and next_iter % self._period == 0): - self.update_stats() - - def update_stats(self): - """ - Update the model with precise statistics. Users can manually call this method. - """ - if self._disabled: - return - - if self._data_iter is None: - self._data_iter = iter(self._data_loader) - - def data_loader(): - for num_iter in itertools.count(1): - if num_iter % 100 == 0: - self._logger.info( - "Running precise-BN ... 
{}/{} iterations.".format(num_iter, self._num_iter) - ) - # This way we can reuse the same iterator - yield next(self._data_iter) - - with EventStorage(): # capture events in a new storage to discard them - self._logger.info( - "Running precise-BN for {} iterations... ".format(self._num_iter) - + "Note that this could produce different statistics every time." - ) - update_bn_stats(self._model, data_loader(), self._num_iter) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/launch.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/launch.py deleted file mode 100644 index 9efbb03..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/launch.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -import torch -import torch.distributed as dist -import torch.multiprocessing as mp - -from detectron2.utils import comm - -__all__ = ["launch"] - - -def _find_free_port(): - import socket - - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - # Binding to port 0 will cause the OS to find an available port for us - sock.bind(("", 0)) - port = sock.getsockname()[1] - sock.close() - # NOTE: there is still a chance the port could be taken by other processes. - return port - - -def launch(main_func, num_gpus_per_machine, num_machines=1, machine_rank=0, dist_url=None, args=()): - """ - Args: - main_func: a function that will be called by `main_func(*args)` - num_machines (int): the total number of machines - machine_rank (int): the rank of this machine (one per machine) - dist_url (str): url to connect to for distributed jobs, including protocol - e.g. "tcp://127.0.0.1:8686". - Can be set to "auto" to automatically select a free port on localhost - args (tuple): arguments passed to main_func - """ - world_size = num_machines * num_gpus_per_machine - if world_size > 1: - # https://github.com/pytorch/pytorch/pull/14391 - # TODO prctl in spawned processes - - if dist_url == "auto": - assert num_machines == 1, "dist_url=auto not supported in multi-machine jobs." - port = _find_free_port() - dist_url = f"tcp://127.0.0.1:{port}" - if num_machines > 1 and dist_url.startswith("file://"): - logger = logging.getLogger(__name__) - logger.warning( - "file:// is not a reliable init_method in multi-machine jobs. Prefer tcp://" - ) - - mp.spawn( - _distributed_worker, - nprocs=num_gpus_per_machine, - args=(main_func, world_size, num_gpus_per_machine, machine_rank, dist_url, args), - daemon=False, - ) - else: - main_func(*args) - - -def _distributed_worker( - local_rank, main_func, world_size, num_gpus_per_machine, machine_rank, dist_url, args -): - assert torch.cuda.is_available(), "cuda is not available. Please check your installation." 
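# A sketch of how `launch` above is typically driven from a training script, assuming a
# user-defined `main(args)` entry point; the flags mirror default_argument_parser().
from detectron2.engine import default_argument_parser, launch

if __name__ == "__main__":
    args = default_argument_parser().parse_args()
    launch(
        main,
        num_gpus_per_machine=args.num_gpus,
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        args=(args,),
    )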
- global_rank = machine_rank * num_gpus_per_machine + local_rank - try: - dist.init_process_group( - backend="NCCL", init_method=dist_url, world_size=world_size, rank=global_rank - ) - except Exception as e: - logger = logging.getLogger(__name__) - logger.error("Process group URL: {}".format(dist_url)) - raise e - # synchronize is needed here to prevent a possible timeout after calling init_process_group - # See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172 - comm.synchronize() - - assert num_gpus_per_machine <= torch.cuda.device_count() - torch.cuda.set_device(local_rank) - - # Setup the local process group (which contains ranks within the same machine) - assert comm._LOCAL_PROCESS_GROUP is None - num_machines = world_size // num_gpus_per_machine - for i in range(num_machines): - ranks_on_i = list(range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine)) - pg = dist.new_group(ranks_on_i) - if i == machine_rank: - comm._LOCAL_PROCESS_GROUP = pg - - main_func(*args) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/train_loop.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/train_loop.py deleted file mode 100644 index 453c9ac..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/train_loop.py +++ /dev/null @@ -1,273 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import logging -import numpy as np -import time -import weakref -import torch - -import detectron2.utils.comm as comm -from detectron2.utils.events import EventStorage - -__all__ = ["HookBase", "TrainerBase", "SimpleTrainer"] - - -class HookBase: - """ - Base class for hooks that can be registered with :class:`TrainerBase`. - - Each hook can implement 4 methods. The way they are called is demonstrated - in the following snippet: - - .. code-block:: python - - hook.before_train() - for iter in range(start_iter, max_iter): - hook.before_step() - trainer.run_step() - hook.after_step() - hook.after_train() - - Notes: - 1. In the hook method, users can access `self.trainer` to access more - properties about the context (e.g., current iteration). - - 2. A hook that does something in :meth:`before_step` can often be - implemented equivalently in :meth:`after_step`. - If the hook takes non-trivial time, it is strongly recommended to - implement the hook in :meth:`after_step` instead of :meth:`before_step`. - The convention is that :meth:`before_step` should only take negligible time. - - Following this convention will allow hooks that do care about the difference - between :meth:`before_step` and :meth:`after_step` (e.g., timer) to - function properly. - - Attributes: - trainer: A weak reference to the trainer object. Set by the trainer when the hook is - registered. - """ - - def before_train(self): - """ - Called before the first iteration. - """ - pass - - def after_train(self): - """ - Called after the last iteration. - """ - pass - - def before_step(self): - """ - Called before each iteration. - """ - pass - - def after_step(self): - """ - Called after each iteration. - """ - pass - - -class TrainerBase: - """ - Base class for iterative trainer with hooks. - - The only assumption we made here is: the training runs in a loop. - A subclass can implement what the loop is. - We made no assumptions about the existence of dataloader, optimizer, model, etc. - - Attributes: - iter(int): the current iteration. - - start_iter(int): The iteration to start with. 
- By convention the minimum possible value is 0. - - max_iter(int): The iteration to end training. - - storage(EventStorage): An EventStorage that's opened during the course of training. - """ - - def __init__(self): - self._hooks = [] - - def register_hooks(self, hooks): - """ - Register hooks to the trainer. The hooks are executed in the order - they are registered. - - Args: - hooks (list[Optional[HookBase]]): list of hooks - """ - hooks = [h for h in hooks if h is not None] - for h in hooks: - assert isinstance(h, HookBase) - # To avoid circular reference, hooks and trainer cannot own each other. - # This normally does not matter, but will cause memory leak if the - # involved objects contain __del__: - # See http://engineering.hearsaysocial.com/2013/06/16/circular-references-in-python/ - h.trainer = weakref.proxy(self) - self._hooks.extend(hooks) - - def train(self, start_iter: int, max_iter: int): - """ - Args: - start_iter, max_iter (int): See docs above - """ - logger = logging.getLogger(__name__) - logger.info("Starting training from iteration {}".format(start_iter)) - - self.iter = self.start_iter = start_iter - self.max_iter = max_iter - - with EventStorage(start_iter) as self.storage: - try: - self.before_train() - for self.iter in range(start_iter, max_iter): - self.before_step() - self.run_step() - self.after_step() - except Exception: - logger.exception("Exception during training:") - raise - finally: - self.after_train() - - def before_train(self): - for h in self._hooks: - h.before_train() - - def after_train(self): - for h in self._hooks: - h.after_train() - - def before_step(self): - for h in self._hooks: - h.before_step() - - def after_step(self): - for h in self._hooks: - h.after_step() - # this guarantees, that in each hook's after_step, storage.iter == trainer.iter - self.storage.step() - - def run_step(self): - raise NotImplementedError - - -class SimpleTrainer(TrainerBase): - """ - A simple trainer for the most common type of task: - single-cost single-optimizer single-data-source iterative optimization. - It assumes that every step, you: - - 1. Compute the loss with a data from the data_loader. - 2. Compute the gradients with the above loss. - 3. Update the model with the optimizer. - - If you want to do anything fancier than this, - either subclass TrainerBase and implement your own `run_step`, - or write your own training loop. - """ - - def __init__(self, model, data_loader, optimizer): - """ - Args: - model: a torch Module. Takes a data from data_loader and returns a - dict of losses. - data_loader: an iterable. Contains data to be used to call model. - optimizer: a torch optimizer. - """ - super().__init__() - - """ - We set the model to training mode in the trainer. - However it's valid to train a model that's in eval mode. - If you want your model (or a submodule of it) to behave - like evaluation during training, you can overwrite its train() method. - """ - model.train() - - self.model = model - self.data_loader = data_loader - self._data_loader_iter = iter(data_loader) - self.optimizer = optimizer - - def run_step(self): - """ - Implement the standard training logic described above. - """ - assert self.model.training, "[SimpleTrainer] model was changed to eval mode!" - start = time.perf_counter() - """ - If you want to do something with the data, you can wrap the dataloader. - """ - data = next(self._data_loader_iter) - data_time = time.perf_counter() - start - - """ - If you want to do something with the losses, you can wrap the model. 
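# A minimal wiring sketch for SimpleTrainer above: plain model / data_loader / optimizer
# objects plus one custom hook. All three objects are user-provided placeholders here,
# and the model is expected to return a dict of losses as described above.
class IterationPrinter(HookBase):
    def after_step(self):
        if self.trainer.iter % 20 == 0:
            print("iter", self.trainer.iter)

trainer = SimpleTrainer(model, data_loader, optimizer)
trainer.register_hooks([IterationPrinter()])
trainer.train(start_iter=0, max_iter=10000)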
- """ - loss_dict = self.model(data) - losses = sum(loss_dict.values()) - self._detect_anomaly(losses, loss_dict) - - metrics_dict = loss_dict - metrics_dict["data_time"] = data_time - self._write_metrics(metrics_dict) - - """ - If you need to accumulate gradients or something similar, you can - wrap the optimizer with your custom `zero_grad()` method. - """ - self.optimizer.zero_grad() - losses.backward() - - """ - If you need gradient clipping/scaling or other processing, you can - wrap the optimizer with your custom `step()` method. - """ - self.optimizer.step() - - def _detect_anomaly(self, losses, loss_dict): - if not torch.isfinite(losses).all(): - raise FloatingPointError( - "Loss became infinite or NaN at iteration={}!\nloss_dict = {}".format( - self.iter, loss_dict - ) - ) - - def _write_metrics(self, metrics_dict: dict): - """ - Args: - metrics_dict (dict): dict of scalar metrics - """ - metrics_dict = { - k: v.detach().cpu().item() if isinstance(v, torch.Tensor) else float(v) - for k, v in metrics_dict.items() - } - # gather metrics among all workers for logging - # This assumes we do DDP-style training, which is currently the only - # supported method in detectron2. - all_metrics_dict = comm.gather(metrics_dict) - - if comm.is_main_process(): - if "data_time" in all_metrics_dict[0]: - # data_time among workers can have high variance. The actual latency - # caused by data_time is the maximum among workers. - data_time = np.max([x.pop("data_time") for x in all_metrics_dict]) - self.storage.put_scalar("data_time", data_time) - - # average the rest metrics - metrics_dict = { - k: np.mean([x[k] for x in all_metrics_dict]) for k in all_metrics_dict[0].keys() - } - total_losses_reduced = sum(loss for loss in metrics_dict.values()) - - self.storage.put_scalar("total_loss", total_losses_reduced) - if len(metrics_dict) > 1: - self.storage.put_scalars(**metrics_dict) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/__init__.py deleted file mode 100644 index f1d2f10..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .cityscapes_evaluation import CityscapesInstanceEvaluator, CityscapesSemSegEvaluator -from .coco_evaluation import COCOEvaluator -from .rotated_coco_evaluation import RotatedCOCOEvaluator -from .evaluator import DatasetEvaluator, DatasetEvaluators, inference_context, inference_on_dataset -from .lvis_evaluation import LVISEvaluator -from .panoptic_evaluation import COCOPanopticEvaluator -from .pascal_voc_evaluation import PascalVOCDetectionEvaluator -from .sem_seg_evaluation import SemSegEvaluator -from .testing import print_csv_format, verify_results - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/cityscapes_evaluation.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/cityscapes_evaluation.py deleted file mode 100644 index f6287a8..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/cityscapes_evaluation.py +++ /dev/null @@ -1,187 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import glob -import logging -import numpy as np -import os -import tempfile -from collections import OrderedDict -import torch -from fvcore.common.file_io import PathManager -from PIL import Image - -from detectron2.data import MetadataCatalog -from detectron2.utils import comm - -from .evaluator import DatasetEvaluator - - -class CityscapesEvaluator(DatasetEvaluator): - """ - Base class for evaluation using cityscapes API. - """ - - def __init__(self, dataset_name): - """ - Args: - dataset_name (str): the name of the dataset. - It must have the following metadata associated with it: - "thing_classes", "gt_dir". - """ - self._metadata = MetadataCatalog.get(dataset_name) - self._cpu_device = torch.device("cpu") - self._logger = logging.getLogger(__name__) - - def reset(self): - self._working_dir = tempfile.TemporaryDirectory(prefix="cityscapes_eval_") - self._temp_dir = self._working_dir.name - # All workers will write to the same results directory - # TODO this does not work in distributed training - self._temp_dir = comm.all_gather(self._temp_dir)[0] - if self._temp_dir != self._working_dir.name: - self._working_dir.cleanup() - self._logger.info( - "Writing cityscapes results to temporary directory {} ...".format(self._temp_dir) - ) - - -class CityscapesInstanceEvaluator(CityscapesEvaluator): - """ - Evaluate instance segmentation results using cityscapes API. - - Note: - * It does not work in multi-machine distributed training. - * It contains a synchronization, therefore has to be used on all ranks. - * Only the main process runs evaluation. - """ - - def process(self, inputs, outputs): - from cityscapesscripts.helpers.labels import name2label - - for input, output in zip(inputs, outputs): - file_name = input["file_name"] - basename = os.path.splitext(os.path.basename(file_name))[0] - pred_txt = os.path.join(self._temp_dir, basename + "_pred.txt") - - output = output["instances"].to(self._cpu_device) - num_instances = len(output) - with open(pred_txt, "w") as fout: - for i in range(num_instances): - pred_class = output.pred_classes[i] - classes = self._metadata.thing_classes[pred_class] - class_id = name2label[classes].id - score = output.scores[i] - mask = output.pred_masks[i].numpy().astype("uint8") - png_filename = os.path.join( - self._temp_dir, basename + "_{}_{}.png".format(i, classes) - ) - - Image.fromarray(mask * 255).save(png_filename) - fout.write("{} {} {}\n".format(os.path.basename(png_filename), class_id, score)) - - def evaluate(self): - """ - Returns: - dict: has a key "segm", whose value is a dict of "AP" and "AP50". 
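To make the notes above concrete, here is a usage sketch for the Cityscapes evaluators. The dataset name is only illustrative and must carry the "thing_classes" and "gt_dir" metadata; because of the synchronization, the call has to run on every rank even though only the main process receives the result dict. model and val_loader are assumed to be built elsewhere.

.. code-block:: python

    from detectron2.evaluation import CityscapesInstanceEvaluator, inference_on_dataset

    # "cityscapes_fine_instance_seg_val" is used purely as an illustrative dataset name
    evaluator = CityscapesInstanceEvaluator("cityscapes_fine_instance_seg_val")
    results = inference_on_dataset(model, val_loader, evaluator)  # must run on all ranks
    # main process: results["segm"] == {"AP": ..., "AP50": ...}; other ranks receive {}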
- """ - comm.synchronize() - if comm.get_rank() > 0: - return - import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling as cityscapes_eval - - self._logger.info("Evaluating results under {} ...".format(self._temp_dir)) - - # set some global states in cityscapes evaluation API, before evaluating - cityscapes_eval.args.predictionPath = os.path.abspath(self._temp_dir) - cityscapes_eval.args.predictionWalk = None - cityscapes_eval.args.JSONOutput = False - cityscapes_eval.args.colorized = False - cityscapes_eval.args.gtInstancesFile = os.path.join(self._temp_dir, "gtInstances.json") - - # These lines are adopted from - # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/evalInstanceLevelSemanticLabeling.py # noqa - gt_dir = PathManager.get_local_path(self._metadata.gt_dir) - groundTruthImgList = glob.glob(os.path.join(gt_dir, "*", "*_gtFine_instanceIds.png")) - assert len( - groundTruthImgList - ), "Cannot find any ground truth images to use for evaluation. Searched for: {}".format( - cityscapes_eval.args.groundTruthSearch - ) - predictionImgList = [] - for gt in groundTruthImgList: - predictionImgList.append(cityscapes_eval.getPrediction(gt, cityscapes_eval.args)) - results = cityscapes_eval.evaluateImgLists( - predictionImgList, groundTruthImgList, cityscapes_eval.args - )["averages"] - - ret = OrderedDict() - ret["segm"] = {"AP": results["allAp"] * 100, "AP50": results["allAp50%"] * 100} - self._working_dir.cleanup() - return ret - - -class CityscapesSemSegEvaluator(CityscapesEvaluator): - """ - Evaluate semantic segmentation results using cityscapes API. - - Note: - * It does not work in multi-machine distributed training. - * It contains a synchronization, therefore has to be used on all ranks. - * Only the main process runs evaluation. - """ - - def process(self, inputs, outputs): - from cityscapesscripts.helpers.labels import trainId2label - - for input, output in zip(inputs, outputs): - file_name = input["file_name"] - basename = os.path.splitext(os.path.basename(file_name))[0] - pred_filename = os.path.join(self._temp_dir, basename + "_pred.png") - - output = output["sem_seg"].argmax(dim=0).to(self._cpu_device).numpy() - pred = 255 * np.ones(output.shape, dtype=np.uint8) - for train_id, label in trainId2label.items(): - if label.ignoreInEval: - continue - pred[output == train_id] = label.id - Image.fromarray(pred).save(pred_filename) - - def evaluate(self): - comm.synchronize() - if comm.get_rank() > 0: - return - # Load the Cityscapes eval script *after* setting the required env var, - # since the script reads CITYSCAPES_DATASET into global variables at load time. - import cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling as cityscapes_eval - - self._logger.info("Evaluating results under {} ...".format(self._temp_dir)) - - # set some global states in cityscapes evaluation API, before evaluating - cityscapes_eval.args.predictionPath = os.path.abspath(self._temp_dir) - cityscapes_eval.args.predictionWalk = None - cityscapes_eval.args.JSONOutput = False - cityscapes_eval.args.colorized = False - - # These lines are adopted from - # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/evalPixelLevelSemanticLabeling.py # noqa - gt_dir = PathManager.get_local_path(self._metadata.gt_dir) - groundTruthImgList = glob.glob(os.path.join(gt_dir, "*", "*_gtFine_labelIds.png")) - assert len( - groundTruthImgList - ), "Cannot find any ground truth images to use for evaluation. 
Searched for: {}".format( - cityscapes_eval.args.groundTruthSearch - ) - predictionImgList = [] - for gt in groundTruthImgList: - predictionImgList.append(cityscapes_eval.getPrediction(cityscapes_eval.args, gt)) - results = cityscapes_eval.evaluateImgLists( - predictionImgList, groundTruthImgList, cityscapes_eval.args - ) - ret = OrderedDict() - ret["sem_seg"] = { - "IoU": 100.0 * results["averageScoreClasses"], - "iIoU": 100.0 * results["averageScoreInstClasses"], - "IoU_sup": 100.0 * results["averageScoreCategories"], - "iIoU_sup": 100.0 * results["averageScoreInstCategories"], - } - self._working_dir.cleanup() - return ret diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/coco_evaluation.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/coco_evaluation.py deleted file mode 100644 index 64b0903..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/coco_evaluation.py +++ /dev/null @@ -1,512 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import contextlib -import copy -import io -import itertools -import json -import logging -import numpy as np -import os -import pickle -from collections import OrderedDict -import pycocotools.mask as mask_util -import torch -from fvcore.common.file_io import PathManager -from pycocotools.coco import COCO -from pycocotools.cocoeval import COCOeval -from tabulate import tabulate - -import detectron2.utils.comm as comm -from detectron2.data import MetadataCatalog -from detectron2.data.datasets.coco import convert_to_coco_json -from detectron2.structures import Boxes, BoxMode, pairwise_iou -from detectron2.utils.logger import create_small_table - -from .evaluator import DatasetEvaluator - - -class COCOEvaluator(DatasetEvaluator): - """ - Evaluate object proposal, instance detection/segmentation, keypoint detection - outputs using COCO's metrics and APIs. - """ - - def __init__(self, dataset_name, cfg, distributed, output_dir=None): - """ - Args: - dataset_name (str): name of the dataset to be evaluated. - It must have either the following corresponding metadata: - - "json_file": the path to the COCO format annotation - - Or it must be in detectron2's standard dataset format - so it can be converted to COCO format automatically. - cfg (CfgNode): config instance - distributed (True): if True, will collect results from all ranks and run evaluation - in the main process. - Otherwise, will evaluate the results in the current process. - output_dir (str): optional, an output directory to dump all - results predicted on the dataset. The dump contains two files: - - 1. "instance_predictions.pth" a file in torch serialization - format that contains all the raw original predictions. - 2. "coco_instances_results.json" a json file in COCO's result - format. - """ - self._tasks = self._tasks_from_config(cfg) - self._distributed = distributed - self._output_dir = output_dir - - self._cpu_device = torch.device("cpu") - self._logger = logging.getLogger(__name__) - - self._metadata = MetadataCatalog.get(dataset_name) - if not hasattr(self._metadata, "json_file"): - self._logger.warning( - f"json_file was not found in MetaDataCatalog for '{dataset_name}'." - " Trying to convert it to COCO format ..." 
- ) - - cache_path = os.path.join(output_dir, f"{dataset_name}_coco_format.json") - self._metadata.json_file = cache_path - convert_to_coco_json(dataset_name, cache_path) - - json_file = PathManager.get_local_path(self._metadata.json_file) - with contextlib.redirect_stdout(io.StringIO()): - self._coco_api = COCO(json_file) - - self._kpt_oks_sigmas = cfg.TEST.KEYPOINT_OKS_SIGMAS - # Test set json files do not contain annotations (evaluation must be - # performed using the COCO evaluation server). - self._do_evaluation = "annotations" in self._coco_api.split_name - - def reset(self): - self._predictions = [] - - def _tasks_from_config(self, cfg): - """ - Returns: - tuple[str]: tasks that can be evaluated under the given configuration. - """ - tasks = ("bbox",) - if cfg.MODEL.MASK_ON: - tasks = tasks + ("segm",) - if cfg.MODEL.KEYPOINT_ON: - tasks = tasks + ("keypoints",) - return tasks - - def process(self, inputs, outputs): - """ - Args: - inputs: the inputs to a COCO model (e.g., GeneralizedRCNN). - It is a list of dict. Each dict corresponds to an image and - contains keys like "height", "width", "file_name", "image_id". - outputs: the outputs of a COCO model. It is a list of dicts with key - "instances" that contains :class:`Instances`. - """ - for input, output in zip(inputs, outputs): - prediction = {"image_id": input["image_id"]} - - # TODO this is ugly - if "instances" in output: - instances = output["instances"].to(self._cpu_device) - prediction["instances"] = instances_to_coco_json(instances, input["image_id"]) - if "proposals" in output: - prediction["proposals"] = output["proposals"].to(self._cpu_device) - self._predictions.append(prediction) - - def evaluate(self): - if self._distributed: - comm.synchronize() - predictions = comm.gather(self._predictions, dst=0) - predictions = list(itertools.chain(*predictions)) - - if not comm.is_main_process(): - return {} - else: - predictions = self._predictions - - if len(predictions) == 0: - self._logger.warning("[COCOEvaluator] Did not receive valid predictions.") - return {} - - if self._output_dir: - PathManager.mkdirs(self._output_dir) - file_path = os.path.join(self._output_dir, "instances_predictions.pth") - with PathManager.open(file_path, "wb") as f: - torch.save(predictions, f) - - self._results = OrderedDict() - if "proposals" in predictions[0]: - self._eval_box_proposals(predictions) - if "instances" in predictions[0]: - self._eval_predictions(set(self._tasks), predictions) - # Copy so the caller can do whatever with results - return copy.deepcopy(self._results) - - def _eval_predictions(self, tasks, predictions): - """ - Evaluate predictions on the given tasks. - Fill self._results with the metrics of the tasks. 
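As a usage sketch for the COCO evaluator above: it is normally constructed once per dataset and driven by inference_on_dataset. The dataset name below is hypothetical, cfg and model are assumed to come from elsewhere, and build_detection_test_loader is assumed to be the usual detectron2 data helper.

.. code-block:: python

    from detectron2.data import build_detection_test_loader
    from detectron2.evaluation import COCOEvaluator, inference_on_dataset

    # "my_coco_val" is a hypothetical registered dataset; cfg and model are built elsewhere
    evaluator = COCOEvaluator("my_coco_val", cfg, distributed=True, output_dir="./coco_eval")
    val_loader = build_detection_test_loader(cfg, "my_coco_val")
    results = inference_on_dataset(model, val_loader, evaluator)
    # e.g. results == {"bbox": {"AP": ..., "AP50": ...}, "segm": {...}}, depending on cfg tasks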
- """ - self._logger.info("Preparing results for COCO format ...") - coco_results = list(itertools.chain(*[x["instances"] for x in predictions])) - - # unmap the category ids for COCO - if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): - reverse_id_mapping = { - v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items() - } - for result in coco_results: - category_id = result["category_id"] - assert ( - category_id in reverse_id_mapping - ), "A prediction has category_id={}, which is not available in the dataset.".format( - category_id - ) - result["category_id"] = reverse_id_mapping[category_id] - - if self._output_dir: - file_path = os.path.join(self._output_dir, "coco_instances_results.json") - self._logger.info("Saving results to {}".format(file_path)) - with PathManager.open(file_path, "w") as f: - f.write(json.dumps(coco_results)) - f.flush() - - if not self._do_evaluation: - self._logger.info("Annotations are not available for evaluation.") - return - - self._logger.info("Evaluating predictions ...") - for task in sorted(tasks): - coco_eval = ( - _evaluate_predictions_on_coco( - self._coco_api, coco_results, task, kpt_oks_sigmas=self._kpt_oks_sigmas - ) - if len(coco_results) > 0 - else None # cocoapi does not handle empty results very well - ) - - res = self._derive_coco_results( - coco_eval, task, class_names=self._metadata.get("thing_classes") - ) - self._results[task] = res - - def _eval_box_proposals(self, predictions): - """ - Evaluate the box proposals in predictions. - Fill self._results with the metrics for "box_proposals" task. - """ - if self._output_dir: - # Saving generated box proposals to file. - # Predicted box_proposals are in XYXY_ABS mode. - bbox_mode = BoxMode.XYXY_ABS.value - ids, boxes, objectness_logits = [], [], [] - for prediction in predictions: - ids.append(prediction["image_id"]) - boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy()) - objectness_logits.append(prediction["proposals"].objectness_logits.numpy()) - - proposal_data = { - "boxes": boxes, - "objectness_logits": objectness_logits, - "ids": ids, - "bbox_mode": bbox_mode, - } - with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f: - pickle.dump(proposal_data, f) - - if not self._do_evaluation: - self._logger.info("Annotations are not available for evaluation.") - return - - self._logger.info("Evaluating bbox proposals ...") - res = {} - areas = {"all": "", "small": "s", "medium": "m", "large": "l"} - for limit in [100, 1000]: - for area, suffix in areas.items(): - stats = _evaluate_box_proposals(predictions, self._coco_api, area=area, limit=limit) - key = "AR{}@{:d}".format(suffix, limit) - res[key] = float(stats["ar"].item() * 100) - self._logger.info("Proposal metrics: \n" + create_small_table(res)) - self._results["box_proposals"] = res - - def _derive_coco_results(self, coco_eval, iou_type, class_names=None): - """ - Derive the desired score numbers from summarized COCOeval. - - Args: - coco_eval (None or COCOEval): None represents no predictions from model. - iou_type (str): - class_names (None or list[str]): if provided, will use it to predict - per-category AP. 
- - Returns: - a dict of {metric name: score} - """ - - metrics = { - "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl"], - "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl"], - "keypoints": ["AP", "AP50", "AP75", "APm", "APl"], - }[iou_type] - - if coco_eval is None: - self._logger.warn("No predictions from the model!") - return {metric: float("nan") for metric in metrics} - - # the standard metrics - results = { - metric: float(coco_eval.stats[idx] * 100 if coco_eval.stats[idx] >= 0 else "nan") - for idx, metric in enumerate(metrics) - } - self._logger.info( - "Evaluation results for {}: \n".format(iou_type) + create_small_table(results) - ) - if not np.isfinite(sum(results.values())): - self._logger.info("Note that some metrics cannot be computed.") - - if class_names is None or len(class_names) <= 1: - return results - # Compute per-category AP - # from https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L222-L252 # noqa - precisions = coco_eval.eval["precision"] - # precision has dims (iou, recall, cls, area range, max dets) - assert len(class_names) == precisions.shape[2] - - results_per_category = [] - for idx, name in enumerate(class_names): - # area range index 0: all area ranges - # max dets index -1: typically 100 per image - precision = precisions[:, :, idx, 0, -1] - precision = precision[precision > -1] - ap = np.mean(precision) if precision.size else float("nan") - results_per_category.append(("{}".format(name), float(ap * 100))) - - # tabulate it - N_COLS = min(6, len(results_per_category) * 2) - results_flatten = list(itertools.chain(*results_per_category)) - results_2d = itertools.zip_longest(*[results_flatten[i::N_COLS] for i in range(N_COLS)]) - table = tabulate( - results_2d, - tablefmt="pipe", - floatfmt=".3f", - headers=["category", "AP"] * (N_COLS // 2), - numalign="left", - ) - self._logger.info("Per-category {} AP: \n".format(iou_type) + table) - - results.update({"AP-" + name: ap for name, ap in results_per_category}) - return results - - -def instances_to_coco_json(instances, img_id): - """ - Dump an "Instances" object to a COCO-format json that's used for evaluation. - - Args: - instances (Instances): - img_id (int): the image id - - Returns: - list[dict]: list of json annotations in COCO format. - """ - num_instance = len(instances) - if num_instance == 0: - return [] - - boxes = instances.pred_boxes.tensor.numpy() - boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) - boxes = boxes.tolist() - scores = instances.scores.tolist() - classes = instances.pred_classes.tolist() - - has_mask = instances.has("pred_masks") - if has_mask: - # use RLE to encode the masks, because they are too large and takes memory - # since this evaluator stores outputs of the entire dataset - rles = [ - mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0] - for mask in instances.pred_masks - ] - for rle in rles: - # "counts" is an array encoded by mask_util as a byte-stream. Python3's - # json writer which always produces strings cannot serialize a bytestream - # unless you decode it. Thankfully, utf-8 works out (which is also what - # the pycocotools/_mask.pyx does). 
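A tiny aside to illustrate the serialization concern described in the comment above; the 2x2 array is a made-up binary mask, encoded the same way as the predicted masks.

.. code-block:: python

    import json

    import numpy as np
    import pycocotools.mask as mask_util

    demo_mask = np.array([[1, 0], [1, 1]], dtype=np.uint8)  # made-up binary mask
    rle = mask_util.encode(np.array(demo_mask[:, :, None], order="F", dtype="uint8"))[0]
    # rle["counts"] is a bytes object, which json.dumps cannot serialize ...
    rle["counts"] = rle["counts"].decode("utf-8")
    # ... but after decoding it, the RLE dict is JSON-serializable
    json.dumps(rle)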
- rle["counts"] = rle["counts"].decode("utf-8") - - has_keypoints = instances.has("pred_keypoints") - if has_keypoints: - keypoints = instances.pred_keypoints - - results = [] - for k in range(num_instance): - result = { - "image_id": img_id, - "category_id": classes[k], - "bbox": boxes[k], - "score": scores[k], - } - if has_mask: - result["segmentation"] = rles[k] - if has_keypoints: - # In COCO annotations, - # keypoints coordinates are pixel indices. - # However our predictions are floating point coordinates. - # Therefore we subtract 0.5 to be consistent with the annotation format. - # This is the inverse of data loading logic in `data/coco.py`. - keypoints[k][:, :2] -= 0.5 - result["keypoints"] = keypoints[k].flatten().tolist() - results.append(result) - return results - - -# inspired from Detectron: -# https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L255 # noqa -def _evaluate_box_proposals(dataset_predictions, coco_api, thresholds=None, area="all", limit=None): - """ - Evaluate detection proposal recall metrics. This function is a much - faster alternative to the official COCO API recall evaluation code. However, - it produces slightly different results. - """ - # Record max overlap value for each gt box - # Return vector of overlap values - areas = { - "all": 0, - "small": 1, - "medium": 2, - "large": 3, - "96-128": 4, - "128-256": 5, - "256-512": 6, - "512-inf": 7, - } - area_ranges = [ - [0 ** 2, 1e5 ** 2], # all - [0 ** 2, 32 ** 2], # small - [32 ** 2, 96 ** 2], # medium - [96 ** 2, 1e5 ** 2], # large - [96 ** 2, 128 ** 2], # 96-128 - [128 ** 2, 256 ** 2], # 128-256 - [256 ** 2, 512 ** 2], # 256-512 - [512 ** 2, 1e5 ** 2], - ] # 512-inf - assert area in areas, "Unknown area range: {}".format(area) - area_range = area_ranges[areas[area]] - gt_overlaps = [] - num_pos = 0 - - for prediction_dict in dataset_predictions: - predictions = prediction_dict["proposals"] - - # sort predictions in descending order - # TODO maybe remove this and make it explicit in the documentation - inds = predictions.objectness_logits.sort(descending=True)[1] - predictions = predictions[inds] - - ann_ids = coco_api.getAnnIds(imgIds=prediction_dict["image_id"]) - anno = coco_api.loadAnns(ann_ids) - gt_boxes = [ - BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) - for obj in anno - if obj["iscrowd"] == 0 - ] - gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4) # guard against no boxes - gt_boxes = Boxes(gt_boxes) - gt_areas = torch.as_tensor([obj["area"] for obj in anno if obj["iscrowd"] == 0]) - - if len(gt_boxes) == 0 or len(predictions) == 0: - continue - - valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]) - gt_boxes = gt_boxes[valid_gt_inds] - - num_pos += len(gt_boxes) - - if len(gt_boxes) == 0: - continue - - if limit is not None and len(predictions) > limit: - predictions = predictions[:limit] - - overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes) - - _gt_overlaps = torch.zeros(len(gt_boxes)) - for j in range(min(len(predictions), len(gt_boxes))): - # find which proposal box maximally covers each gt box - # and get the iou amount of coverage for each gt box - max_overlaps, argmax_overlaps = overlaps.max(dim=0) - - # find which gt box is 'best' covered (i.e. 
'best' = most iou) - gt_ovr, gt_ind = max_overlaps.max(dim=0) - assert gt_ovr >= 0 - # find the proposal box that covers the best covered gt box - box_ind = argmax_overlaps[gt_ind] - # record the iou coverage of this gt box - _gt_overlaps[j] = overlaps[box_ind, gt_ind] - assert _gt_overlaps[j] == gt_ovr - # mark the proposal box and the gt box as used - overlaps[box_ind, :] = -1 - overlaps[:, gt_ind] = -1 - - # append recorded iou coverage level - gt_overlaps.append(_gt_overlaps) - gt_overlaps = ( - torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32) - ) - gt_overlaps, _ = torch.sort(gt_overlaps) - - if thresholds is None: - step = 0.05 - thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32) - recalls = torch.zeros_like(thresholds) - # compute recall for each iou threshold - for i, t in enumerate(thresholds): - recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos) - # ar = 2 * np.trapz(recalls, thresholds) - ar = recalls.mean() - return { - "ar": ar, - "recalls": recalls, - "thresholds": thresholds, - "gt_overlaps": gt_overlaps, - "num_pos": num_pos, - } - - -def _evaluate_predictions_on_coco(coco_gt, coco_results, iou_type, kpt_oks_sigmas=None): - """ - Evaluate the coco results using COCOEval API. - """ - assert len(coco_results) > 0 - - if iou_type == "segm": - coco_results = copy.deepcopy(coco_results) - # When evaluating mask AP, if the results contain bbox, cocoapi will - # use the box area as the area of the instance, instead of the mask area. - # This leads to a different definition of small/medium/large. - # We remove the bbox field to let mask AP use mask area. - for c in coco_results: - c.pop("bbox", None) - - coco_dt = coco_gt.loadRes(coco_results) - coco_eval = COCOeval(coco_gt, coco_dt, iou_type) - # Use the COCO default keypoint OKS sigmas unless overrides are specified - if kpt_oks_sigmas: - coco_eval.params.kpt_oks_sigmas = np.array(kpt_oks_sigmas) - - if iou_type == "keypoints": - num_keypoints = len(coco_results[0]["keypoints"]) // 3 - assert len(coco_eval.params.kpt_oks_sigmas) == num_keypoints, ( - "[COCOEvaluator] The length of cfg.TEST.KEYPOINT_OKS_SIGMAS (default: 17) " - "must be equal to the number of keypoints. However the prediction has {} " - "keypoints! For more information please refer to " - "http://cocodataset.org/#keypoints-eval.".format(num_keypoints) - ) - - coco_eval.evaluate() - coco_eval.accumulate() - coco_eval.summarize() - - return coco_eval diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/evaluator.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/evaluator.py deleted file mode 100644 index dcb9804..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/evaluator.py +++ /dev/null @@ -1,196 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import datetime -import logging -import time -from collections import OrderedDict -from contextlib import contextmanager -import torch - -from detectron2.utils.comm import get_world_size, is_main_process -from detectron2.utils.logger import log_every_n_seconds - - -class DatasetEvaluator: - """ - Base class for a dataset evaluator. - - The function :func:`inference_on_dataset` runs the model over - all samples in the dataset, and have a DatasetEvaluator to process the inputs/outputs. 
- - This class will accumulate information of the inputs/outputs (by :meth:`process`), - and produce evaluation results in the end (by :meth:`evaluate`). - """ - - def reset(self): - """ - Preparation for a new round of evaluation. - Should be called before starting a round of evaluation. - """ - pass - - def process(self, inputs, outputs): - """ - Process the pair of inputs and outputs. - If they contain batches, the pairs can be consumed one-by-one using `zip`: - - .. code-block:: python - - for input_, output in zip(inputs, outputs): - # do evaluation on single input/output pair - ... - - Args: - inputs (list): the inputs that's used to call the model. - outputs (list): the return value of `model(inputs)` - """ - pass - - def evaluate(self): - """ - Evaluate/summarize the performance, after processing all input/output pairs. - - Returns: - dict: - A new evaluator class can return a dict of arbitrary format - as long as the user can process the results. - In our train_net.py, we expect the following format: - - * key: the name of the task (e.g., bbox) - * value: a dict of {metric name: score}, e.g.: {"AP50": 80} - """ - pass - - -class DatasetEvaluators(DatasetEvaluator): - """ - Wrapper class to combine multiple :class:`DatasetEvaluator` instances. - - This class dispatches every evaluation call to - all of its :class:`DatasetEvaluator`. - """ - - def __init__(self, evaluators): - """ - Args: - evaluators (list): the evaluators to combine. - """ - super().__init__() - self._evaluators = evaluators - - def reset(self): - for evaluator in self._evaluators: - evaluator.reset() - - def process(self, inputs, outputs): - for evaluator in self._evaluators: - evaluator.process(inputs, outputs) - - def evaluate(self): - results = OrderedDict() - for evaluator in self._evaluators: - result = evaluator.evaluate() - if is_main_process() and result is not None: - for k, v in result.items(): - assert ( - k not in results - ), "Different evaluators produce results with the same key {}".format(k) - results[k] = v - return results - - -def inference_on_dataset(model, data_loader, evaluator): - """ - Run model on the data_loader and evaluate the metrics with evaluator. - Also benchmark the inference speed of `model.forward` accurately. - The model will be used in eval mode. - - Args: - model (nn.Module): a module which accepts an object from - `data_loader` and returns some outputs. It will be temporarily set to `eval` mode. - - If you wish to evaluate a model in `training` mode instead, you can - wrap the given model and override its behavior of `.eval()` and `.train()`. - data_loader: an iterable object with a length. - The elements it generates will be the inputs to the model. - evaluator (DatasetEvaluator): the evaluator to run. Use `None` if you only want - to benchmark, but don't want to do any evaluation. 
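Here is a minimal, hypothetical evaluator that follows the reset/process/evaluate contract described above and returns results in the expected {task: {metric: score}} format; model and val_loader are assumed to exist.

.. code-block:: python

    from detectron2.evaluation import DatasetEvaluator, inference_on_dataset


    class InstanceCounter(DatasetEvaluator):
        """Hypothetical evaluator: counts predicted instances over the whole dataset."""

        def reset(self):
            self.count = 0

        def process(self, inputs, outputs):
            for output in outputs:
                self.count += len(output["instances"])

        def evaluate(self):
            # key: task name, value: {metric name: score}, as documented above
            return {"counting": {"num_instances": self.count}}


    # model and val_loader are assumed to be built elsewhere
    results = inference_on_dataset(model, val_loader, InstanceCounter())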
- - Returns: - The return value of `evaluator.evaluate()` - """ - num_devices = get_world_size() - logger = logging.getLogger(__name__) - logger.info("Start inference on {} images".format(len(data_loader))) - - total = len(data_loader) # inference data loader must have a fixed length - if evaluator is None: - # create a no-op evaluator - evaluator = DatasetEvaluators([]) - evaluator.reset() - - num_warmup = min(5, total - 1) - start_time = time.perf_counter() - total_compute_time = 0 - with inference_context(model), torch.no_grad(): - for idx, inputs in enumerate(data_loader): - if idx == num_warmup: - start_time = time.perf_counter() - total_compute_time = 0 - - start_compute_time = time.perf_counter() - outputs = model(inputs) - if torch.cuda.is_available(): - torch.cuda.synchronize() - total_compute_time += time.perf_counter() - start_compute_time - evaluator.process(inputs, outputs) - - iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup) - seconds_per_img = total_compute_time / iters_after_start - if idx >= num_warmup * 2 or seconds_per_img > 5: - total_seconds_per_img = (time.perf_counter() - start_time) / iters_after_start - eta = datetime.timedelta(seconds=int(total_seconds_per_img * (total - idx - 1))) - log_every_n_seconds( - logging.INFO, - "Inference done {}/{}. {:.4f} s / demo. ETA={}".format( - idx + 1, total, seconds_per_img, str(eta) - ), - n=5, - ) - - # Measure the time only for this worker (before the synchronization barrier) - total_time = time.perf_counter() - start_time - total_time_str = str(datetime.timedelta(seconds=total_time)) - # NOTE this format is parsed by grep - logger.info( - "Total inference time: {} ({:.6f} s / demo per device, on {} devices)".format( - total_time_str, total_time / (total - num_warmup), num_devices - ) - ) - total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time))) - logger.info( - "Total inference pure compute time: {} ({:.6f} s / demo per device, on {} devices)".format( - total_compute_time_str, total_compute_time / (total - num_warmup), num_devices - ) - ) - - results = evaluator.evaluate() - # An evaluator may return None when not in main process. - # Replace it by an empty dict instead to make it easier for downstream code to handle - if results is None: - results = {} - return results - - -@contextmanager -def inference_context(model): - """ - A context where the model is temporarily changed to eval mode, - and restored to previous mode afterwards. - - Args: - model: a torch Module - """ - training_mode = model.training - model.eval() - yield - model.train(training_mode) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/lvis_evaluation.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/lvis_evaluation.py deleted file mode 100644 index e55f50f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/lvis_evaluation.py +++ /dev/null @@ -1,350 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import copy -import itertools -import json -import logging -import os -import pickle -from collections import OrderedDict -import torch -from fvcore.common.file_io import PathManager - -import detectron2.utils.comm as comm -from detectron2.data import MetadataCatalog -from detectron2.structures import Boxes, BoxMode, pairwise_iou -from detectron2.utils.logger import create_small_table - -from .coco_evaluation import instances_to_coco_json -from .evaluator import DatasetEvaluator - - -class LVISEvaluator(DatasetEvaluator): - """ - Evaluate object proposal and instance detection/segmentation outputs using - LVIS's metrics and evaluation API. - """ - - def __init__(self, dataset_name, cfg, distributed, output_dir=None): - """ - Args: - dataset_name (str): name of the dataset to be evaluated. - It must have the following corresponding metadata: - "json_file": the path to the LVIS format annotation - cfg (CfgNode): config instance - distributed (True): if True, will collect results from all ranks for evaluation. - Otherwise, will evaluate the results in the current process. - output_dir (str): optional, an output directory to dump results. - """ - from lvis import LVIS - - self._tasks = self._tasks_from_config(cfg) - self._distributed = distributed - self._output_dir = output_dir - - self._cpu_device = torch.device("cpu") - self._logger = logging.getLogger(__name__) - - self._metadata = MetadataCatalog.get(dataset_name) - json_file = PathManager.get_local_path(self._metadata.json_file) - self._lvis_api = LVIS(json_file) - # Test set json files do not contain annotations (evaluation must be - # performed using the LVIS evaluation server). - self._do_evaluation = len(self._lvis_api.get_ann_ids()) > 0 - - def reset(self): - self._predictions = [] - - def _tasks_from_config(self, cfg): - """ - Returns: - tuple[str]: tasks that can be evaluated under the given configuration. - """ - tasks = ("bbox",) - if cfg.MODEL.MASK_ON: - tasks = tasks + ("segm",) - return tasks - - def process(self, inputs, outputs): - """ - Args: - inputs: the inputs to a LVIS model (e.g., GeneralizedRCNN). - It is a list of dict. Each dict corresponds to an image and - contains keys like "height", "width", "file_name", "image_id". - outputs: the outputs of a LVIS model. It is a list of dicts with key - "instances" that contains :class:`Instances`. 
- """ - for input, output in zip(inputs, outputs): - prediction = {"image_id": input["image_id"]} - - if "instances" in output: - instances = output["instances"].to(self._cpu_device) - prediction["instances"] = instances_to_coco_json(instances, input["image_id"]) - if "proposals" in output: - prediction["proposals"] = output["proposals"].to(self._cpu_device) - self._predictions.append(prediction) - - def evaluate(self): - if self._distributed: - comm.synchronize() - predictions = comm.gather(self._predictions, dst=0) - predictions = list(itertools.chain(*predictions)) - - if not comm.is_main_process(): - return - else: - predictions = self._predictions - - if len(predictions) == 0: - self._logger.warning("[LVISEvaluator] Did not receive valid predictions.") - return {} - - if self._output_dir: - PathManager.mkdirs(self._output_dir) - file_path = os.path.join(self._output_dir, "instances_predictions.pth") - with PathManager.open(file_path, "wb") as f: - torch.save(predictions, f) - - self._results = OrderedDict() - if "proposals" in predictions[0]: - self._eval_box_proposals(predictions) - if "instances" in predictions[0]: - self._eval_predictions(set(self._tasks), predictions) - # Copy so the caller can do whatever with results - return copy.deepcopy(self._results) - - def _eval_predictions(self, tasks, predictions): - """ - Evaluate predictions on the given tasks. - Fill self._results with the metrics of the tasks. - - Args: - predictions (list[dict]): list of outputs from the model - """ - self._logger.info("Preparing results in the LVIS format ...") - lvis_results = list(itertools.chain(*[x["instances"] for x in predictions])) - - # LVIS evaluator can be used to evaluate results for COCO dataset categories. - # In this case `_metadata` variable will have a field with COCO-specific category mapping. - if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): - reverse_id_mapping = { - v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items() - } - for result in lvis_results: - result["category_id"] = reverse_id_mapping[result["category_id"]] - else: - # unmap the category ids for LVIS (from 0-indexed to 1-indexed) - for result in lvis_results: - result["category_id"] += 1 - - if self._output_dir: - file_path = os.path.join(self._output_dir, "lvis_instances_results.json") - self._logger.info("Saving results to {}".format(file_path)) - with PathManager.open(file_path, "w") as f: - f.write(json.dumps(lvis_results)) - f.flush() - - if not self._do_evaluation: - self._logger.info("Annotations are not available for evaluation.") - return - - self._logger.info("Evaluating predictions ...") - for task in sorted(tasks): - res = _evaluate_predictions_on_lvis( - self._lvis_api, lvis_results, task, class_names=self._metadata.get("thing_classes") - ) - self._results[task] = res - - def _eval_box_proposals(self, predictions): - """ - Evaluate the box proposals in predictions. - Fill self._results with the metrics for "box_proposals" task. - """ - if self._output_dir: - # Saving generated box proposals to file. - # Predicted box_proposals are in XYXY_ABS mode. 
- bbox_mode = BoxMode.XYXY_ABS.value - ids, boxes, objectness_logits = [], [], [] - for prediction in predictions: - ids.append(prediction["image_id"]) - boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy()) - objectness_logits.append(prediction["proposals"].objectness_logits.numpy()) - - proposal_data = { - "boxes": boxes, - "objectness_logits": objectness_logits, - "ids": ids, - "bbox_mode": bbox_mode, - } - with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f: - pickle.dump(proposal_data, f) - - if not self._do_evaluation: - self._logger.info("Annotations are not available for evaluation.") - return - - self._logger.info("Evaluating bbox proposals ...") - res = {} - areas = {"all": "", "small": "s", "medium": "m", "large": "l"} - for limit in [100, 1000]: - for area, suffix in areas.items(): - stats = _evaluate_box_proposals(predictions, self._lvis_api, area=area, limit=limit) - key = "AR{}@{:d}".format(suffix, limit) - res[key] = float(stats["ar"].item() * 100) - self._logger.info("Proposal metrics: \n" + create_small_table(res)) - self._results["box_proposals"] = res - - -# inspired from Detectron: -# https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L255 # noqa -def _evaluate_box_proposals(dataset_predictions, lvis_api, thresholds=None, area="all", limit=None): - """ - Evaluate detection proposal recall metrics. This function is a much - faster alternative to the official LVIS API recall evaluation code. However, - it produces slightly different results. - """ - # Record max overlap value for each gt box - # Return vector of overlap values - areas = { - "all": 0, - "small": 1, - "medium": 2, - "large": 3, - "96-128": 4, - "128-256": 5, - "256-512": 6, - "512-inf": 7, - } - area_ranges = [ - [0 ** 2, 1e5 ** 2], # all - [0 ** 2, 32 ** 2], # small - [32 ** 2, 96 ** 2], # medium - [96 ** 2, 1e5 ** 2], # large - [96 ** 2, 128 ** 2], # 96-128 - [128 ** 2, 256 ** 2], # 128-256 - [256 ** 2, 512 ** 2], # 256-512 - [512 ** 2, 1e5 ** 2], - ] # 512-inf - assert area in areas, "Unknown area range: {}".format(area) - area_range = area_ranges[areas[area]] - gt_overlaps = [] - num_pos = 0 - - for prediction_dict in dataset_predictions: - predictions = prediction_dict["proposals"] - - # sort predictions in descending order - # TODO maybe remove this and make it explicit in the documentation - inds = predictions.objectness_logits.sort(descending=True)[1] - predictions = predictions[inds] - - ann_ids = lvis_api.get_ann_ids(img_ids=[prediction_dict["image_id"]]) - anno = lvis_api.load_anns(ann_ids) - gt_boxes = [ - BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) for obj in anno - ] - gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4) # guard against no boxes - gt_boxes = Boxes(gt_boxes) - gt_areas = torch.as_tensor([obj["area"] for obj in anno]) - - if len(gt_boxes) == 0 or len(predictions) == 0: - continue - - valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]) - gt_boxes = gt_boxes[valid_gt_inds] - - num_pos += len(gt_boxes) - - if len(gt_boxes) == 0: - continue - - if limit is not None and len(predictions) > limit: - predictions = predictions[:limit] - - overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes) - - _gt_overlaps = torch.zeros(len(gt_boxes)) - for j in range(min(len(predictions), len(gt_boxes))): - # find which proposal box maximally covers each gt box - # and get the iou amount of coverage for each gt box - 
max_overlaps, argmax_overlaps = overlaps.max(dim=0) - - # find which gt box is 'best' covered (i.e. 'best' = most iou) - gt_ovr, gt_ind = max_overlaps.max(dim=0) - assert gt_ovr >= 0 - # find the proposal box that covers the best covered gt box - box_ind = argmax_overlaps[gt_ind] - # record the iou coverage of this gt box - _gt_overlaps[j] = overlaps[box_ind, gt_ind] - assert _gt_overlaps[j] == gt_ovr - # mark the proposal box and the gt box as used - overlaps[box_ind, :] = -1 - overlaps[:, gt_ind] = -1 - - # append recorded iou coverage level - gt_overlaps.append(_gt_overlaps) - gt_overlaps = ( - torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32) - ) - gt_overlaps, _ = torch.sort(gt_overlaps) - - if thresholds is None: - step = 0.05 - thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32) - recalls = torch.zeros_like(thresholds) - # compute recall for each iou threshold - for i, t in enumerate(thresholds): - recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos) - # ar = 2 * np.trapz(recalls, thresholds) - ar = recalls.mean() - return { - "ar": ar, - "recalls": recalls, - "thresholds": thresholds, - "gt_overlaps": gt_overlaps, - "num_pos": num_pos, - } - - -def _evaluate_predictions_on_lvis(lvis_gt, lvis_results, iou_type, class_names=None): - """ - Args: - iou_type (str): - kpt_oks_sigmas (list[float]): - class_names (None or list[str]): if provided, will use it to predict - per-category AP. - - Returns: - a dict of {metric name: score} - """ - metrics = { - "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"], - "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"], - }[iou_type] - - logger = logging.getLogger(__name__) - - if len(lvis_results) == 0: # TODO: check if needed - logger.warn("No predictions from the model!") - return {metric: float("nan") for metric in metrics} - - if iou_type == "segm": - lvis_results = copy.deepcopy(lvis_results) - # When evaluating mask AP, if the results contain bbox, LVIS API will - # use the box area as the area of the instance, instead of the mask area. - # This leads to a different definition of small/medium/large. - # We remove the bbox field to let mask AP use mask area. - for c in lvis_results: - c.pop("bbox", None) - - from lvis import LVISEval, LVISResults - - lvis_results = LVISResults(lvis_gt, lvis_results) - lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type) - lvis_eval.run() - lvis_eval.print_results() - - # Pull the standard metrics from the LVIS results - results = lvis_eval.get_results() - results = {metric: float(results[metric] * 100) for metric in metrics} - logger.info("Evaluation results for {}: \n".format(iou_type) + create_small_table(results)) - return results diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/panoptic_evaluation.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/panoptic_evaluation.py deleted file mode 100644 index fb5e7ab..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/panoptic_evaluation.py +++ /dev/null @@ -1,167 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import contextlib -import io -import itertools -import json -import logging -import os -import tempfile -from collections import OrderedDict -from fvcore.common.file_io import PathManager -from PIL import Image -from tabulate import tabulate - -from detectron2.data import MetadataCatalog -from detectron2.utils import comm - -from .evaluator import DatasetEvaluator - -logger = logging.getLogger(__name__) - - -class COCOPanopticEvaluator(DatasetEvaluator): - """ - Evaluate Panoptic Quality metrics on COCO using PanopticAPI. - It saves panoptic segmentation prediction in `output_dir` - - It contains a synchronize call and has to be called from all workers. - """ - - def __init__(self, dataset_name, output_dir): - """ - Args: - dataset_name (str): name of the dataset - output_dir (str): output directory to save results for evaluation - """ - self._metadata = MetadataCatalog.get(dataset_name) - self._thing_contiguous_id_to_dataset_id = { - v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items() - } - self._stuff_contiguous_id_to_dataset_id = { - v: k for k, v in self._metadata.stuff_dataset_id_to_contiguous_id.items() - } - - self._predictions_json = os.path.join(output_dir, "predictions.json") - - def reset(self): - self._predictions = [] - - def _convert_category_id(self, segment_info): - isthing = segment_info.pop("isthing", None) - if isthing is None: - # the model produces panoptic category id directly. No more conversion needed - return segment_info - if isthing is True: - segment_info["category_id"] = self._thing_contiguous_id_to_dataset_id[ - segment_info["category_id"] - ] - else: - segment_info["category_id"] = self._stuff_contiguous_id_to_dataset_id[ - segment_info["category_id"] - ] - return segment_info - - def process(self, inputs, outputs): - from panopticapi.utils import id2rgb - - for input, output in zip(inputs, outputs): - panoptic_img, segments_info = output["panoptic_seg"] - panoptic_img = panoptic_img.cpu().numpy() - - file_name = os.path.basename(input["file_name"]) - file_name_png = os.path.splitext(file_name)[0] + ".png" - with io.BytesIO() as out: - Image.fromarray(id2rgb(panoptic_img)).save(out, format="PNG") - segments_info = [self._convert_category_id(x) for x in segments_info] - self._predictions.append( - { - "image_id": input["image_id"], - "file_name": file_name_png, - "png_string": out.getvalue(), - "segments_info": segments_info, - } - ) - - def evaluate(self): - comm.synchronize() - - self._predictions = comm.gather(self._predictions) - self._predictions = list(itertools.chain(*self._predictions)) - if not comm.is_main_process(): - return - - # PanopticApi requires local files - gt_json = PathManager.get_local_path(self._metadata.panoptic_json) - gt_folder = PathManager.get_local_path(self._metadata.panoptic_root) - - with tempfile.TemporaryDirectory(prefix="panoptic_eval") as pred_dir: - logger.info("Writing all panoptic predictions to {} ...".format(pred_dir)) - for p in self._predictions: - with open(os.path.join(pred_dir, p["file_name"]), "wb") as f: - f.write(p.pop("png_string")) - - with open(gt_json, "r") as f: - json_data = json.load(f) - json_data["annotations"] = self._predictions - with PathManager.open(self._predictions_json, "w") as f: - f.write(json.dumps(json_data)) - - from panopticapi.evaluation import pq_compute - - with contextlib.redirect_stdout(io.StringIO()): - pq_res = pq_compute( - gt_json, - PathManager.get_local_path(self._predictions_json), - gt_folder=gt_folder, - pred_folder=pred_dir, - ) - - 
res = {} - res["PQ"] = 100 * pq_res["All"]["pq"] - res["SQ"] = 100 * pq_res["All"]["sq"] - res["RQ"] = 100 * pq_res["All"]["rq"] - res["PQ_th"] = 100 * pq_res["Things"]["pq"] - res["SQ_th"] = 100 * pq_res["Things"]["sq"] - res["RQ_th"] = 100 * pq_res["Things"]["rq"] - res["PQ_st"] = 100 * pq_res["Stuff"]["pq"] - res["SQ_st"] = 100 * pq_res["Stuff"]["sq"] - res["RQ_st"] = 100 * pq_res["Stuff"]["rq"] - - results = OrderedDict({"panoptic_seg": res}) - _print_panoptic_results(pq_res) - - return results - - -def _print_panoptic_results(pq_res): - headers = ["", "PQ", "SQ", "RQ", "#categories"] - data = [] - for name in ["All", "Things", "Stuff"]: - row = [name] + [pq_res[name][k] * 100 for k in ["pq", "sq", "rq"]] + [pq_res[name]["n"]] - data.append(row) - table = tabulate( - data, headers=headers, tablefmt="pipe", floatfmt=".3f", stralign="center", numalign="center" - ) - logger.info("Panoptic Evaluation Results:\n" + table) - - -if __name__ == "__main__": - from detectron2.utils.logger import setup_logger - - logger = setup_logger() - import argparse - - parser = argparse.ArgumentParser() - parser.add_argument("--gt-json") - parser.add_argument("--gt-dir") - parser.add_argument("--pred-json") - parser.add_argument("--pred-dir") - args = parser.parse_args() - - from panopticapi.evaluation import pq_compute - - with contextlib.redirect_stdout(io.StringIO()): - pq_res = pq_compute( - args.gt_json, args.pred_json, gt_folder=args.gt_dir, pred_folder=args.pred_dir - ) - _print_panoptic_results(pq_res) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/pascal_voc_evaluation.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/pascal_voc_evaluation.py deleted file mode 100644 index 22d2e52..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/pascal_voc_evaluation.py +++ /dev/null @@ -1,294 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import logging -import numpy as np -import os -import tempfile -import xml.etree.ElementTree as ET -from collections import OrderedDict, defaultdict -from functools import lru_cache -import torch -from fvcore.common.file_io import PathManager - -from detectron2.data import MetadataCatalog -from detectron2.utils import comm - -from .evaluator import DatasetEvaluator - - -class PascalVOCDetectionEvaluator(DatasetEvaluator): - """ - Evaluate Pascal VOC AP. - It contains a synchronization, therefore has to be called from all ranks. - - Note that this is a rewrite of the official Matlab API. - The results should be similar, but not identical to the one produced by - the official API. 
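Unlike the COCO and LVIS evaluators above, the Pascal VOC evaluator introduced here is constructed from a dataset name alone, as its __init__ below shows; the metadata must provide dirname, split, year, and thing_classes. A short usage sketch, with an illustrative builtin dataset name and assuming model and test_loader exist:

.. code-block:: python

    from detectron2.evaluation import PascalVOCDetectionEvaluator, inference_on_dataset

    evaluator = PascalVOCDetectionEvaluator("voc_2007_test")  # illustrative dataset name
    results = inference_on_dataset(model, test_loader, evaluator)
    # main process: results["bbox"] == {"AP": mean over IoU 0.50:0.95, "AP50": ..., "AP75": ...}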
- """ - - def __init__(self, dataset_name): - """ - Args: - dataset_name (str): name of the dataset, e.g., "voc_2007_test" - """ - self._dataset_name = dataset_name - meta = MetadataCatalog.get(dataset_name) - self._anno_file_template = os.path.join(meta.dirname, "Annotations", "{}.xml") - self._image_set_path = os.path.join(meta.dirname, "ImageSets", "Main", meta.split + ".txt") - self._class_names = meta.thing_classes - assert meta.year in [2007, 2012], meta.year - self._is_2007 = meta.year == 2007 - self._cpu_device = torch.device("cpu") - self._logger = logging.getLogger(__name__) - - def reset(self): - self._predictions = defaultdict(list) # class name -> list of prediction strings - - def process(self, inputs, outputs): - for input, output in zip(inputs, outputs): - image_id = input["image_id"] - instances = output["instances"].to(self._cpu_device) - boxes = instances.pred_boxes.tensor.numpy() - scores = instances.scores.tolist() - classes = instances.pred_classes.tolist() - for box, score, cls in zip(boxes, scores, classes): - xmin, ymin, xmax, ymax = box - # The inverse of data loading logic in `data/pascal_voc.py` - xmin += 1 - ymin += 1 - self._predictions[cls].append( - f"{image_id} {score:.3f} {xmin:.1f} {ymin:.1f} {xmax:.1f} {ymax:.1f}" - ) - - def evaluate(self): - """ - Returns: - dict: has a key "segm", whose value is a dict of "AP", "AP50", and "AP75". - """ - all_predictions = comm.gather(self._predictions, dst=0) - if not comm.is_main_process(): - return - predictions = defaultdict(list) - for predictions_per_rank in all_predictions: - for clsid, lines in predictions_per_rank.items(): - predictions[clsid].extend(lines) - del all_predictions - - self._logger.info( - "Evaluating {} using {} metric. " - "Note that results do not use the official Matlab API.".format( - self._dataset_name, 2007 if self._is_2007 else 2012 - ) - ) - - with tempfile.TemporaryDirectory(prefix="pascal_voc_eval_") as dirname: - res_file_template = os.path.join(dirname, "{}.txt") - - aps = defaultdict(list) # iou -> ap per class - for cls_id, cls_name in enumerate(self._class_names): - lines = predictions.get(cls_id, [""]) - - with open(res_file_template.format(cls_name), "w") as f: - f.write("\n".join(lines)) - - for thresh in range(50, 100, 5): - rec, prec, ap = voc_eval( - res_file_template, - self._anno_file_template, - self._image_set_path, - cls_name, - ovthresh=thresh / 100.0, - use_07_metric=self._is_2007, - ) - aps[thresh].append(ap * 100) - - ret = OrderedDict() - mAP = {iou: np.mean(x) for iou, x in aps.items()} - ret["bbox"] = {"AP": np.mean(list(mAP.values())), "AP50": mAP[50], "AP75": mAP[75]} - return ret - - -############################################################################## -# -# Below code is modified from -# https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/voc_eval.py -# -------------------------------------------------------- -# Fast/er R-CNN -# Licensed under The MIT License [see LICENSE for details] -# Written by Bharath Hariharan -# -------------------------------------------------------- - -"""Python implementation of the PASCAL VOC devkit's AP evaluation code.""" - - -@lru_cache(maxsize=None) -def parse_rec(filename): - """Parse a PASCAL VOC xml file.""" - with PathManager.open(filename) as f: - tree = ET.parse(f) - objects = [] - for obj in tree.findall("object"): - obj_struct = {} - obj_struct["name"] = obj.find("name").text - obj_struct["pose"] = obj.find("pose").text - obj_struct["truncated"] = int(obj.find("truncated").text) - 
obj_struct["difficult"] = int(obj.find("difficult").text) - bbox = obj.find("bndbox") - obj_struct["bbox"] = [ - int(bbox.find("xmin").text), - int(bbox.find("ymin").text), - int(bbox.find("xmax").text), - int(bbox.find("ymax").text), - ] - objects.append(obj_struct) - - return objects - - -def voc_ap(rec, prec, use_07_metric=False): - """Compute VOC AP given precision and recall. If use_07_metric is true, uses - the VOC 07 11-point method (default:False). - """ - if use_07_metric: - # 11 point metric - ap = 0.0 - for t in np.arange(0.0, 1.1, 0.1): - if np.sum(rec >= t) == 0: - p = 0 - else: - p = np.max(prec[rec >= t]) - ap = ap + p / 11.0 - else: - # correct AP calculation - # first append sentinel values at the end - mrec = np.concatenate(([0.0], rec, [1.0])) - mpre = np.concatenate(([0.0], prec, [0.0])) - - # compute the precision envelope - for i in range(mpre.size - 1, 0, -1): - mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) - - # to calculate area under PR curve, look for points - # where X axis (recall) changes value - i = np.where(mrec[1:] != mrec[:-1])[0] - - # and sum (\Delta recall) * prec - ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) - return ap - - -def voc_eval(detpath, annopath, imagesetfile, classname, ovthresh=0.5, use_07_metric=False): - """rec, prec, ap = voc_eval(detpath, - annopath, - imagesetfile, - classname, - [ovthresh], - [use_07_metric]) - - Top level function that does the PASCAL VOC evaluation. - - detpath: Path to detections - detpath.format(classname) should produce the detection results file. - annopath: Path to annotations - annopath.format(imagename) should be the xml annotations file. - imagesetfile: Text file containing the list of images, one image per line. - classname: Category name (duh) - [ovthresh]: Overlap threshold (default = 0.5) - [use_07_metric]: Whether to use VOC07's 11 point AP computation - (default False) - """ - # assumes detections are in detpath.format(classname) - # assumes annotations are in annopath.format(imagename) - # assumes imagesetfile is a text file with each line an image name - - # first load gt - # read list of images - with PathManager.open(imagesetfile, "r") as f: - lines = f.readlines() - imagenames = [x.strip() for x in lines] - - # load annots - recs = {} - for imagename in imagenames: - recs[imagename] = parse_rec(annopath.format(imagename)) - - # extract gt objects for this class - class_recs = {} - npos = 0 - for imagename in imagenames: - R = [obj for obj in recs[imagename] if obj["name"] == classname] - bbox = np.array([x["bbox"] for x in R]) - difficult = np.array([x["difficult"] for x in R]).astype(np.bool) - # difficult = np.array([False for x in R]).astype(np.bool) # treat all "difficult" as GT - det = [False] * len(R) - npos = npos + sum(~difficult) - class_recs[imagename] = {"bbox": bbox, "difficult": difficult, "det": det} - - # read dets - detfile = detpath.format(classname) - with open(detfile, "r") as f: - lines = f.readlines() - - splitlines = [x.strip().split(" ") for x in lines] - image_ids = [x[0] for x in splitlines] - confidence = np.array([float(x[1]) for x in splitlines]) - BB = np.array([[float(z) for z in x[2:]] for x in splitlines]).reshape(-1, 4) - - # sort by confidence - sorted_ind = np.argsort(-confidence) - BB = BB[sorted_ind, :] - image_ids = [image_ids[x] for x in sorted_ind] - - # go down dets and mark TPs and FPs - nd = len(image_ids) - tp = np.zeros(nd) - fp = np.zeros(nd) - for d in range(nd): - R = class_recs[image_ids[d]] - bb = BB[d, :].astype(float) - ovmax = -np.inf - 
BBGT = R["bbox"].astype(float) - - if BBGT.size > 0: - # compute overlaps - # intersection - ixmin = np.maximum(BBGT[:, 0], bb[0]) - iymin = np.maximum(BBGT[:, 1], bb[1]) - ixmax = np.minimum(BBGT[:, 2], bb[2]) - iymax = np.minimum(BBGT[:, 3], bb[3]) - iw = np.maximum(ixmax - ixmin + 1.0, 0.0) - ih = np.maximum(iymax - iymin + 1.0, 0.0) - inters = iw * ih - - # union - uni = ( - (bb[2] - bb[0] + 1.0) * (bb[3] - bb[1] + 1.0) - + (BBGT[:, 2] - BBGT[:, 0] + 1.0) * (BBGT[:, 3] - BBGT[:, 1] + 1.0) - - inters - ) - - overlaps = inters / uni - ovmax = np.max(overlaps) - jmax = np.argmax(overlaps) - - if ovmax > ovthresh: - if not R["difficult"][jmax]: - if not R["det"][jmax]: - tp[d] = 1.0 - R["det"][jmax] = 1 - else: - fp[d] = 1.0 - else: - fp[d] = 1.0 - - # compute precision recall - fp = np.cumsum(fp) - tp = np.cumsum(tp) - rec = tp / float(npos) - # avoid divide by zero in case the first detection matches a difficult - # ground truth - prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) - ap = voc_ap(rec, prec, use_07_metric) - - return rec, prec, ap diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/rotated_coco_evaluation.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/rotated_coco_evaluation.py deleted file mode 100644 index 30746e1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/rotated_coco_evaluation.py +++ /dev/null @@ -1,204 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import itertools -import json -import numpy as np -import os -import torch -from fvcore.common.file_io import PathManager -from pycocotools.cocoeval import COCOeval, maskUtils - -from detectron2.structures import BoxMode, RotatedBoxes, pairwise_iou_rotated - -from .coco_evaluation import COCOEvaluator - - -class RotatedCOCOeval(COCOeval): - @staticmethod - def is_rotated(box_list): - if type(box_list) == np.ndarray: - return box_list.shape[1] == 5 - elif type(box_list) == list: - if box_list == []: # cannot decide the box_dim - return False - return np.all( - np.array( - [ - (len(obj) == 5) and ((type(obj) == list) or (type(obj) == np.ndarray)) - for obj in box_list - ] - ) - ) - return False - - @staticmethod - def boxlist_to_tensor(boxlist, output_box_dim): - if type(boxlist) == np.ndarray: - box_tensor = torch.from_numpy(boxlist) - elif type(boxlist) == list: - if boxlist == []: - return torch.zeros((0, output_box_dim), dtype=torch.float32) - else: - box_tensor = torch.FloatTensor(boxlist) - else: - raise Exception("Unrecognized boxlist type") - - input_box_dim = box_tensor.shape[1] - if input_box_dim != output_box_dim: - if input_box_dim == 4 and output_box_dim == 5: - box_tensor = BoxMode.convert(box_tensor, BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS) - else: - raise Exception( - "Unable to convert from {}-dim box to {}-dim box".format( - input_box_dim, output_box_dim - ) - ) - return box_tensor - - def compute_iou_dt_gt(self, dt, gt, is_crowd): - if self.is_rotated(dt) or self.is_rotated(gt): - # TODO: take is_crowd into consideration - assert all(c == 0 for c in is_crowd) - dt = RotatedBoxes(self.boxlist_to_tensor(dt, output_box_dim=5)) - gt = RotatedBoxes(self.boxlist_to_tensor(gt, output_box_dim=5)) - return pairwise_iou_rotated(dt, gt) - else: - # This is the same as the classical COCO evaluation - return maskUtils.iou(dt, gt, is_crowd) - - def computeIoU(self, imgId, catId): - p = self.params - if p.useCats: - gt = self._gts[imgId, catId] - dt = self._dts[imgId, catId] - else: 
- gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]] - dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]] - if len(gt) == 0 and len(dt) == 0: - return [] - inds = np.argsort([-d["score"] for d in dt], kind="mergesort") - dt = [dt[i] for i in inds] - if len(dt) > p.maxDets[-1]: - dt = dt[0 : p.maxDets[-1]] - - assert p.iouType == "bbox", "unsupported iouType for iou computation" - - g = [g["bbox"] for g in gt] - d = [d["bbox"] for d in dt] - - # compute iou between each dt and gt region - iscrowd = [int(o["iscrowd"]) for o in gt] - - # Note: this function is copied from cocoeval.py in cocoapi - # and the major difference is here. - ious = self.compute_iou_dt_gt(d, g, iscrowd) - return ious - - -class RotatedCOCOEvaluator(COCOEvaluator): - """ - Evaluate object proposal/instance detection outputs using COCO-like metrics and APIs, - with rotated boxes support. - Note: this uses IOU only and does not consider angle differences. - """ - - def process(self, inputs, outputs): - """ - Args: - inputs: the inputs to a COCO model (e.g., GeneralizedRCNN). - It is a list of dict. Each dict corresponds to an image and - contains keys like "height", "width", "file_name", "image_id". - outputs: the outputs of a COCO model. It is a list of dicts with key - "instances" that contains :class:`Instances`. - """ - for input, output in zip(inputs, outputs): - prediction = {"image_id": input["image_id"]} - - if "instances" in output: - instances = output["instances"].to(self._cpu_device) - - prediction["instances"] = self.instances_to_json(instances, input["image_id"]) - if "proposals" in output: - prediction["proposals"] = output["proposals"].to(self._cpu_device) - self._predictions.append(prediction) - - def instances_to_json(self, instances, img_id): - num_instance = len(instances) - if num_instance == 0: - return [] - - boxes = instances.pred_boxes.tensor.numpy() - if boxes.shape[1] == 4: - boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) - boxes = boxes.tolist() - scores = instances.scores.tolist() - classes = instances.pred_classes.tolist() - - results = [] - for k in range(num_instance): - result = { - "image_id": img_id, - "category_id": classes[k], - "bbox": boxes[k], - "score": scores[k], - } - - results.append(result) - return results - - def _eval_predictions(self, tasks, predictions): - """ - Evaluate predictions on the given tasks. - Fill self._results with the metrics of the tasks. 
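# A minimal sketch (plain numpy, no detectron2) of the box conversion that
# instances_to_json above performs via BoxMode.convert: COCO-style JSON results
# store boxes as (x0, y0, width, height) instead of two corners.
import numpy as np

def xyxy_to_xywh(boxes_xyxy: np.ndarray) -> np.ndarray:
    out = boxes_xyxy.astype(float)
    out[:, 2] = boxes_xyxy[:, 2] - boxes_xyxy[:, 0]  # width  = x1 - x0
    out[:, 3] = boxes_xyxy[:, 3] - boxes_xyxy[:, 1]  # height = y1 - y0
    return out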
- """ - self._logger.info("Preparing results for COCO format ...") - coco_results = list(itertools.chain(*[x["instances"] for x in predictions])) - - # unmap the category ids for COCO - if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): - reverse_id_mapping = { - v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items() - } - for result in coco_results: - result["category_id"] = reverse_id_mapping[result["category_id"]] - - if self._output_dir: - file_path = os.path.join(self._output_dir, "coco_instances_results.json") - self._logger.info("Saving results to {}".format(file_path)) - with PathManager.open(file_path, "w") as f: - f.write(json.dumps(coco_results)) - f.flush() - - if not self._do_evaluation: - self._logger.info("Annotations are not available for evaluation.") - return - - self._logger.info("Evaluating predictions ...") - for task in sorted(tasks): - assert task == "bbox", "Task {} is not supported".format(task) - coco_eval = ( - self._evaluate_predictions_on_coco(self._coco_api, coco_results) - if len(coco_results) > 0 - else None # cocoapi does not handle empty results very well - ) - - res = self._derive_coco_results( - coco_eval, task, class_names=self._metadata.get("thing_classes") - ) - self._results[task] = res - - def _evaluate_predictions_on_coco(self, coco_gt, coco_results): - """ - Evaluate the coco results using COCOEval API. - """ - assert len(coco_results) > 0 - - coco_dt = coco_gt.loadRes(coco_results) - - # Only bbox is supported for now - coco_eval = RotatedCOCOeval(coco_gt, coco_dt, iouType="bbox") - - coco_eval.evaluate() - coco_eval.accumulate() - coco_eval.summarize() - - return coco_eval diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/sem_seg_evaluation.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/sem_seg_evaluation.py deleted file mode 100644 index fb3b28d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/sem_seg_evaluation.py +++ /dev/null @@ -1,168 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import itertools -import json -import logging -import numpy as np -import os -from collections import OrderedDict -import PIL.Image as Image -import pycocotools.mask as mask_util -import torch -from fvcore.common.file_io import PathManager - -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.utils.comm import all_gather, is_main_process, synchronize - -from .evaluator import DatasetEvaluator - - -class SemSegEvaluator(DatasetEvaluator): - """ - Evaluate semantic segmentation - """ - - def __init__(self, dataset_name, distributed, num_classes, ignore_label=255, output_dir=None): - """ - Args: - dataset_name (str): name of the dataset to be evaluated. - distributed (True): if True, will collect results from all ranks for evaluation. - Otherwise, will evaluate the results in the current process. - num_classes (int): number of classes - ignore_label (int): value in semantic segmentation ground truth. Predictions for the - corresponding pixels should be ignored. - output_dir (str): an output directory to dump results. 
- """ - self._dataset_name = dataset_name - self._distributed = distributed - self._output_dir = output_dir - self._num_classes = num_classes - self._ignore_label = ignore_label - self._N = num_classes + 1 - - self._cpu_device = torch.device("cpu") - self._logger = logging.getLogger(__name__) - - self.input_file_to_gt_file = { - dataset_record["file_name"]: dataset_record["sem_seg_file_name"] - for dataset_record in DatasetCatalog.get(dataset_name) - } - - meta = MetadataCatalog.get(dataset_name) - # Dict that maps contiguous training ids to COCO category ids - try: - c2d = meta.stuff_dataset_id_to_contiguous_id - self._contiguous_id_to_dataset_id = {v: k for k, v in c2d.items()} - except AttributeError: - self._contiguous_id_to_dataset_id = None - self._class_names = meta.stuff_classes - - def reset(self): - self._conf_matrix = np.zeros((self._N, self._N), dtype=np.int64) - self._predictions = [] - - def process(self, inputs, outputs): - """ - Args: - inputs: the inputs to a model. - It is a list of dicts. Each dict corresponds to an image and - contains keys like "height", "width", "file_name". - outputs: the outputs of a model. It is either list of semantic segmentation predictions - (Tensor [H, W]) or list of dicts with key "sem_seg" that contains semantic - segmentation prediction in the same format. - """ - for input, output in zip(inputs, outputs): - output = output["sem_seg"].argmax(dim=0).to(self._cpu_device) - pred = np.array(output, dtype=np.int) - with PathManager.open(self.input_file_to_gt_file[input["file_name"]], "rb") as f: - gt = np.array(Image.open(f), dtype=np.int) - - gt[gt == self._ignore_label] = self._num_classes - - self._conf_matrix += np.bincount( - self._N * pred.reshape(-1) + gt.reshape(-1), minlength=self._N ** 2 - ).reshape(self._N, self._N) - - self._predictions.extend(self.encode_json_sem_seg(pred, input["file_name"])) - - def evaluate(self): - """ - Evaluates standard semantic segmentation metrics (http://cocodataset.org/#stuff-eval): - - * Mean intersection-over-union averaged across classes (mIoU) - * Frequency Weighted IoU (fwIoU) - * Mean pixel accuracy averaged across classes (mACC) - * Pixel Accuracy (pACC) - """ - if self._distributed: - synchronize() - conf_matrix_list = all_gather(self._conf_matrix) - self._predictions = all_gather(self._predictions) - self._predictions = list(itertools.chain(*self._predictions)) - if not is_main_process(): - return - - self._conf_matrix = np.zeros_like(self._conf_matrix) - for conf_matrix in conf_matrix_list: - self._conf_matrix += conf_matrix - - if self._output_dir: - PathManager.mkdirs(self._output_dir) - file_path = os.path.join(self._output_dir, "sem_seg_predictions.json") - with PathManager.open(file_path, "w") as f: - f.write(json.dumps(self._predictions)) - - acc = np.full(self._num_classes, np.nan, dtype=np.float) - iou = np.full(self._num_classes, np.nan, dtype=np.float) - tp = self._conf_matrix.diagonal()[:-1].astype(np.float) - pos_gt = np.sum(self._conf_matrix[:-1, :-1], axis=0).astype(np.float) - class_weights = pos_gt / np.sum(pos_gt) - pos_pred = np.sum(self._conf_matrix[:-1, :-1], axis=1).astype(np.float) - acc_valid = pos_gt > 0 - acc[acc_valid] = tp[acc_valid] / pos_gt[acc_valid] - iou_valid = (pos_gt + pos_pred) > 0 - union = pos_gt + pos_pred - tp - iou[acc_valid] = tp[acc_valid] / union[acc_valid] - macc = np.sum(acc[acc_valid]) / np.sum(acc_valid) - miou = np.sum(iou[acc_valid]) / np.sum(iou_valid) - fiou = np.sum(iou[acc_valid] * class_weights[acc_valid]) - pacc = np.sum(tp) / 
np.sum(pos_gt) - - res = {} - res["mIoU"] = 100 * miou - res["fwIoU"] = 100 * fiou - for i, name in enumerate(self._class_names): - res["IoU-{}".format(name)] = 100 * iou[i] - res["mACC"] = 100 * macc - res["pACC"] = 100 * pacc - for i, name in enumerate(self._class_names): - res["ACC-{}".format(name)] = 100 * acc[i] - - if self._output_dir: - file_path = os.path.join(self._output_dir, "sem_seg_evaluation.pth") - with PathManager.open(file_path, "wb") as f: - torch.save(res, f) - results = OrderedDict({"sem_seg": res}) - self._logger.info(results) - return results - - def encode_json_sem_seg(self, sem_seg, input_file_name): - """ - Convert semantic segmentation to COCO stuff format with segments encoded as RLEs. - See http://cocodataset.org/#format-results - """ - json_list = [] - for label in np.unique(sem_seg): - if self._contiguous_id_to_dataset_id is not None: - assert ( - label in self._contiguous_id_to_dataset_id - ), "Label {} is not in the metadata info for {}".format(label, self._dataset_name) - dataset_id = self._contiguous_id_to_dataset_id[label] - else: - dataset_id = int(label) - mask = (sem_seg == label).astype(np.uint8) - mask_rle = mask_util.encode(np.array(mask[:, :, None], order="F"))[0] - mask_rle["counts"] = mask_rle["counts"].decode("utf-8") - json_list.append( - {"file_name": input_file_name, "category_id": dataset_id, "segmentation": mask_rle} - ) - return json_list diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/testing.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/testing.py deleted file mode 100644 index 95addeb..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/testing.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -import numpy as np -import pprint -import sys -from collections import OrderedDict -from collections.abc import Mapping - - -def print_csv_format(results): - """ - Print main metrics in a format similar to Detectron, - so that they are easy to copypaste into a spreadsheet. - - Args: - results (OrderedDict[dict]): task_name -> {metric -> score} - """ - assert isinstance(results, OrderedDict), results # unordered results cannot be properly printed - logger = logging.getLogger(__name__) - for task, res in results.items(): - # Don't print "AP-category" metrics since they are usually not tracked. 
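# A minimal sketch of the one-call confusion-matrix update used by
# SemSegEvaluator.process above: with N = num_classes + 1, the flattened index
# N * pred + gt counts every (prediction, ground-truth) pixel pair via bincount.
# The label arrays are made up; the extra last row/column absorbs ignored pixels.
import numpy as np

num_classes = 3
N = num_classes + 1
pred = np.array([0, 1, 2, 2, 1])
gt = np.array([0, 1, 1, 2, 3])  # 3 stands in for the remapped ignore label
conf = np.bincount(N * pred + gt, minlength=N * N).reshape(N, N)
per_class_tp = conf.diagonal()[:-1]  # correctly classified pixels per real class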
- important_res = [(k, v) for k, v in res.items() if "-" not in k] - logger.info("copypaste: Task: {}".format(task)) - logger.info("copypaste: " + ",".join([k[0] for k in important_res])) - logger.info("copypaste: " + ",".join(["{0:.4f}".format(k[1]) for k in important_res])) - - -def verify_results(cfg, results): - """ - Args: - results (OrderedDict[dict]): task_name -> {metric -> score} - - Returns: - bool: whether the verification succeeds or not - """ - expected_results = cfg.TEST.EXPECTED_RESULTS - if not len(expected_results): - return True - - ok = True - for task, metric, expected, tolerance in expected_results: - actual = results[task][metric] - if not np.isfinite(actual): - ok = False - diff = abs(actual - expected) - if diff > tolerance: - ok = False - - logger = logging.getLogger(__name__) - if not ok: - logger.error("Result verification failed!") - logger.error("Expected Results: " + str(expected_results)) - logger.error("Actual Results: " + pprint.pformat(results)) - - sys.exit(1) - else: - logger.info("Results verification passed.") - return ok - - -def flatten_results_dict(results): - """ - Expand a hierarchical dict of scalars into a flat dict of scalars. - If results[k1][k2][k3] = v, the returned dict will have the entry - {"k1/k2/k3": v}. - - Args: - results (dict): - """ - r = {} - for k, v in results.items(): - if isinstance(v, Mapping): - v = flatten_results_dict(v) - for kk, vv in v.items(): - r[k + "/" + kk] = vv - else: - r[k] = v - return r diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/README.md b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/README.md deleted file mode 100644 index 9bd8b57..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/README.md +++ /dev/null @@ -1,10 +0,0 @@ - -This directory contains code to prepare a detectron2 model for deployment. -Currently it supports exporting a detectron2 model to Caffe2 format through ONNX. - -Please see [documentation](https://detectron2.readthedocs.io/tutorials/deployment.html) for its usage. - - -### Acknowledgements - -Thanks to Mobile Vision team at Facebook for developing the conversion tools. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/__init__.py deleted file mode 100644 index 1e2bf4d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# -*- coding: utf-8 -*- - -from .api import * - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/api.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/api.py deleted file mode 100644 index a760071..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/api.py +++ /dev/null @@ -1,277 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
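# A minimal usage sketch for the flatten_results_dict defined above in
# testing.py (assumed importable); the nested results dict is hypothetical.
results = {"bbox": {"AP": 39.4, "AP50": 61.0}, "segm": {"AP": 35.2}}
flat = flatten_results_dict(results)
# flat == {"bbox/AP": 39.4, "bbox/AP50": 61.0, "segm/AP": 35.2}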
-import copy -import logging -import os -import torch -from caffe2.proto import caffe2_pb2 -from torch import nn - -from detectron2.config import CfgNode as CN - -from .caffe2_export import export_caffe2_detection_model -from .caffe2_export import export_onnx_model as export_onnx_model_impl -from .caffe2_export import run_and_save_graph -from .caffe2_inference import ProtobufDetectionModel -from .caffe2_modeling import META_ARCH_CAFFE2_EXPORT_TYPE_MAP, convert_batched_inputs_to_c2_format -from .shared import get_pb_arg_vali, get_pb_arg_vals, save_graph - -__all__ = [ - "add_export_config", - "export_caffe2_model", - "Caffe2Model", - "export_onnx_model", - "Caffe2Tracer", -] - - -def add_export_config(cfg): - """ - Args: - cfg (CfgNode): a detectron2 config - - Returns: - CfgNode: an updated config with new options that will be used - by :class:`Caffe2Tracer`. - """ - is_frozen = cfg.is_frozen() - cfg.defrost() - cfg.EXPORT_CAFFE2 = CN() - cfg.EXPORT_CAFFE2.USE_HEATMAP_MAX_KEYPOINT = False - if is_frozen: - cfg.freeze() - return cfg - - -class Caffe2Tracer: - """ - Make a detectron2 model traceable with caffe2 style. - - An original detectron2 model may not be traceable, or - cannot be deployed directly after being traced, due to some reasons: - 1. control flow in some ops - 2. custom ops - 3. complicated pre/post processing - - This class provides a traceable version of a detectron2 model by: - 1. Rewrite parts of the model using ops in caffe2. Note that some ops do - not have GPU implementation. - 2. Define the inputs "after pre-processing" as inputs to the model - 3. Remove post-processing and produce raw layer outputs - - More specifically about inputs: all builtin models take two input tensors. - (1) NCHW float "data" which is an image (usually in [0, 255]) - (2) Nx3 float "im_info", each row of which is (height, width, 1.0) - - After making a traceable model, the class provide methods to export such a - model to different deployment formats. - - The class currently only supports models using builtin meta architectures. - """ - - def __init__(self, cfg, model, inputs): - """ - Args: - cfg (CfgNode): a detectron2 config, with extra export-related options - added by :func:`add_export_config`. - model (nn.Module): a model built by - :func:`detectron2.modeling.build_model`. - inputs: sample inputs that the given model takes for inference. - Will be used to trace the model. - """ - assert isinstance(cfg, CN), cfg - assert isinstance(model, torch.nn.Module), type(model) - if "EXPORT_CAFFE2" not in cfg: - cfg = add_export_config(cfg) # will just the defaults - - self.cfg = cfg - self.model = model - self.inputs = inputs - - def _get_traceable(self): - # TODO how to make it extensible to support custom models - C2MetaArch = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[self.cfg.MODEL.META_ARCHITECTURE] - traceable_model = C2MetaArch(self.cfg, copy.deepcopy(self.model)) - traceable_inputs = traceable_model.get_caffe2_inputs(self.inputs) - return traceable_model, traceable_inputs - - def export_caffe2(self): - """ - Export the model to Caffe2's protobuf format. - The returned object can be saved with `.save_protobuf()` method. - The result can be loaded and executed using Caffe2 runtime. - - Returns: - Caffe2Model - """ - model, inputs = self._get_traceable() - predict_net, init_net = export_caffe2_detection_model(model, inputs) - return Caffe2Model(predict_net, init_net) - - def export_onnx(self): - """ - Export the model to ONNX format. 
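# A minimal usage sketch for the Caffe2Tracer / add_export_config API defined in
# this file; `cfg`, `model` and `sample_inputs` are placeholders for a real
# detectron2 config, a model from detectron2.modeling.build_model, and one batch
# of inference inputs, so this is an outline rather than a runnable snippet.
cfg = add_export_config(cfg)                   # add the EXPORT_CAFFE2 options
tracer = Caffe2Tracer(cfg, model, sample_inputs)
caffe2_model = tracer.export_caffe2()          # returns a Caffe2Model
caffe2_model.save_protobuf("./caffe2_export")  # writes model.pb / model_init.pb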
- Note that the exported model contains custom ops only available in caffe2, therefore it - cannot be directly executed by other runtime. Post-processing or transformation passes - may be applied on the model to accommodate different runtimes. - - Returns: - onnx.ModelProto: an onnx model. - """ - model, inputs = self._get_traceable() - return export_onnx_model_impl(model, (inputs,)) - - def export_torchscript(self): - """ - Export the model to a `torch.jit.TracedModule` by tracing. - The returned object can be saved to a file by ".save()". - - Returns: - torch.jit.TracedModule: a torch TracedModule - """ - model, inputs = self._get_traceable() - logger = logging.getLogger(__name__) - logger.info("Tracing the model with torch.jit.trace ...") - with torch.no_grad(): - return torch.jit.trace(model, (inputs,), optimize=True) - - -def export_caffe2_model(cfg, model, inputs): - """ - Export a detectron2 model to caffe2 format. - - Args: - cfg (CfgNode): a detectron2 config, with extra export-related options - added by :func:`add_export_config`. - model (nn.Module): a model built by - :func:`detectron2.modeling.build_model`. - It will be modified by this function. - inputs: sample inputs that the given model takes for inference. - Will be used to trace the model. - - Returns: - Caffe2Model - """ - return Caffe2Tracer(cfg, model, inputs).export_caffe2() - - -def export_onnx_model(cfg, model, inputs): - """ - Export a detectron2 model to ONNX format. - Note that the exported model contains custom ops only available in caffe2, therefore it - cannot be directly executed by other runtime. Post-processing or transformation passes - may be applied on the model to accommodate different runtimes. - Args: - cfg (CfgNode): a detectron2 config, with extra export-related options - added by :func:`add_export_config`. - model (nn.Module): a model built by - :func:`detectron2.modeling.build_model`. - It will be modified by this function. - inputs: sample inputs that the given model takes for inference. - Will be used to trace the model. - Returns: - onnx.ModelProto: an onnx model. - """ - return Caffe2Tracer(cfg, model, inputs).export_onnx() - - -class Caffe2Model(nn.Module): - """ - A wrapper around the traced model in caffe2's pb format. - """ - - def __init__(self, predict_net, init_net): - super().__init__() - self.eval() # always in eval mode - self._predict_net = predict_net - self._init_net = init_net - self._predictor = None - - @property - def predict_net(self): - """ - Returns: - core.Net: the underlying caffe2 predict net - """ - return self._predict_net - - @property - def init_net(self): - """ - Returns: - core.Net: the underlying caffe2 init net - """ - return self._init_net - - __init__.__HIDE_SPHINX_DOC__ = True - - def save_protobuf(self, output_dir): - """ - Save the model as caffe2's protobuf format. - - Args: - output_dir (str): the output directory to save protobuf files. - """ - logger = logging.getLogger(__name__) - logger.info("Saving model to {} ...".format(output_dir)) - os.makedirs(output_dir, exist_ok=True) - - with open(os.path.join(output_dir, "model.pb"), "wb") as f: - f.write(self._predict_net.SerializeToString()) - with open(os.path.join(output_dir, "model.pbtxt"), "w") as f: - f.write(str(self._predict_net)) - with open(os.path.join(output_dir, "model_init.pb"), "wb") as f: - f.write(self._init_net.SerializeToString()) - - def save_graph(self, output_file, inputs=None): - """ - Save the graph as SVG format. 
- - Args: - output_file (str): a SVG file - inputs: optional inputs given to the model. - If given, the inputs will be used to run the graph to record - shape of every tensor. The shape information will be - saved together with the graph. - """ - if inputs is None: - save_graph(self._predict_net, output_file, op_only=False) - else: - size_divisibility = get_pb_arg_vali(self._predict_net, "size_divisibility", 0) - device = get_pb_arg_vals(self._predict_net, "device", b"cpu").decode("ascii") - inputs = convert_batched_inputs_to_c2_format(inputs, size_divisibility, device) - inputs = [x.cpu().numpy() for x in inputs] - run_and_save_graph(self._predict_net, self._init_net, inputs, output_file) - - @staticmethod - def load_protobuf(dir): - """ - Args: - dir (str): a directory used to save Caffe2Model with - :meth:`save_protobuf`. - The files "model.pb" and "model_init.pb" are needed. - - Returns: - Caffe2Model: the caffe2 model loaded from this directory. - """ - predict_net = caffe2_pb2.NetDef() - with open(os.path.join(dir, "model.pb"), "rb") as f: - predict_net.ParseFromString(f.read()) - - init_net = caffe2_pb2.NetDef() - with open(os.path.join(dir, "model_init.pb"), "rb") as f: - init_net.ParseFromString(f.read()) - - return Caffe2Model(predict_net, init_net) - - def __call__(self, inputs): - """ - An interface that wraps around a caffe2 model and mimics detectron2's models' - input & output format. This is used to compare the outputs of caffe2 model - with its original torch model. - - Due to the extra conversion between torch/caffe2, - this method is not meant for benchmark. - """ - if self._predictor is None: - self._predictor = ProtobufDetectionModel(self._predict_net, self._init_net) - return self._predictor(inputs) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/c10.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/c10.py deleted file mode 100644 index 6e3cbe3..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/c10.py +++ /dev/null @@ -1,503 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -import math -import torch -import torch.nn.functional as F - -from detectron2.layers import cat -from detectron2.layers.roi_align_rotated import ROIAlignRotated -from detectron2.modeling import poolers -from detectron2.modeling.proposal_generator import rpn -from detectron2.modeling.roi_heads.mask_head import mask_rcnn_inference -from detectron2.structures import Boxes, ImageList, Instances, Keypoints - -from .shared import alias, to_device - - -""" -This file contains caffe2-compatible implementation of several detectrno2 components. -""" - - -class Caffe2Boxes(Boxes): - """ - Representing a list of detectron2.structures.Boxes from minibatch, each box - is represented by a 5d vector (batch index + 4 coordinates), or a 6d vector - (batch index + 5 coordinates) for RotatedBoxes. - """ - - def __init__(self, tensor): - assert isinstance(tensor, torch.Tensor) - assert tensor.dim() == 2 and tensor.size(-1) in [4, 5, 6], tensor.size() - # TODO: make tensor immutable when dim is Nx5 for Boxes, - # and Nx6 for RotatedBoxes? - self.tensor = tensor - - -# TODO clean up this class, maybe just extend Instances -class InstancesList(object): - """ - Tensor representation of a list of Instances object for a batch of images. 
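# A minimal sketch pairing with save_protobuf above: reload the exported nets
# with Caffe2Model.load_protobuf and run them through __call__, which mimics the
# original detectron2 model's input/output format. The directory name and
# `batched_inputs` (detectron2's list-of-dict format) are placeholders.
reloaded = Caffe2Model.load_protobuf("./caffe2_export")
outputs = reloaded(batched_inputs)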
- - When dealing with a batch of images with Caffe2 ops, a list of bboxes - (instances) are usually represented by single Tensor with size - (sigma(Ni), 5) or (sigma(Ni), 4) plus a batch split Tensor. This class is - for providing common functions to convert between these two representations. - """ - - def __init__(self, im_info, indices, extra_fields=None): - # [N, 3] -> (H, W, Scale) - self.im_info = im_info - # [N,] -> indice of batch to which the instance belongs - self.indices = indices - # [N, ...] - self.batch_extra_fields = extra_fields or {} - - self.image_size = self.im_info - - def get_fields(self): - """ like `get_fields` in the Instances object, - but return each field in tensor representations """ - ret = {} - for k, v in self.batch_extra_fields.items(): - # if isinstance(v, torch.Tensor): - # tensor_rep = v - # elif isinstance(v, (Boxes, Keypoints)): - # tensor_rep = v.tensor - # else: - # raise ValueError("Can't find tensor representation for: {}".format()) - ret[k] = v - return ret - - def has(self, name): - return name in self.batch_extra_fields - - def set(self, name, value): - data_len = len(value) - if len(self.batch_extra_fields): - assert ( - len(self) == data_len - ), "Adding a field of length {} to a Instances of length {}".format(data_len, len(self)) - self.batch_extra_fields[name] = value - - def __setattr__(self, name, val): - if name in ["im_info", "indices", "batch_extra_fields", "image_size"]: - super().__setattr__(name, val) - else: - self.set(name, val) - - def __getattr__(self, name): - if name not in self.batch_extra_fields: - raise AttributeError("Cannot find field '{}' in the given Instances!".format(name)) - return self.batch_extra_fields[name] - - def __len__(self): - return len(self.indices) - - def flatten(self): - ret = [] - for _, v in self.batch_extra_fields.items(): - if isinstance(v, (Boxes, Keypoints)): - ret.append(v.tensor) - else: - ret.append(v) - return ret - - @staticmethod - def to_d2_instances_list(instances_list): - """ - Convert InstancesList to List[Instances]. The input `instances_list` can - also be a List[Instances], in this case this method is a non-op. - """ - if not isinstance(instances_list, InstancesList): - assert all(isinstance(x, Instances) for x in instances_list) - return instances_list - - ret = [] - for i, info in enumerate(instances_list.im_info): - instances = Instances(torch.Size([int(info[0].item()), int(info[1].item())])) - - ids = instances_list.indices == i - for k, v in instances_list.batch_extra_fields.items(): - if isinstance(v, torch.Tensor): - instances.set(k, v[ids]) - continue - elif isinstance(v, Boxes): - instances.set(k, v[ids, -4:]) - continue - - target_type, tensor_source = v - assert isinstance(tensor_source, torch.Tensor) - assert tensor_source.shape[0] == instances_list.indices.shape[0] - tensor_source = tensor_source[ids] - - if issubclass(target_type, Boxes): - instances.set(k, Boxes(tensor_source[:, -4:])) - elif issubclass(target_type, Keypoints): - instances.set(k, Keypoints(tensor_source)) - elif issubclass(target_type, torch.Tensor): - instances.set(k, tensor_source) - else: - raise ValueError("Can't handle targe type: {}".format(target_type)) - - ret.append(instances) - return ret - - -class Caffe2Compatible(object): - def _get_tensor_mode(self): - return self._tensor_mode - - def _set_tensor_mode(self, v): - self._tensor_mode = v - - tensor_mode = property(_get_tensor_mode, _set_tensor_mode) - """ - If true, the model expects C2-style tensor only inputs/outputs format. 
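# A minimal sketch (plain torch, made-up values) of the batched representation
# that InstancesList above wraps: boxes from all images live in one
# (sum_i Ni, 4) tensor, and a parallel `indices` tensor records which image each
# row belongs to, so per-image instances are recovered with a boolean mask.
import torch

boxes = torch.tensor([[0.0, 0.0, 10.0, 10.0],
                      [5.0, 5.0, 20.0, 20.0],
                      [1.0, 2.0, 3.0, 4.0]])
indices = torch.tensor([0, 0, 1])   # first two boxes belong to image 0
boxes_image0 = boxes[indices == 0]  # what to_d2_instances_list does per image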
- """ - - -class Caffe2RPN(Caffe2Compatible, rpn.RPN): - def forward(self, images, features, gt_instances=None): - assert not self.training - - features = [features[f] for f in self.in_features] - objectness_logits_pred, anchor_deltas_pred = self.rpn_head(features) - - assert isinstance(images, ImageList) - if self.tensor_mode: - im_info = images.image_sizes - else: - im_info = torch.Tensor( - [[im_sz[0], im_sz[1], torch.Tensor([1.0])] for im_sz in images.image_sizes] - ).to(images.tensor.device) - assert isinstance(im_info, torch.Tensor) - - rpn_rois_list = [] - rpn_roi_probs_list = [] - for scores, bbox_deltas, cell_anchors_tensor, feat_stride in zip( - objectness_logits_pred, - anchor_deltas_pred, - iter(self.anchor_generator.cell_anchors), - self.anchor_generator.strides, - ): - scores = scores.detach() - bbox_deltas = bbox_deltas.detach() - - rpn_rois, rpn_roi_probs = torch.ops._caffe2.GenerateProposals( - scores, - bbox_deltas, - im_info, - cell_anchors_tensor, - spatial_scale=1.0 / feat_stride, - pre_nms_topN=self.pre_nms_topk[self.training], - post_nms_topN=self.post_nms_topk[self.training], - nms_thresh=self.nms_thresh, - min_size=self.min_box_side_len, - # correct_transform_coords=True, # deprecated argument - angle_bound_on=True, # Default - angle_bound_lo=-180, - angle_bound_hi=180, - clip_angle_thresh=1.0, # Default - legacy_plus_one=False, - ) - rpn_rois_list.append(rpn_rois) - rpn_roi_probs_list.append(rpn_roi_probs) - - # For FPN in D2, in RPN all proposals from different levels are concated - # together, ranked and picked by top post_nms_topk. Then in ROIPooler - # it calculates level_assignments and calls the RoIAlign from - # the corresponding level. - - if len(objectness_logits_pred) == 1: - rpn_rois = rpn_rois_list[0] - rpn_roi_probs = rpn_roi_probs_list[0] - else: - assert len(rpn_rois_list) == len(rpn_roi_probs_list) - rpn_post_nms_topN = self.post_nms_topk[self.training] - - device = rpn_rois_list[0].device - input_list = [to_device(x, "cpu") for x in (rpn_rois_list + rpn_roi_probs_list)] - - # TODO remove this after confirming rpn_max_level/rpn_min_level - # is not needed in CollectRpnProposals. - feature_strides = list(self.anchor_generator.strides) - rpn_min_level = int(math.log2(feature_strides[0])) - rpn_max_level = int(math.log2(feature_strides[-1])) - assert (rpn_max_level - rpn_min_level + 1) == len( - rpn_rois_list - ), "CollectRpnProposals requires continuous levels" - - rpn_rois = torch.ops._caffe2.CollectRpnProposals( - input_list, - # NOTE: in current implementation, rpn_max_level and rpn_min_level - # are not needed, only the subtraction of two matters and it - # can be infer from the number of inputs. Keep them now for - # consistency. 
- rpn_max_level=2 + len(rpn_rois_list) - 1, - rpn_min_level=2, - rpn_post_nms_topN=rpn_post_nms_topN, - ) - rpn_rois = to_device(rpn_rois, device) - rpn_roi_probs = [] - - proposals = self.c2_postprocess(im_info, rpn_rois, rpn_roi_probs, self.tensor_mode) - return proposals, {} - - @staticmethod - def c2_postprocess(im_info, rpn_rois, rpn_roi_probs, tensor_mode): - proposals = InstancesList( - im_info=im_info, - indices=rpn_rois[:, 0], - extra_fields={ - "proposal_boxes": Caffe2Boxes(rpn_rois), - "objectness_logits": (torch.Tensor, rpn_roi_probs), - }, - ) - if not tensor_mode: - proposals = InstancesList.to_d2_instances_list(proposals) - else: - proposals = [proposals] - return proposals - - -class Caffe2ROIPooler(Caffe2Compatible, poolers.ROIPooler): - @staticmethod - def c2_preprocess(box_lists): - assert all(isinstance(x, Boxes) for x in box_lists) - if all(isinstance(x, Caffe2Boxes) for x in box_lists): - # input is pure-tensor based - assert len(box_lists) == 1 - pooler_fmt_boxes = box_lists[0].tensor - else: - pooler_fmt_boxes = poolers.convert_boxes_to_pooler_format(box_lists) - return pooler_fmt_boxes - - def forward(self, x, box_lists): - assert not self.training - - pooler_fmt_boxes = self.c2_preprocess(box_lists) - num_level_assignments = len(self.level_poolers) - - if num_level_assignments == 1: - if isinstance(self.level_poolers[0], ROIAlignRotated): - c2_roi_align = torch.ops._caffe2.RoIAlignRotated - aligned = True - else: - c2_roi_align = torch.ops._caffe2.RoIAlign - aligned = self.level_poolers[0].aligned - - out = c2_roi_align( - x[0], - pooler_fmt_boxes, - order="NCHW", - spatial_scale=float(self.level_poolers[0].spatial_scale), - pooled_h=int(self.output_size[0]), - pooled_w=int(self.output_size[1]), - sampling_ratio=int(self.level_poolers[0].sampling_ratio), - aligned=aligned, - ) - return out - - device = pooler_fmt_boxes.device - assert ( - self.max_level - self.min_level + 1 == 4 - ), "Currently DistributeFpnProposals only support 4 levels" - fpn_outputs = torch.ops._caffe2.DistributeFpnProposals( - to_device(pooler_fmt_boxes, "cpu"), - roi_canonical_scale=self.canonical_box_size, - roi_canonical_level=self.canonical_level, - roi_max_level=self.max_level, - roi_min_level=self.min_level, - legacy_plus_one=False, - ) - fpn_outputs = [to_device(x, device) for x in fpn_outputs] - - rois_fpn_list = fpn_outputs[:-1] - rois_idx_restore_int32 = fpn_outputs[-1] - - roi_feat_fpn_list = [] - for roi_fpn, x_level, pooler in zip(rois_fpn_list, x, self.level_poolers): - if isinstance(pooler, ROIAlignRotated): - c2_roi_align = torch.ops._caffe2.RoIAlignRotated - aligned = True - else: - c2_roi_align = torch.ops._caffe2.RoIAlign - aligned = bool(pooler.aligned) - - roi_feat_fpn = c2_roi_align( - x_level, - roi_fpn, - order="NCHW", - spatial_scale=float(pooler.spatial_scale), - pooled_h=int(self.output_size[0]), - pooled_w=int(self.output_size[1]), - sampling_ratio=int(pooler.sampling_ratio), - aligned=aligned, - ) - roi_feat_fpn_list.append(roi_feat_fpn) - - roi_feat_shuffled = cat(roi_feat_fpn_list, dim=0) - roi_feat = torch.ops._caffe2.BatchPermutation(roi_feat_shuffled, rois_idx_restore_int32) - return roi_feat - - -class Caffe2FastRCNNOutputsInference: - def __init__(self, tensor_mode): - self.tensor_mode = tensor_mode # whether the output is caffe2 tensor mode - - def __call__(self, box_predictor, predictions, proposals): - """ equivalent to FastRCNNOutputLayers.inference """ - score_thresh = box_predictor.test_score_thresh - nms_thresh = box_predictor.test_nms_thresh - 
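# A minimal sketch of the FPN level-assignment heuristic that
# DistributeFpnProposals (used by Caffe2ROIPooler above) implements; the
# canonical level/scale defaults here are typical detectron2 values and are
# assumptions, not read from any config.
import math

def fpn_level(box_area, min_level=2, max_level=5, canonical_level=4, canonical_scale=224):
    level = canonical_level + math.log2(math.sqrt(box_area) / canonical_scale + 1e-8)
    return int(min(max(math.floor(level), min_level), max_level))

fpn_level(224 * 224)  # -> 4: a canonically sized box pools from the canonical level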
topk_per_image = box_predictor.test_topk_per_image - is_rotated = len(box_predictor.box2box_transform.weights) == 5 - - if is_rotated: - box_dim = 5 - assert box_predictor.box2box_transform.weights[4] == 1, ( - "The weights for Rotated BBoxTransform in C2 have only 4 dimensions," - + " thus enforcing the angle weight to be 1 for now" - ) - box2box_transform_weights = box_predictor.box2box_transform.weights[:4] - else: - box_dim = 4 - box2box_transform_weights = box_predictor.box2box_transform.weights - - class_logits, box_regression = predictions - class_prob = F.softmax(class_logits, -1) - - assert box_regression.shape[1] % box_dim == 0 - cls_agnostic_bbox_reg = box_regression.shape[1] // box_dim == 1 - - input_tensor_mode = proposals[0].proposal_boxes.tensor.shape[1] == box_dim + 1 - - rois = type(proposals[0].proposal_boxes).cat([p.proposal_boxes for p in proposals]) - device, dtype = rois.tensor.device, rois.tensor.dtype - if input_tensor_mode: - im_info = proposals[0].image_size - rois = rois.tensor - else: - im_info = torch.Tensor( - [[sz[0], sz[1], 1.0] for sz in [x.image_size for x in proposals]] - ) - batch_ids = cat( - [ - torch.full((b, 1), i, dtype=dtype, device=device) - for i, b in enumerate(len(p) for p in proposals) - ], - dim=0, - ) - rois = torch.cat([batch_ids, rois.tensor], dim=1) - - roi_pred_bbox, roi_batch_splits = torch.ops._caffe2.BBoxTransform( - to_device(rois, "cpu"), - to_device(box_regression, "cpu"), - to_device(im_info, "cpu"), - weights=box2box_transform_weights, - apply_scale=True, - rotated=is_rotated, - angle_bound_on=True, - angle_bound_lo=-180, - angle_bound_hi=180, - clip_angle_thresh=1.0, - legacy_plus_one=False, - ) - roi_pred_bbox = to_device(roi_pred_bbox, device) - roi_batch_splits = to_device(roi_batch_splits, device) - - nms_outputs = torch.ops._caffe2.BoxWithNMSLimit( - to_device(class_prob, "cpu"), - to_device(roi_pred_bbox, "cpu"), - to_device(roi_batch_splits, "cpu"), - score_thresh=float(score_thresh), - nms=float(nms_thresh), - detections_per_im=int(topk_per_image), - soft_nms_enabled=False, - soft_nms_method="linear", - soft_nms_sigma=0.5, - soft_nms_min_score_thres=0.001, - rotated=is_rotated, - cls_agnostic_bbox_reg=cls_agnostic_bbox_reg, - input_boxes_include_bg_cls=False, - output_classes_include_bg_cls=False, - legacy_plus_one=False, - ) - roi_score_nms = to_device(nms_outputs[0], device) - roi_bbox_nms = to_device(nms_outputs[1], device) - roi_class_nms = to_device(nms_outputs[2], device) - roi_batch_splits_nms = to_device(nms_outputs[3], device) - roi_keeps_nms = to_device(nms_outputs[4], device) - roi_keeps_size_nms = to_device(nms_outputs[5], device) - if not self.tensor_mode: - roi_class_nms = roi_class_nms.to(torch.int64) - - roi_batch_ids = cat( - [ - torch.full((b, 1), i, dtype=dtype, device=device) - for i, b in enumerate(int(x.item()) for x in roi_batch_splits_nms) - ], - dim=0, - ) - - roi_class_nms = alias(roi_class_nms, "class_nms") - roi_score_nms = alias(roi_score_nms, "score_nms") - roi_bbox_nms = alias(roi_bbox_nms, "bbox_nms") - roi_batch_splits_nms = alias(roi_batch_splits_nms, "batch_splits_nms") - roi_keeps_nms = alias(roi_keeps_nms, "keeps_nms") - roi_keeps_size_nms = alias(roi_keeps_size_nms, "keeps_size_nms") - - results = InstancesList( - im_info=im_info, - indices=roi_batch_ids[:, 0], - extra_fields={ - "pred_boxes": Caffe2Boxes(roi_bbox_nms), - "scores": roi_score_nms, - "pred_classes": roi_class_nms, - }, - ) - - if not self.tensor_mode: - results = InstancesList.to_d2_instances_list(results) - 
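# A minimal sketch (made-up values) of the batch-splits bookkeeping used above:
# BoxWithNMSLimit returns flat tensors over the whole batch plus the number of
# kept detections per image, and torch.split recovers the per-image chunks.
import torch

kept_scores = torch.tensor([0.9, 0.8, 0.7, 0.6, 0.5])
batch_splits = [3, 2]  # image 0 kept 3 detections, image 1 kept 2
per_image_scores = list(kept_scores.split(batch_splits))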
batch_splits = roi_batch_splits_nms.int().tolist() - kept_indices = list(roi_keeps_nms.to(torch.int64).split(batch_splits)) - else: - results = [results] - kept_indices = [roi_keeps_nms] - - return results, kept_indices - - -class Caffe2MaskRCNNInference: - def __call__(self, pred_mask_logits, pred_instances): - """ equivalent to mask_head.mask_rcnn_inference """ - if all(isinstance(x, InstancesList) for x in pred_instances): - assert len(pred_instances) == 1 - mask_probs_pred = pred_mask_logits.sigmoid() - mask_probs_pred = alias(mask_probs_pred, "mask_fcn_probs") - pred_instances[0].pred_masks = mask_probs_pred - else: - mask_rcnn_inference(pred_mask_logits, pred_instances) - - -class Caffe2KeypointRCNNInference: - def __init__(self, use_heatmap_max_keypoint): - self.use_heatmap_max_keypoint = use_heatmap_max_keypoint - - def __call__(self, pred_keypoint_logits, pred_instances): - # just return the keypoint heatmap for now, - # there will be option to call HeatmapMaxKeypointOp - output = alias(pred_keypoint_logits, "kps_score") - if all(isinstance(x, InstancesList) for x in pred_instances): - assert len(pred_instances) == 1 - if self.use_heatmap_max_keypoint: - device = output.device - output = torch.ops._caffe2.HeatmapMaxKeypoint( - to_device(output, "cpu"), - pred_instances[0].pred_boxes.tensor, - should_output_softmax=True, # worth make it configerable? - ) - output = to_device(output, device) - output = alias(output, "keypoints_out") - pred_instances[0].pred_keypoints = output - return pred_keypoint_logits diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/caffe2_export.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/caffe2_export.py deleted file mode 100644 index ccac809..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/caffe2_export.py +++ /dev/null @@ -1,204 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import copy -import io -import logging -import numpy as np -from typing import List -import onnx -import torch -from caffe2.proto import caffe2_pb2 -from caffe2.python import core -from caffe2.python.onnx.backend import Caffe2Backend -from tabulate import tabulate -from termcolor import colored -from torch.onnx import OperatorExportTypes - -from .shared import ( - ScopedWS, - construct_init_net_from_params, - fuse_alias_placeholder, - fuse_copy_between_cpu_and_gpu, - get_params_from_init_net, - group_norm_replace_aten_with_caffe2, - infer_device_type, - remove_dead_end_ops, - remove_reshape_for_fc, - save_graph, -) - -logger = logging.getLogger(__name__) - - -def export_onnx_model(model, inputs): - """ - Trace and export a model to onnx format. 
- - Args: - model (nn.Module): - inputs (tuple[args]): the model will be called by `model(*inputs)` - - Returns: - an onnx model - """ - assert isinstance(model, torch.nn.Module) - - # make sure all modules are in eval mode, onnx may change the training state - # of the module if the states are not consistent - def _check_eval(module): - assert not module.training - - model.apply(_check_eval) - - # Export the model to ONNX - with torch.no_grad(): - with io.BytesIO() as f: - torch.onnx.export( - model, - inputs, - f, - operator_export_type=OperatorExportTypes.ONNX_ATEN_FALLBACK, - # verbose=True, # NOTE: uncomment this for debugging - # export_params=True, - ) - onnx_model = onnx.load_from_string(f.getvalue()) - - # Apply ONNX's Optimization - all_passes = onnx.optimizer.get_available_passes() - passes = ["fuse_bn_into_conv"] - assert all(p in all_passes for p in passes) - onnx_model = onnx.optimizer.optimize(onnx_model, passes) - return onnx_model - - -def _op_stats(net_def): - type_count = {} - for t in [op.type for op in net_def.op]: - type_count[t] = type_count.get(t, 0) + 1 - type_count_list = sorted(type_count.items(), key=lambda kv: kv[0]) # alphabet - type_count_list = sorted(type_count_list, key=lambda kv: -kv[1]) # count - return "\n".join("{:>4}x {}".format(count, name) for name, count in type_count_list) - - -def _assign_device_option( - predict_net: caffe2_pb2.NetDef, init_net: caffe2_pb2.NetDef, tensor_inputs: List[torch.Tensor] -): - """ - ONNX exported network doesn't have concept of device, assign necessary - device option for each op in order to make it runable on GPU runtime. - """ - - def _get_device_type(torch_tensor): - assert torch_tensor.device.type in ["cpu", "cuda"] - assert torch_tensor.device.index == 0 - return torch_tensor.device.type - - def _assign_op_device_option(net_proto, net_ssa, blob_device_types): - for op, ssa_i in zip(net_proto.op, net_ssa): - if op.type in ["CopyCPUToGPU", "CopyGPUToCPU"]: - op.device_option.CopyFrom(core.DeviceOption(caffe2_pb2.CUDA, 0)) - else: - devices = [blob_device_types[b] for b in ssa_i[0] + ssa_i[1]] - assert all(d == devices[0] for d in devices) - if devices[0] == "cuda": - op.device_option.CopyFrom(core.DeviceOption(caffe2_pb2.CUDA, 0)) - - # update ops in predict_net - predict_net_input_device_types = { - (name, 0): _get_device_type(tensor) - for name, tensor in zip(predict_net.external_input, tensor_inputs) - } - predict_net_device_types = infer_device_type( - predict_net, known_status=predict_net_input_device_types, device_name_style="pytorch" - ) - predict_net_ssa, _ = core.get_ssa(predict_net) - _assign_op_device_option(predict_net, predict_net_ssa, predict_net_device_types) - - # update ops in init_net - init_net_ssa, versions = core.get_ssa(init_net) - init_net_output_device_types = { - (name, versions[name]): predict_net_device_types[(name, 0)] - for name in init_net.external_output - } - init_net_device_types = infer_device_type( - init_net, known_status=init_net_output_device_types, device_name_style="pytorch" - ) - _assign_op_device_option(init_net, init_net_ssa, init_net_device_types) - - -def export_caffe2_detection_model(model: torch.nn.Module, tensor_inputs: List[torch.Tensor]): - """ - Export a caffe2-compatible Detectron2 model to caffe2 format via ONNX. - - Arg: - model: a caffe2-compatible version of detectron2 model, defined in caffe2_modeling.py - tensor_inputs: a list of tensors that caffe2 model takes as input. 
- """ - model = copy.deepcopy(model) - assert isinstance(model, torch.nn.Module) - assert hasattr(model, "encode_additional_info") - - # Export via ONNX - logger.info("Exporting a {} model via ONNX ...".format(type(model).__name__)) - onnx_model = export_onnx_model(model, (tensor_inputs,)) - # Convert ONNX model to Caffe2 protobuf - init_net, predict_net = Caffe2Backend.onnx_graph_to_caffe2_net(onnx_model) - ops_table = [[op.type, op.input, op.output] for op in predict_net.op] - table = tabulate(ops_table, headers=["type", "input", "output"], tablefmt="pipe") - logger.info( - "ONNX export Done. Exported predict_net (before optimizations):\n" + colored(table, "cyan") - ) - - # Apply protobuf optimization - fuse_alias_placeholder(predict_net, init_net) - if any(t.device.type != "cpu" for t in tensor_inputs): - fuse_copy_between_cpu_and_gpu(predict_net) - remove_dead_end_ops(init_net) - _assign_device_option(predict_net, init_net, tensor_inputs) - params, device_options = get_params_from_init_net(init_net) - predict_net, params = remove_reshape_for_fc(predict_net, params) - init_net = construct_init_net_from_params(params, device_options) - group_norm_replace_aten_with_caffe2(predict_net) - - # Record necessary information for running the pb model in Detectron2 system. - model.encode_additional_info(predict_net, init_net) - - logger.info("Operators used in predict_net: \n{}".format(_op_stats(predict_net))) - logger.info("Operators used in init_net: \n{}".format(_op_stats(init_net))) - - return predict_net, init_net - - -def run_and_save_graph(predict_net, init_net, tensor_inputs, graph_save_path): - """ - Run the caffe2 model on given inputs, recording the shape and draw the graph. - - predict_net/init_net: caffe2 model. - tensor_inputs: a list of tensors that caffe2 model takes as input. - graph_save_path: path for saving graph of exported model. - """ - - logger.info("Saving graph of ONNX exported model to {} ...".format(graph_save_path)) - save_graph(predict_net, graph_save_path, op_only=False) - - # Run the exported Caffe2 net - logger.info("Running ONNX exported model ...") - with ScopedWS("__ws_tmp__", True) as ws: - ws.RunNetOnce(init_net) - initialized_blobs = set(ws.Blobs()) - uninitialized = [inp for inp in predict_net.external_input if inp not in initialized_blobs] - for name, blob in zip(uninitialized, tensor_inputs): - ws.FeedBlob(name, blob) - - try: - ws.RunNetOnce(predict_net) - except RuntimeError as e: - logger.warning("Encountered RuntimeError: \n{}".format(str(e))) - - ws_blobs = {b: ws.FetchBlob(b) for b in ws.Blobs()} - blob_sizes = {b: ws_blobs[b].shape for b in ws_blobs if isinstance(ws_blobs[b], np.ndarray)} - - logger.info("Saving graph with blob shapes to {} ...".format(graph_save_path)) - save_graph(predict_net, graph_save_path, op_only=False, blob_sizes=blob_sizes) - - return ws_blobs diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/caffe2_inference.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/caffe2_inference.py deleted file mode 100644 index 92718d0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/caffe2_inference.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -import collections -import logging -import numpy as np -import torch -from caffe2.proto import caffe2_pb2 -from caffe2.python import core - -from .caffe2_modeling import META_ARCH_CAFFE2_EXPORT_TYPE_MAP, convert_batched_inputs_to_c2_format -from .shared import ScopedWS, get_pb_arg_vali, get_pb_arg_vals, infer_device_type - -logger = logging.getLogger(__name__) - - -class ProtobufModel(torch.nn.Module): - """ - A class works just like nn.Module in terms of inference, but running - caffe2 model under the hood. Input/Output are Dict[str, tensor] whose keys - are in external_input/output. - """ - - def __init__(self, predict_net, init_net): - logger.info("Initializing ProtobufModel ...") - super().__init__() - assert isinstance(predict_net, caffe2_pb2.NetDef) - assert isinstance(init_net, caffe2_pb2.NetDef) - self.ws_name = "__ws_tmp__" - self.net = core.Net(predict_net) - - with ScopedWS(self.ws_name, is_reset=True, is_cleanup=False) as ws: - ws.RunNetOnce(init_net) - for blob in self.net.Proto().external_input: - if blob not in ws.Blobs(): - ws.CreateBlob(blob) - ws.CreateNet(self.net) - - self._error_msgs = set() - - def forward(self, inputs_dict): - assert all(inp in self.net.Proto().external_input for inp in inputs_dict) - with ScopedWS(self.ws_name, is_reset=False, is_cleanup=False) as ws: - for b, tensor in inputs_dict.items(): - ws.FeedBlob(b, tensor) - try: - ws.RunNet(self.net.Proto().name) - except RuntimeError as e: - if not str(e) in self._error_msgs: - self._error_msgs.add(str(e)) - logger.warning("Encountered new RuntimeError: \n{}".format(str(e))) - logger.warning("Catch the error and use partial results.") - - outputs_dict = collections.OrderedDict( - [(b, ws.FetchBlob(b)) for b in self.net.Proto().external_output] - ) - # Remove outputs of current run, this is necessary in order to - # prevent fetching the result from previous run if the model fails - # in the middle. - for b in self.net.Proto().external_output: - # Needs to create uninitialized blob to make the net runable. - # This is "equivalent" to: ws.RemoveBlob(b) then ws.CreateBlob(b), - # but there'no such API. - ws.FeedBlob(b, "{}, a C++ native class of type nullptr (uninitialized).".format(b)) - - return outputs_dict - - -class ProtobufDetectionModel(torch.nn.Module): - """ - A class works just like a pytorch meta arch in terms of inference, but running - caffe2 model under the hood. - """ - - def __init__(self, predict_net, init_net, *, convert_outputs=None): - """ - Args: - predict_net, init_net (core.Net): caffe2 nets - convert_outptus (callable): a function that converts caffe2 - outputs to the same format of the original pytorch model. - By default, use the one defined in the caffe2 meta_arch. 
- """ - super().__init__() - self.protobuf_model = ProtobufModel(predict_net, init_net) - self.size_divisibility = get_pb_arg_vali(predict_net, "size_divisibility", 0) - self.device = get_pb_arg_vals(predict_net, "device", b"cpu").decode("ascii") - - if convert_outputs is None: - meta_arch = get_pb_arg_vals(predict_net, "meta_architecture", b"GeneralizedRCNN") - meta_arch = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[meta_arch.decode("ascii")] - self._convert_outputs = meta_arch.get_outputs_converter(predict_net, init_net) - else: - self._convert_outputs = convert_outputs - - def _infer_output_devices(self, inputs_dict): - def _get_device_type(torch_tensor): - assert torch_tensor.device.type in ["cpu", "cuda"] - assert torch_tensor.device.index == 0 - return torch_tensor.device.type - - predict_net = self.protobuf_model.net.Proto() - input_device_types = { - (name, 0): _get_device_type(tensor) for name, tensor in inputs_dict.items() - } - device_type_map = infer_device_type( - predict_net, known_status=input_device_types, device_name_style="pytorch" - ) - ssa, versions = core.get_ssa(predict_net) - versioned_outputs = [(name, versions[name]) for name in predict_net.external_output] - output_devices = [device_type_map[outp] for outp in versioned_outputs] - return output_devices - - def _convert_inputs(self, batched_inputs): - # currently all models convert inputs in the same way - data, im_info = convert_batched_inputs_to_c2_format( - batched_inputs, self.size_divisibility, self.device - ) - return {"data": data, "im_info": im_info} - - def forward(self, batched_inputs): - c2_inputs = self._convert_inputs(batched_inputs) - c2_results = self.protobuf_model(c2_inputs) - - if any(t.device.type != "cpu" for _, t in c2_inputs.items()): - output_devices = self._infer_output_devices(c2_inputs) - else: - output_devices = ["cpu" for _ in self.protobuf_model.net.Proto().external_output] - - def _cast_caffe2_blob_to_torch_tensor(blob, device): - return torch.Tensor(blob).to(device) if isinstance(blob, np.ndarray) else None - - c2_results = { - name: _cast_caffe2_blob_to_torch_tensor(c2_results[name], device) - for name, device in zip(self.protobuf_model.net.Proto().external_output, output_devices) - } - - return self._convert_outputs(batched_inputs, c2_inputs, c2_results) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/caffe2_modeling.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/caffe2_modeling.py deleted file mode 100644 index 1732b32..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/caffe2_modeling.py +++ /dev/null @@ -1,493 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -import functools -import io -import struct -import types -import torch - -from detectron2.modeling import meta_arch -from detectron2.modeling.box_regression import Box2BoxTransform -from detectron2.modeling.meta_arch.panoptic_fpn import combine_semantic_and_instance_outputs -from detectron2.modeling.postprocessing import detector_postprocess, sem_seg_postprocess -from detectron2.modeling.roi_heads import keypoint_head -from detectron2.structures import Boxes, ImageList, Instances, RotatedBoxes - -from .c10 import Caffe2Compatible -from .patcher import ROIHeadsPatcher, patch_generalized_rcnn -from .shared import ( - alias, - check_set_pb_arg, - get_pb_arg_floats, - get_pb_arg_valf, - get_pb_arg_vali, - get_pb_arg_vals, - mock_torch_nn_functional_interpolate, -) - - -def assemble_rcnn_outputs_by_name(image_sizes, tensor_outputs, force_mask_on=False): - """ - A function to assemble caffe2 model's outputs (i.e. Dict[str, Tensor]) - to detectron2's format (i.e. list of Instances instance). - This only works when the model follows the Caffe2 detectron's naming convention. - - Args: - image_sizes (List[List[int, int]]): [H, W] of every image. - tensor_outputs (Dict[str, Tensor]): external_output to its tensor. - - force_mask_on (Bool): if true, the it make sure there'll be pred_masks even - if the mask is not found from tensor_outputs (usually due to model crash) - """ - - results = [Instances(image_size) for image_size in image_sizes] - - batch_splits = tensor_outputs.get("batch_splits", None) - if batch_splits: - raise NotImplementedError() - assert len(image_sizes) == 1 - result = results[0] - - bbox_nms = tensor_outputs["bbox_nms"] - score_nms = tensor_outputs["score_nms"] - class_nms = tensor_outputs["class_nms"] - # Detection will always success because Conv support 0-batch - assert bbox_nms is not None - assert score_nms is not None - assert class_nms is not None - if bbox_nms.shape[1] == 5: - result.pred_boxes = RotatedBoxes(bbox_nms) - else: - result.pred_boxes = Boxes(bbox_nms) - result.scores = score_nms - result.pred_classes = class_nms.to(torch.int64) - - mask_fcn_probs = tensor_outputs.get("mask_fcn_probs", None) - if mask_fcn_probs is not None: - # finish the mask pred - mask_probs_pred = mask_fcn_probs - num_masks = mask_probs_pred.shape[0] - class_pred = result.pred_classes - indices = torch.arange(num_masks, device=class_pred.device) - mask_probs_pred = mask_probs_pred[indices, class_pred][:, None] - result.pred_masks = mask_probs_pred - elif force_mask_on: - # NOTE: there's no way to know the height/width of mask here, it won't be - # used anyway when batch size is 0, so just set them to 0. - result.pred_masks = torch.zeros([0, 1, 0, 0], dtype=torch.uint8) - - keypoints_out = tensor_outputs.get("keypoints_out", None) - kps_score = tensor_outputs.get("kps_score", None) - if keypoints_out is not None: - # keypoints_out: [N, 4, #kypoints], where 4 is in order of (x, y, score, prob) - keypoints_tensor = keypoints_out - # NOTE: it's possible that prob is not calculated if "should_output_softmax" - # is set to False in HeatmapMaxKeypoint, so just using raw score, seems - # it doesn't affect mAP. TODO: check more carefully. 
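# A minimal sketch (made-up shapes) of the per-class mask selection performed in
# assemble_rcnn_outputs_by_name above: the mask head predicts one probability
# map per class, and advanced indexing picks out the map of each instance's
# predicted class.
import torch

num_masks, num_classes, H, W = 2, 5, 4, 4
mask_probs = torch.rand(num_masks, num_classes, H, W)
pred_classes = torch.tensor([3, 1])
idx = torch.arange(num_masks)
per_instance_masks = mask_probs[idx, pred_classes][:, None]  # (num_masks, 1, H, W)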
- keypoint_xyp = keypoints_tensor.transpose(1, 2)[:, :, [0, 1, 2]] - result.pred_keypoints = keypoint_xyp - elif kps_score is not None: - # keypoint heatmap to sparse data structure - pred_keypoint_logits = kps_score - keypoint_head.keypoint_rcnn_inference(pred_keypoint_logits, [result]) - - return results - - -def _cast_to_f32(f64): - return struct.unpack("f", struct.pack("f", f64))[0] - - -def set_caffe2_compatible_tensor_mode(model, enable=True): - def _fn(m): - if isinstance(m, Caffe2Compatible): - m.tensor_mode = enable - - model.apply(_fn) - - -def convert_batched_inputs_to_c2_format(batched_inputs, size_divisibility, device): - """ - See get_caffe2_inputs() below. - """ - assert all(isinstance(x, dict) for x in batched_inputs) - assert all(x["image"].dim() == 3 for x in batched_inputs) - - images = [x["image"] for x in batched_inputs] - images = ImageList.from_tensors(images, size_divisibility) - - im_info = [] - for input_per_image, image_size in zip(batched_inputs, images.image_sizes): - target_height = input_per_image.get("height", image_size[0]) - target_width = input_per_image.get("width", image_size[1]) # noqa - # NOTE: The scale inside im_info is kept as convention and for providing - # post-processing information if further processing is needed. For - # current Caffe2 model definitions that don't include post-processing inside - # the model, this number is not used. - # NOTE: There can be a slight difference between width and height - # scales, using a single number can results in numerical difference - # compared with D2's post-processing. - scale = target_height / image_size[0] - im_info.append([image_size[0], image_size[1], scale]) - im_info = torch.Tensor(im_info) - - return images.tensor.to(device), im_info.to(device) - - -class Caffe2MetaArch(Caffe2Compatible, torch.nn.Module): - """ - Base class for caffe2-compatible implementation of a meta architecture. - The forward is traceable and its traced graph can be converted to caffe2 - graph through ONNX. - """ - - def __init__(self, cfg, torch_model): - """ - Args: - cfg (CfgNode): - torch_model (nn.Module): the detectron2 model (meta_arch) to be - converted. - """ - super().__init__() - self._wrapped_model = torch_model - self.eval() - set_caffe2_compatible_tensor_mode(self, True) - - def get_caffe2_inputs(self, batched_inputs): - """ - Convert pytorch-style structured inputs to caffe2-style inputs that - are tuples of tensors. - - Args: - batched_inputs (list[dict]): inputs to a detectron2 model - in its standard format. Each dict has "image" (CHW tensor), and optionally - "height" and "width". - - Returns: - tuple[Tensor]: - tuple of tensors that will be the inputs to the - :meth:`forward` method. For existing models, the first - is an NCHW tensor (padded and batched); the second is - a im_info Nx3 tensor, where the rows are - (height, width, unused legacy parameter) - """ - return convert_batched_inputs_to_c2_format( - batched_inputs, - self._wrapped_model.backbone.size_divisibility, - self._wrapped_model.device, - ) - - def encode_additional_info(self, predict_net, init_net): - """ - Save extra metadata that will be used by inference in the output protobuf. - """ - pass - - def forward(self, inputs): - """ - Run the forward in caffe2-style. It has to use caffe2-compatible ops - and the method will be used for tracing. - - Args: - inputs (tuple[Tensor]): inputs defined by :meth:`get_caffe2_input`. - They will be the inputs of the converted caffe2 graph. - - Returns: - tuple[Tensor]: output tensors. 
They will be the outputs of the - converted caffe2 graph. - """ - raise NotImplementedError - - def _caffe2_preprocess_image(self, inputs): - """ - Caffe2 implementation of preprocess_image, which is called inside each MetaArch's forward. - It normalizes the input images, and the final caffe2 graph assumes the - inputs have been batched already. - """ - data, im_info = inputs - data = alias(data, "data") - im_info = alias(im_info, "im_info") - mean, std = self._wrapped_model.pixel_mean, self._wrapped_model.pixel_std - normalized_data = (data - mean) / std - normalized_data = alias(normalized_data, "normalized_data") - - # Pack (data, im_info) into ImageList which is recognized by self.inference. - images = ImageList(tensor=normalized_data, image_sizes=im_info) - return images - - @staticmethod - def get_outputs_converter(predict_net, init_net): - """ - Creates a function that converts outputs of the caffe2 model to - detectron2's standard format. - The function uses information in `predict_net` and `init_net` that are - available at inferene time. Therefore the function logic can be used in inference. - - The returned function has the following signature: - - def convert(batched_inputs, c2_inputs, c2_results) -> detectron2_outputs - - Where - - * batched_inputs (list[dict]): the original input format of the meta arch - * c2_inputs (dict[str, Tensor]): the caffe2 inputs. - * c2_results (dict[str, Tensor]): the caffe2 output format, - corresponding to the outputs of the :meth:`forward` function. - * detectron2_outputs: the original output format of the meta arch. - - This function can be used to compare the outputs of the original meta arch and - the converted caffe2 graph. - - Returns: - callable: a callable of the above signature. - """ - raise NotImplementedError - - -class Caffe2GeneralizedRCNN(Caffe2MetaArch): - def __init__(self, cfg, torch_model): - assert isinstance(torch_model, meta_arch.GeneralizedRCNN) - torch_model = patch_generalized_rcnn(torch_model) - super().__init__(cfg, torch_model) - - self.roi_heads_patcher = ROIHeadsPatcher(cfg, self._wrapped_model.roi_heads) - - def encode_additional_info(self, predict_net, init_net): - size_divisibility = self._wrapped_model.backbone.size_divisibility - check_set_pb_arg(predict_net, "size_divisibility", "i", size_divisibility) - check_set_pb_arg( - predict_net, "device", "s", str.encode(str(self._wrapped_model.device), "ascii") - ) - check_set_pb_arg(predict_net, "meta_architecture", "s", b"GeneralizedRCNN") - - @mock_torch_nn_functional_interpolate() - def forward(self, inputs): - if not self.tensor_mode: - return self._wrapped_model.inference(inputs) - images = self._caffe2_preprocess_image(inputs) - features = self._wrapped_model.backbone(images.tensor) - proposals, _ = self._wrapped_model.proposal_generator(images, features) - with self.roi_heads_patcher.mock_roi_heads(): - detector_results, _ = self._wrapped_model.roi_heads(images, features, proposals) - return tuple(detector_results[0].flatten()) - - @staticmethod - def get_outputs_converter(predict_net, init_net): - def f(batched_inputs, c2_inputs, c2_results): - image_sizes = [[int(im[0]), int(im[1])] for im in c2_inputs["im_info"]] - results = assemble_rcnn_outputs_by_name(image_sizes, c2_results) - return meta_arch.GeneralizedRCNN._postprocess(results, batched_inputs, image_sizes) - - return f - - -class Caffe2PanopticFPN(Caffe2MetaArch): - def __init__(self, cfg, torch_model): - assert isinstance(torch_model, meta_arch.PanopticFPN) - torch_model = 
patch_generalized_rcnn(torch_model) - super().__init__(cfg, torch_model) - - self.roi_heads_patcher = ROIHeadsPatcher(cfg, self._wrapped_model.roi_heads) - - @mock_torch_nn_functional_interpolate() - def forward(self, inputs): - assert self.tensor_mode - images = self._caffe2_preprocess_image(inputs) - features = self._wrapped_model.backbone(images.tensor) - - sem_seg_results, _ = self._wrapped_model.sem_seg_head(features) - sem_seg_results = alias(sem_seg_results, "sem_seg") - - proposals, _ = self._wrapped_model.proposal_generator(images, features) - - with self.roi_heads_patcher.mock_roi_heads(self.tensor_mode): - detector_results, _ = self._wrapped_model.roi_heads(images, features, proposals) - - return tuple(detector_results[0].flatten()) + (sem_seg_results,) - - def encode_additional_info(self, predict_net, init_net): - size_divisibility = self._wrapped_model.backbone.size_divisibility - check_set_pb_arg(predict_net, "size_divisibility", "i", size_divisibility) - check_set_pb_arg( - predict_net, "device", "s", str.encode(str(self._wrapped_model.device), "ascii") - ) - check_set_pb_arg(predict_net, "meta_architecture", "s", b"PanopticFPN") - - # Inference parameters: - check_set_pb_arg(predict_net, "combine_on", "i", self._wrapped_model.combine_on) - check_set_pb_arg( - predict_net, - "combine_overlap_threshold", - "f", - _cast_to_f32(self._wrapped_model.combine_overlap_threshold), - ) - check_set_pb_arg( - predict_net, - "combine_stuff_area_limit", - "i", - self._wrapped_model.combine_stuff_area_limit, - ) - check_set_pb_arg( - predict_net, - "combine_instances_confidence_threshold", - "f", - _cast_to_f32(self._wrapped_model.combine_instances_confidence_threshold), - ) - - @staticmethod - def get_outputs_converter(predict_net, init_net): - combine_on = get_pb_arg_vali(predict_net, "combine_on", None) - combine_overlap_threshold = get_pb_arg_valf(predict_net, "combine_overlap_threshold", None) - combine_stuff_area_limit = get_pb_arg_vali(predict_net, "combine_stuff_area_limit", None) - combine_instances_confidence_threshold = get_pb_arg_valf( - predict_net, "combine_instances_confidence_threshold", None - ) - - def f(batched_inputs, c2_inputs, c2_results): - image_sizes = [[int(im[0]), int(im[1])] for im in c2_inputs["im_info"]] - detector_results = assemble_rcnn_outputs_by_name( - image_sizes, c2_results, force_mask_on=True - ) - sem_seg_results = c2_results["sem_seg"] - - # copied from meta_arch/panoptic_fpn.py ... 
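The `combine_*` thresholds above are written through `_cast_to_f32` so the Python float carries exactly the precision a float32 protobuf field will store; a tiny standalone illustration of that round-trip:

```python
import struct

def cast_to_f32(f64: float) -> float:
    # Pack as a 4-byte IEEE-754 float and unpack again, so the returned Python
    # float carries exactly float32 precision (what a protobuf "f" arg stores).
    return struct.unpack("f", struct.pack("f", f64))[0]

print(cast_to_f32(0.1))  # 0.10000000149011612 -- the float32-rounded value
```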
- processed_results = [] - for sem_seg_result, detector_result, input_per_image, image_size in zip( - sem_seg_results, detector_results, batched_inputs, image_sizes - ): - height = input_per_image.get("height", image_size[0]) - width = input_per_image.get("width", image_size[1]) - sem_seg_r = sem_seg_postprocess(sem_seg_result, image_size, height, width) - detector_r = detector_postprocess(detector_result, height, width) - - processed_results.append({"sem_seg": sem_seg_r, "instances": detector_r}) - - if combine_on: - panoptic_r = combine_semantic_and_instance_outputs( - detector_r, - sem_seg_r.argmax(dim=0), - combine_overlap_threshold, - combine_stuff_area_limit, - combine_instances_confidence_threshold, - ) - processed_results[-1]["panoptic_seg"] = panoptic_r - return processed_results - - return f - - -class Caffe2RetinaNet(Caffe2MetaArch): - def __init__(self, cfg, torch_model): - assert isinstance(torch_model, meta_arch.RetinaNet) - super().__init__(cfg, torch_model) - - @mock_torch_nn_functional_interpolate() - def forward(self, inputs): - assert self.tensor_mode - images = self._caffe2_preprocess_image(inputs) - - # explicitly return the images sizes to avoid removing "im_info" by ONNX - # since it's not used in the forward path - return_tensors = [images.image_sizes] - - features = self._wrapped_model.backbone(images.tensor) - features = [features[f] for f in self._wrapped_model.in_features] - for i, feature_i in enumerate(features): - features[i] = alias(feature_i, "feature_{}".format(i), is_backward=True) - return_tensors.append(features[i]) - - box_cls, box_delta = self._wrapped_model.head(features) - for i, (box_cls_i, box_delta_i) in enumerate(zip(box_cls, box_delta)): - return_tensors.append(alias(box_cls_i, "box_cls_{}".format(i))) - return_tensors.append(alias(box_delta_i, "box_delta_{}".format(i))) - - return tuple(return_tensors) - - def encode_additional_info(self, predict_net, init_net): - size_divisibility = self._wrapped_model.backbone.size_divisibility - check_set_pb_arg(predict_net, "size_divisibility", "i", size_divisibility) - check_set_pb_arg( - predict_net, "device", "s", str.encode(str(self._wrapped_model.device), "ascii") - ) - check_set_pb_arg(predict_net, "meta_architecture", "s", b"RetinaNet") - - # Inference parameters: - check_set_pb_arg( - predict_net, "score_threshold", "f", _cast_to_f32(self._wrapped_model.score_threshold) - ) - check_set_pb_arg(predict_net, "topk_candidates", "i", self._wrapped_model.topk_candidates) - check_set_pb_arg( - predict_net, "nms_threshold", "f", _cast_to_f32(self._wrapped_model.nms_threshold) - ) - check_set_pb_arg( - predict_net, - "max_detections_per_image", - "i", - self._wrapped_model.max_detections_per_image, - ) - - check_set_pb_arg( - predict_net, - "bbox_reg_weights", - "floats", - [_cast_to_f32(w) for w in self._wrapped_model.box2box_transform.weights], - ) - self._encode_anchor_generator_cfg(predict_net) - - def _encode_anchor_generator_cfg(self, predict_net): - # serialize anchor_generator for future use - serialized_anchor_generator = io.BytesIO() - torch.save(self._wrapped_model.anchor_generator, serialized_anchor_generator) - # Ideally we can put anchor generating inside the model, then we don't - # need to store this information. 
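The serialization below goes through an in-memory buffer; a short sketch of the same `torch.save`/`torch.load` round-trip, with a made-up picklable dict standing in for the real anchor generator:

```python
import io
import torch

# Any picklable object can be round-tripped this way; the dict below is just a
# placeholder for the real anchor generator module.
obj = {"sizes": [32, 64, 128], "aspect_ratios": [0.5, 1.0, 2.0]}

buf = io.BytesIO()
torch.save(obj, buf)
raw = buf.getvalue()            # bytes suitable for a protobuf "s" argument

restored = torch.load(io.BytesIO(raw))
assert restored == obj
```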
- bytes = serialized_anchor_generator.getvalue() - check_set_pb_arg(predict_net, "serialized_anchor_generator", "s", bytes) - - @staticmethod - def get_outputs_converter(predict_net, init_net): - self = types.SimpleNamespace() - serialized_anchor_generator = io.BytesIO( - get_pb_arg_vals(predict_net, "serialized_anchor_generator", None) - ) - self.anchor_generator = torch.load(serialized_anchor_generator) - bbox_reg_weights = get_pb_arg_floats(predict_net, "bbox_reg_weights", None) - self.box2box_transform = Box2BoxTransform(weights=tuple(bbox_reg_weights)) - self.score_threshold = get_pb_arg_valf(predict_net, "score_threshold", None) - self.topk_candidates = get_pb_arg_vali(predict_net, "topk_candidates", None) - self.nms_threshold = get_pb_arg_valf(predict_net, "nms_threshold", None) - self.max_detections_per_image = get_pb_arg_vali( - predict_net, "max_detections_per_image", None - ) - - # hack to reuse inference code from RetinaNet - self.inference = functools.partial(meta_arch.RetinaNet.inference, self) - self.inference_single_image = functools.partial( - meta_arch.RetinaNet.inference_single_image, self - ) - - def f(batched_inputs, c2_inputs, c2_results): - image_sizes = [[int(im[0]), int(im[1])] for im in c2_inputs["im_info"]] - - num_features = len([x for x in c2_results.keys() if x.startswith("box_cls_")]) - box_cls = [c2_results["box_cls_{}".format(i)] for i in range(num_features)] - box_delta = [c2_results["box_delta_{}".format(i)] for i in range(num_features)] - - # For each feature level, feature should have the same batch size and - # spatial dimension as the box_cls and box_delta. - dummy_features = [box_delta[i].clone()[:, 0:0, :, :] for i in range(num_features)] - anchors = self.anchor_generator(dummy_features) - - # self.num_classess can be inferred - self.num_classes = box_cls[0].shape[1] // (box_delta[0].shape[1] // 4) - - results = self.inference(box_cls, box_delta, anchors, image_sizes) - return meta_arch.GeneralizedRCNN._postprocess(results, batched_inputs, image_sizes) - - return f - - -META_ARCH_CAFFE2_EXPORT_TYPE_MAP = { - "GeneralizedRCNN": Caffe2GeneralizedRCNN, - "PanopticFPN": Caffe2PanopticFPN, - "RetinaNet": Caffe2RetinaNet, -} diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/patcher.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/patcher.py deleted file mode 100644 index 3f0b0fd..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/patcher.py +++ /dev/null @@ -1,153 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import contextlib -import mock -import torch - -from detectron2.modeling import poolers -from detectron2.modeling.proposal_generator import rpn -from detectron2.modeling.roi_heads import keypoint_head, mask_head -from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers - -from .c10 import ( - Caffe2Compatible, - Caffe2FastRCNNOutputsInference, - Caffe2KeypointRCNNInference, - Caffe2MaskRCNNInference, - Caffe2ROIPooler, - Caffe2RPN, -) - - -class GenericMixin(object): - pass - - -class Caffe2CompatibleConverter(object): - """ - A GenericUpdater which implements the `create_from` interface, by modifying - module object and assign it with another class replaceCls. 
- """ - - def __init__(self, replaceCls): - self.replaceCls = replaceCls - - def create_from(self, module): - # update module's class to the new class - assert isinstance(module, torch.nn.Module) - if issubclass(self.replaceCls, GenericMixin): - # replaceCls should act as mixin, create a new class on-the-fly - new_class = type( - "{}MixedWith{}".format(self.replaceCls.__name__, module.__class__.__name__), - (self.replaceCls, module.__class__), - {}, # {"new_method": lambda self: ...}, - ) - module.__class__ = new_class - else: - # replaceCls is complete class, this allow arbitrary class swap - module.__class__ = self.replaceCls - - # initialize Caffe2Compatible - if isinstance(module, Caffe2Compatible): - module.tensor_mode = False - - return module - - -def patch(model, target, updater, *args, **kwargs): - """ - recursively (post-order) update all modules with the target type and its - subclasses, make a initialization/composition/inheritance/... via the - updater.create_from. - """ - for name, module in model.named_children(): - model._modules[name] = patch(module, target, updater, *args, **kwargs) - if isinstance(model, target): - return updater.create_from(model, *args, **kwargs) - return model - - -def patch_generalized_rcnn(model): - ccc = Caffe2CompatibleConverter - model = patch(model, rpn.RPN, ccc(Caffe2RPN)) - model = patch(model, poolers.ROIPooler, ccc(Caffe2ROIPooler)) - - return model - - -@contextlib.contextmanager -def mock_fastrcnn_outputs_inference( - tensor_mode, check=True, box_predictor_type=FastRCNNOutputLayers -): - with mock.patch.object( - box_predictor_type, - "inference", - autospec=True, - side_effect=Caffe2FastRCNNOutputsInference(tensor_mode), - ) as mocked_func: - yield - if check: - assert mocked_func.call_count > 0 - - -@contextlib.contextmanager -def mock_mask_rcnn_inference(tensor_mode, patched_module, check=True): - with mock.patch( - "{}.mask_rcnn_inference".format(patched_module), side_effect=Caffe2MaskRCNNInference() - ) as mocked_func: - yield - if check: - assert mocked_func.call_count > 0 - - -@contextlib.contextmanager -def mock_keypoint_rcnn_inference(tensor_mode, patched_module, use_heatmap_max_keypoint, check=True): - with mock.patch( - "{}.keypoint_rcnn_inference".format(patched_module), - side_effect=Caffe2KeypointRCNNInference(use_heatmap_max_keypoint), - ) as mocked_func: - yield - if check: - assert mocked_func.call_count > 0 - - -class ROIHeadsPatcher: - def __init__(self, cfg, heads): - self.heads = heads - - self.use_heatmap_max_keypoint = cfg.EXPORT_CAFFE2.USE_HEATMAP_MAX_KEYPOINT - - @contextlib.contextmanager - def mock_roi_heads(self, tensor_mode=True): - """ - Patching several inference functions inside ROIHeads and its subclasses - - Args: - tensor_mode (bool): whether the inputs/outputs are caffe2's tensor - format or not. Default to True. - """ - # NOTE: this requries the `keypoint_rcnn_inference` and `mask_rcnn_inference` - # are called inside the same file as BaseXxxHead due to using mock.patch. 
- kpt_heads_mod = keypoint_head.BaseKeypointRCNNHead.__module__ - mask_head_mod = mask_head.BaseMaskRCNNHead.__module__ - - mock_ctx_managers = [ - mock_fastrcnn_outputs_inference( - tensor_mode=tensor_mode, - check=True, - box_predictor_type=type(self.heads.box_predictor), - ) - ] - if getattr(self.heads, "keypoint_on", False): - mock_ctx_managers += [ - mock_keypoint_rcnn_inference( - tensor_mode, kpt_heads_mod, self.use_heatmap_max_keypoint - ) - ] - if getattr(self.heads, "mask_on", False): - mock_ctx_managers += [mock_mask_rcnn_inference(tensor_mode, mask_head_mod)] - - with contextlib.ExitStack() as stack: # python 3.3+ - for mgr in mock_ctx_managers: - stack.enter_context(mgr) - yield diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/shared.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/shared.py deleted file mode 100644 index cb7ffeb..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/shared.py +++ /dev/null @@ -1,1034 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import collections -import contextlib -import copy -import functools -import logging -import mock -import numpy as np -import os -from typing import Any, Callable, Dict, List, Optional, Tuple, Union -import caffe2.python.utils as putils -import torch -import torch.nn.functional as F -from caffe2.proto import caffe2_pb2 -from caffe2.python import core, net_drawer, workspace -from torch.nn.functional import interpolate as interp - -logger = logging.getLogger(__name__) - - -# ==== torch/utils_toffee/cast.py ======================================= - - -def to_device(t, device_str): - """ - This function is a replacement of .to(another_device) such that it allows the - casting to be traced properly by explicitly calling the underlying copy ops. - It also avoids introducing unncessary op when casting to the same device. - """ - src = t.device - dst = torch.device(device_str) - - if src == dst: - return t - elif src.type == "cuda" and dst.type == "cpu": - return torch.ops._caffe2.CopyGPUToCPU(t) - elif src.type == "cpu" and dst.type == "cuda": - return torch.ops._caffe2.CopyCPUToGPU(t) - else: - raise RuntimeError("Can't cast tensor from device {} to device {}".format(src, dst)) - - -# ==== torch/utils_toffee/interpolate.py ======================================= - - -# Note: borrowed from vision/detection/fair/detectron/detectron/modeling/detector.py -def BilinearInterpolation(tensor_in, up_scale): - assert up_scale % 2 == 0, "Scale should be even" - - def upsample_filt(size): - factor = (size + 1) // 2 - if size % 2 == 1: - center = factor - 1 - else: - center = factor - 0.5 - - og = np.ogrid[:size, :size] - return (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor) - - kernel_size = int(up_scale) * 2 - bil_filt = upsample_filt(kernel_size) - - dim = int(tensor_in.shape[1]) - kernel = np.zeros((dim, dim, kernel_size, kernel_size), dtype=np.float32) - kernel[range(dim), range(dim), :, :] = bil_filt - - tensor_out = F.conv_transpose2d( - tensor_in, - weight=to_device(torch.Tensor(kernel), tensor_in.device), - bias=None, - stride=int(up_scale), - padding=int(up_scale / 2), - ) - - return tensor_out - - -# NOTE: ONNX is incompatible with traced torch.nn.functional.interpolate if -# using dynamic `scale_factor` rather than static `size`. (T43166860) -# NOTE: Caffe2 Int8 conversion might not be able to quantize `size` properly. 
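`BilinearInterpolation` above builds a fixed transposed-convolution kernel; a numpy-only peek at that separable kernel for a 2x upscale (kernel size 4), assuming nothing beyond the formula shown above:

```python
import numpy as np

def upsample_filt(size):
    # Same construction as above: a separable "tent" kernel that performs
    # bilinear interpolation when used as conv_transpose2d weights.
    factor = (size + 1) // 2
    center = factor - 1 if size % 2 == 1 else factor - 0.5
    og = np.ogrid[:size, :size]
    return (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor)

print(upsample_filt(4))  # 4x4 kernel peaking at the centre; corners taper to 0.0625
```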
-def onnx_compatibale_interpolate( - input, size=None, scale_factor=None, mode="nearest", align_corners=None -): - # NOTE: The input dimensions are interpreted in the form: - # `mini-batch x channels x [optional depth] x [optional height] x width`. - if size is None and scale_factor is not None: - if input.dim() == 4: - if isinstance(scale_factor, (int, float)): - height_scale, width_scale = (scale_factor, scale_factor) - else: - assert isinstance(scale_factor, (tuple, list)) - assert len(scale_factor) == 2 - height_scale, width_scale = scale_factor - - assert not align_corners, "No matching C2 op for align_corners == True" - if mode == "nearest": - return torch.ops._caffe2.ResizeNearest( - input, order="NCHW", width_scale=width_scale, height_scale=height_scale - ) - elif mode == "bilinear": - logger.warning( - "Use F.conv_transpose2d for bilinear interpolate" - " because there's no such C2 op, this may cause significant" - " slowdown and the boundary pixels won't be as same as" - " using F.interpolate due to padding." - ) - assert height_scale == width_scale - return BilinearInterpolation(input, up_scale=height_scale) - logger.warning("Output size is not static, it might cause ONNX conversion issue") - - return interp(input, size, scale_factor, mode, align_corners) - - -@contextlib.contextmanager -def mock_torch_nn_functional_interpolate(): - if torch.onnx.is_in_onnx_export(): - with mock.patch( - "torch.nn.functional.interpolate", side_effect=onnx_compatibale_interpolate - ): - yield - else: - yield - - -# ==== torch/utils_caffe2/ws_utils.py ========================================== - - -class ScopedWS(object): - def __init__(self, ws_name, is_reset, is_cleanup=False): - self.ws_name = ws_name - self.is_reset = is_reset - self.is_cleanup = is_cleanup - self.org_ws = "" - - def __enter__(self): - self.org_ws = workspace.CurrentWorkspace() - if self.ws_name is not None: - workspace.SwitchWorkspace(self.ws_name, True) - if self.is_reset: - workspace.ResetWorkspace() - - return workspace - - def __exit__(self, *args): - if self.is_cleanup: - workspace.ResetWorkspace() - if self.ws_name is not None: - workspace.SwitchWorkspace(self.org_ws) - - -def fetch_any_blob(name): - bb = None - try: - bb = workspace.FetchBlob(name) - except TypeError: - bb = workspace.FetchInt8Blob(name) - except Exception as e: - logger.error("Get blob {} error: {}".format(name, e)) - - return bb - - -# ==== torch/utils_caffe2/protobuf.py ========================================== - - -def get_pb_arg(pb, arg_name): - for x in pb.arg: - if x.name == arg_name: - return x - return None - - -def get_pb_arg_valf(pb, arg_name, default_val): - arg = get_pb_arg(pb, arg_name) - return arg.f if arg is not None else default_val - - -def get_pb_arg_floats(pb, arg_name, default_val): - arg = get_pb_arg(pb, arg_name) - return list(map(float, arg.floats)) if arg is not None else default_val - - -def get_pb_arg_ints(pb, arg_name, default_val): - arg = get_pb_arg(pb, arg_name) - return list(map(int, arg.ints)) if arg is not None else default_val - - -def get_pb_arg_vali(pb, arg_name, default_val): - arg = get_pb_arg(pb, arg_name) - return arg.i if arg is not None else default_val - - -def get_pb_arg_vals(pb, arg_name, default_val): - arg = get_pb_arg(pb, arg_name) - return arg.s if arg is not None else default_val - - -def get_pb_arg_valstrings(pb, arg_name, default_val): - arg = get_pb_arg(pb, arg_name) - return list(arg.strings) if arg is not None else default_val - - -def check_set_pb_arg(pb, arg_name, arg_attr, arg_value, 
allow_override=False): - arg = get_pb_arg(pb, arg_name) - if arg is None: - arg = putils.MakeArgument(arg_name, arg_value) - assert hasattr(arg, arg_attr) - pb.arg.extend([arg]) - if allow_override and getattr(arg, arg_attr) != arg_value: - logger.warning( - "Override argument {}: {} -> {}".format(arg_name, getattr(arg, arg_attr), arg_value) - ) - setattr(arg, arg_attr, arg_value) - else: - assert arg is not None - assert getattr(arg, arg_attr) == arg_value, "Existing value {}, new value {}".format( - getattr(arg, arg_attr), arg_value - ) - - -def _create_const_fill_op_from_numpy(name, tensor, device_option=None): - assert type(tensor) == np.ndarray - kTypeNameMapper = { - np.dtype("float32"): "GivenTensorFill", - np.dtype("int32"): "GivenTensorIntFill", - np.dtype("int64"): "GivenTensorInt64Fill", - np.dtype("uint8"): "GivenTensorStringFill", - } - - args_dict = {} - if tensor.dtype == np.dtype("uint8"): - args_dict.update({"values": [str(tensor.data)], "shape": [1]}) - else: - args_dict.update({"values": tensor, "shape": tensor.shape}) - - if device_option is not None: - args_dict["device_option"] = device_option - - return core.CreateOperator(kTypeNameMapper[tensor.dtype], [], [name], **args_dict) - - -def _create_const_fill_op_from_c2_int8_tensor(name, int8_tensor): - assert type(int8_tensor) == workspace.Int8Tensor - kTypeNameMapper = { - np.dtype("int32"): "Int8GivenIntTensorFill", - np.dtype("uint8"): "Int8GivenTensorFill", - } - - tensor = int8_tensor.data - assert tensor.dtype in [np.dtype("uint8"), np.dtype("int32")] - values = tensor.tobytes() if tensor.dtype == np.dtype("uint8") else tensor - - return core.CreateOperator( - kTypeNameMapper[tensor.dtype], - [], - [name], - values=values, - shape=tensor.shape, - Y_scale=int8_tensor.scale, - Y_zero_point=int8_tensor.zero_point, - ) - - -def create_const_fill_op( - name: str, - blob: Union[np.ndarray, workspace.Int8Tensor], - device_option: Optional[caffe2_pb2.DeviceOption] = None, -) -> caffe2_pb2.OperatorDef: - """ - Given a blob object, return the Caffe2 operator that creates this blob - as constant. Currently support NumPy tensor and Caffe2 Int8Tensor. - """ - - tensor_type = type(blob) - assert tensor_type in [ - np.ndarray, - workspace.Int8Tensor, - ], 'Error when creating const fill op for "{}", unsupported blob type: {}'.format( - name, type(blob) - ) - - if tensor_type == np.ndarray: - return _create_const_fill_op_from_numpy(name, blob, device_option) - elif tensor_type == workspace.Int8Tensor: - assert device_option is None - return _create_const_fill_op_from_c2_int8_tensor(name, blob) - - -def construct_init_net_from_params( - params: Dict[str, Any], device_options: Optional[Dict[str, caffe2_pb2.DeviceOption]] = None -) -> caffe2_pb2.NetDef: - """ - Construct the init_net from params dictionary - """ - init_net = caffe2_pb2.NetDef() - device_options = device_options or {} - for name, blob in params.items(): - if isinstance(blob, str): - logger.warning( - ( - "Blob {} with type {} is not supported in generating init net," - " skipped.".format(name, type(blob)) - ) - ) - continue - init_net.op.extend( - [create_const_fill_op(name, blob, device_option=device_options.get(name, None))] - ) - init_net.external_output.append(name) - return init_net - - -def get_producer_map(ssa): - """ - Return dict from versioned blob to (i, j), - where i is index of producer op, j is the index of output of that op. 
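`get_producer_map` and `get_consumer_map` simply invert the SSA structure that `core.get_ssa` returns; a pure-Python sketch on a hand-written toy SSA (blob names invented for illustration):

```python
import collections

# Toy SSA in the shape core.get_ssa() returns: one (inputs, outputs) pair per op,
# where every blob is a (name, version) tuple.
ssa = [
    ([("data", 0)], [("conv1", 0)]),
    ([("conv1", 0)], [("relu1", 0)]),
    ([("relu1", 0), ("conv1", 0)], [("sum1", 0)]),
]

producer_map = {}                              # versioned blob -> (op index, output slot)
for i, (_, outputs) in enumerate(ssa):
    for j, outp in enumerate(outputs):
        producer_map[outp] = (i, j)

consumer_map = collections.defaultdict(list)   # versioned blob -> [(op index, input slot)]
for i, (inputs, _) in enumerate(ssa):
    for j, inp in enumerate(inputs):
        consumer_map[inp].append((i, j))

assert producer_map[("relu1", 0)] == (1, 0)
assert consumer_map[("conv1", 0)] == [(1, 0), (2, 1)]
```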
- """ - producer_map = {} - for i in range(len(ssa)): - outputs = ssa[i][1] - for j, outp in enumerate(outputs): - producer_map[outp] = (i, j) - return producer_map - - -def get_consumer_map(ssa): - """ - Return dict from versioned blob to list of (i, j), - where i is index of consumer op, j is the index of input of that op. - """ - consumer_map = collections.defaultdict(list) - for i in range(len(ssa)): - inputs = ssa[i][0] - for j, inp in enumerate(inputs): - consumer_map[inp].append((i, j)) - return consumer_map - - -def get_params_from_init_net( - init_net: caffe2_pb2.NetDef, -) -> [Dict[str, Any], Dict[str, caffe2_pb2.DeviceOption]]: - """ - Take the output blobs from init_net by running it. - Outputs: - params: dict from blob name to numpy array - device_options: dict from blob name to the device option of its creating op - """ - # NOTE: this assumes that the params is determined by producer op with the - # only exception be CopyGPUToCPU which is CUDA op but returns CPU tensor. - def _get_device_option(producer_op): - if producer_op.type == "CopyGPUToCPU": - return caffe2_pb2.DeviceOption() - else: - return producer_op.device_option - - with ScopedWS("__get_params_from_init_net__", is_reset=True, is_cleanup=True) as ws: - ws.RunNetOnce(init_net) - params = {b: fetch_any_blob(b) for b in init_net.external_output} - ssa, versions = core.get_ssa(init_net) - producer_map = get_producer_map(ssa) - device_options = { - b: _get_device_option(init_net.op[producer_map[(b, versions[b])][0]]) - for b in init_net.external_output - } - return params, device_options - - -def _updater_raise(op, input_types, output_types): - raise RuntimeError( - "Failed to apply updater for op {} given input_types {} and" - " output_types {}".format(op, input_types, output_types) - ) - - -def _generic_status_identifier( - predict_net: caffe2_pb2.NetDef, - status_updater: Callable, - known_status: Dict[Tuple[str, int], Any], -) -> Dict[Tuple[str, int], Any]: - """ - Statically infer the status of each blob, the status can be such as device type - (CPU/GPU), layout (NCHW/NHWC), data type (float32/int8), etc. "Blob" here - is versioned blob (Tuple[str, int]) in the format compatible with ssa. - Inputs: - predict_net: the caffe2 network - status_updater: a callable, given an op and the status of its input/output, - it returns the updated status of input/output. `None` is used for - representing unknown status. - known_status: a dict containing known status, used as initialization. 
- Outputs: - A dict mapping from versioned blob to its status - """ - ssa, versions = core.get_ssa(predict_net) - versioned_ext_input = [(b, 0) for b in predict_net.external_input] - versioned_ext_output = [(b, versions[b]) for b in predict_net.external_output] - all_versioned_blobs = set().union(*[set(x[0] + x[1]) for x in ssa]) - - allowed_vbs = all_versioned_blobs.union(versioned_ext_input).union(versioned_ext_output) - assert all(k in allowed_vbs for k in known_status) - assert all(v is not None for v in known_status.values()) - _known_status = copy.deepcopy(known_status) - - def _check_and_update(key, value): - assert value is not None - if key in _known_status: - if not _known_status[key] == value: - raise RuntimeError( - "Conflict status for {}, existing status {}, new status {}".format( - key, _known_status[key], value - ) - ) - _known_status[key] = value - - def _update_i(op, ssa_i): - versioned_inputs = ssa_i[0] - versioned_outputs = ssa_i[1] - - inputs_status = [_known_status.get(b, None) for b in versioned_inputs] - outputs_status = [_known_status.get(b, None) for b in versioned_outputs] - - new_inputs_status, new_outputs_status = status_updater(op, inputs_status, outputs_status) - - for versioned_blob, status in zip( - versioned_inputs + versioned_outputs, new_inputs_status + new_outputs_status - ): - if status is not None: - _check_and_update(versioned_blob, status) - - for op, ssa_i in zip(predict_net.op, ssa): - _update_i(op, ssa_i) - for op, ssa_i in zip(reversed(predict_net.op), reversed(ssa)): - _update_i(op, ssa_i) - - # NOTE: This strictly checks that every blob from predict_net is assigned - # a known status. However sometimes that's impossible (e.g. a dead-end op), - # so we may relax this constraint if needed. - for k in all_versioned_blobs: - if k not in _known_status: - raise NotImplementedError( - "Can not infer the status for {}. 
Currently only support the case where" - " a single forward and backward pass can identify status for all blobs.".format(k) - ) - - return _known_status - - -def infer_device_type( - predict_net: caffe2_pb2.NetDef, - known_status: Dict[Tuple[str, int], Any], - device_name_style: str = "caffe2", -) -> Dict[Tuple[str, int], str]: - """ Return the device type ("cpu" or "gpu"/"cuda") of each (versioned) blob """ - - assert device_name_style in ["caffe2", "pytorch"] - _CPU_STR = "cpu" - _GPU_STR = "gpu" if device_name_style == "caffe2" else "cuda" - - def _copy_cpu_to_gpu_updater(op, input_types, output_types): - if input_types[0] == _GPU_STR or output_types[0] == _CPU_STR: - _updater_raise(op, input_types, output_types) - return ([_CPU_STR], [_GPU_STR]) - - def _copy_gpu_to_cpu_updater(op, input_types, output_types): - if input_types[0] == _CPU_STR or output_types[0] == _GPU_STR: - _updater_raise(op, input_types, output_types) - return ([_GPU_STR], [_CPU_STR]) - - def _other_ops_updater(op, input_types, output_types): - non_none_types = [x for x in input_types + output_types if x is not None] - if len(non_none_types) > 0: - the_type = non_none_types[0] - if not all(x == the_type for x in non_none_types): - _updater_raise(op, input_types, output_types) - else: - the_type = None - return ([the_type for _ in op.input], [the_type for _ in op.output]) - - def _device_updater(op, *args, **kwargs): - return { - "CopyCPUToGPU": _copy_cpu_to_gpu_updater, - "CopyGPUToCPU": _copy_gpu_to_cpu_updater, - }.get(op.type, _other_ops_updater)(op, *args, **kwargs) - - return _generic_status_identifier(predict_net, _device_updater, known_status) - - -# ==== torch/utils_caffe2/vis.py =============================================== - - -def _modify_blob_names(ops, blob_rename_f): - ret = [] - - def _replace_list(blob_list, replaced_list): - del blob_list[:] - blob_list.extend(replaced_list) - - for x in ops: - cur = copy.deepcopy(x) - _replace_list(cur.input, list(map(blob_rename_f, cur.input))) - _replace_list(cur.output, list(map(blob_rename_f, cur.output))) - ret.append(cur) - - return ret - - -def _rename_blob(name, blob_sizes, blob_ranges): - def _list_to_str(bsize): - ret = ", ".join([str(x) for x in bsize]) - ret = "[" + ret + "]" - return ret - - ret = name - if blob_sizes is not None and name in blob_sizes: - ret += "\n" + _list_to_str(blob_sizes[name]) - if blob_ranges is not None and name in blob_ranges: - ret += "\n" + _list_to_str(blob_ranges[name]) - - return ret - - -# graph_name could not contain word 'graph' -def save_graph(net, file_name, graph_name="net", op_only=True, blob_sizes=None, blob_ranges=None): - blob_rename_f = functools.partial(_rename_blob, blob_sizes=blob_sizes, blob_ranges=blob_ranges) - return save_graph_base(net, file_name, graph_name, op_only, blob_rename_f) - - -def save_graph_base(net, file_name, graph_name="net", op_only=True, blob_rename_func=None): - graph = None - ops = net.op - if blob_rename_func is not None: - ops = _modify_blob_names(ops, blob_rename_func) - if not op_only: - graph = net_drawer.GetPydotGraph(ops, graph_name, rankdir="TB") - else: - graph = net_drawer.GetPydotGraphMinimal( - ops, graph_name, rankdir="TB", minimal_dependency=True - ) - - try: - par_dir = os.path.dirname(file_name) - if not os.path.exists(par_dir): - os.makedirs(par_dir) - - format = os.path.splitext(os.path.basename(file_name))[-1] - if format == ".png": - graph.write_png(file_name) - elif format == ".pdf": - graph.write_pdf(file_name) - elif format == ".svg": - 
graph.write_svg(file_name) - else: - print("Incorrect format {}".format(format)) - except Exception as e: - print("Error when writing graph to image {}".format(e)) - - return graph - - -# ==== torch/utils_toffee/aten_to_caffe2.py ==================================== - - -def group_norm_replace_aten_with_caffe2(predict_net: caffe2_pb2.NetDef): - """ - For ONNX exported model, GroupNorm will be represented as ATen op, - this can be a drop in replacement from ATen to GroupNorm - """ - count = 0 - for op in predict_net.op: - if op.type == "ATen": - op_name = get_pb_arg_vals(op, "operator", None) # return byte in py3 - if op_name and op_name.decode() == "group_norm": - op.arg.remove(get_pb_arg(op, "operator")) - - if get_pb_arg_vali(op, "cudnn_enabled", None): - op.arg.remove(get_pb_arg(op, "cudnn_enabled")) - - num_groups = get_pb_arg_vali(op, "num_groups", None) - if num_groups is not None: - op.arg.remove(get_pb_arg(op, "num_groups")) - check_set_pb_arg(op, "group", "i", num_groups) - - op.type = "GroupNorm" - count += 1 - if count > 1: - logger.info("Replaced {} ATen operator to GroupNormOp".format(count)) - - -# ==== torch/utils_toffee/alias.py ============================================= - - -def alias(x, name, is_backward=False): - if not torch.onnx.is_in_onnx_export(): - return x - assert isinstance(x, torch.Tensor) - return torch.ops._caffe2.AliasWithName(x, name, is_backward=is_backward) - - -def fuse_alias_placeholder(predict_net, init_net): - """ Remove AliasWithName placeholder and rename the input/output of it """ - # First we finish all the re-naming - for i, op in enumerate(predict_net.op): - if op.type == "AliasWithName": - assert len(op.input) == 1 - assert len(op.output) == 1 - name = get_pb_arg_vals(op, "name", None).decode() - is_backward = bool(get_pb_arg_vali(op, "is_backward", 0)) - rename_op_input(predict_net, init_net, i, 0, name, from_producer=is_backward) - rename_op_output(predict_net, i, 0, name) - - # Remove AliasWithName, should be very safe since it's a non-op - new_ops = [] - for op in predict_net.op: - if op.type != "AliasWithName": - new_ops.append(op) - else: - # safety check - assert op.input == op.output - assert op.input[0] == op.arg[0].s.decode() - del predict_net.op[:] - predict_net.op.extend(new_ops) - - -# ==== torch/utils_caffe2/graph_transform.py =================================== - - -class IllegalGraphTransformError(ValueError): - """ When a graph transform function call can't be executed. 
""" - - -def _rename_versioned_blob_in_proto( - proto: caffe2_pb2.NetDef, - old_name: str, - new_name: str, - version: int, - ssa: List[Tuple[List[Tuple[str, int]], List[Tuple[str, int]]]], - start_versions: Dict[str, int], - end_versions: Dict[str, int], -): - """ In given proto, rename all blobs with matched version """ - # Operater list - for op, i_th_ssa in zip(proto.op, ssa): - versioned_inputs, versioned_outputs = i_th_ssa - for i in range(len(op.input)): - if versioned_inputs[i] == (old_name, version): - op.input[i] = new_name - for i in range(len(op.output)): - if versioned_outputs[i] == (old_name, version): - op.output[i] = new_name - # external_input - if start_versions.get(old_name, 0) == version: - for i in range(len(proto.external_input)): - if proto.external_input[i] == old_name: - proto.external_input[i] = new_name - # external_output - if end_versions.get(old_name, 0) == version: - for i in range(len(proto.external_output)): - if proto.external_output[i] == old_name: - proto.external_output[i] = new_name - - -def rename_op_input( - predict_net: caffe2_pb2.NetDef, - init_net: caffe2_pb2.NetDef, - op_id: int, - input_id: int, - new_name: str, - from_producer: bool = False, -): - """ - Rename the op_id-th operator in predict_net, change it's input_id-th input's - name to the new_name. It also does automatic re-route and change - external_input and init_net if necessary. - - It requires the input is only consumed by this op. - - This function modifies predict_net and init_net in-place. - - When from_producer is enable, this also updates other operators that consumes - the same input. Be cautious because may trigger unintended behavior. - """ - assert isinstance(predict_net, caffe2_pb2.NetDef) - assert isinstance(init_net, caffe2_pb2.NetDef) - - init_net_ssa, init_net_versions = core.get_ssa(init_net) - predict_net_ssa, predict_net_versions = core.get_ssa( - predict_net, copy.deepcopy(init_net_versions) - ) - - versioned_inputs, versioned_outputs = predict_net_ssa[op_id] - old_name, version = versioned_inputs[input_id] - - if from_producer: - producer_map = get_producer_map(predict_net_ssa) - if not (old_name, version) in producer_map: - raise NotImplementedError( - "Can't find producer, the input {} is probably from" - " init_net, this is not supported yet.".format(old_name) - ) - producer = producer_map[(old_name, version)] - rename_op_output(predict_net, producer[0], producer[1], new_name) - return - - def contain_targets(op_ssa): - return (old_name, version) in op_ssa[0] - - is_consumer = [contain_targets(op_ssa) for op_ssa in predict_net_ssa] - if sum(is_consumer) > 1: - raise IllegalGraphTransformError( - ( - "Input '{}' of operator(#{}) are consumed by other ops, please use" - + " rename_op_output on the producer instead. Offending op: \n{}" - ).format(old_name, op_id, predict_net.op[op_id]) - ) - - # update init_net - _rename_versioned_blob_in_proto( - init_net, old_name, new_name, version, init_net_ssa, {}, init_net_versions - ) - # update predict_net - _rename_versioned_blob_in_proto( - predict_net, - old_name, - new_name, - version, - predict_net_ssa, - init_net_versions, - predict_net_versions, - ) - - -def rename_op_output(predict_net: caffe2_pb2.NetDef, op_id: int, output_id: int, new_name: str): - """ - Rename the op_id-th operator in predict_net, change it's output_id-th input's - name to the new_name. It also does automatic re-route and change - external_output and if necessary. - - It allows multiple consumers of its output. 
- - This function modifies predict_net in-place, doesn't need init_net. - """ - assert isinstance(predict_net, caffe2_pb2.NetDef) - - ssa, blob_versions = core.get_ssa(predict_net) - - versioned_inputs, versioned_outputs = ssa[op_id] - old_name, version = versioned_outputs[output_id] - - # update predict_net - _rename_versioned_blob_in_proto( - predict_net, old_name, new_name, version, ssa, {}, blob_versions - ) - - -def get_sub_graph_external_input_output( - predict_net: caffe2_pb2.NetDef, sub_graph_op_indices: List[int] -) -> Tuple[List[Tuple[str, int]], List[Tuple[str, int]]]: - """ - Return the list of external input/output of sub-graph, - each element is tuple of the name and corresponding version in predict_net. - - external input/output is defined the same way as caffe2 NetDef. - """ - ssa, versions = core.get_ssa(predict_net) - - all_inputs = [] - all_outputs = [] - for op_id in sub_graph_op_indices: - all_inputs += [inp for inp in ssa[op_id][0] if inp not in all_inputs] - all_outputs += list(ssa[op_id][1]) # ssa output won't repeat - - # for versioned blobs, external inputs are just those blob in all_inputs - # but not in all_outputs - ext_inputs = [inp for inp in all_inputs if inp not in all_outputs] - - # external outputs are essentially outputs of this subgraph that are used - # outside of this sub-graph (including predict_net.external_output) - all_other_inputs = sum( - (ssa[i][0] for i in range(len(ssa)) if i not in sub_graph_op_indices), - [(outp, versions[outp]) for outp in predict_net.external_output], - ) - ext_outputs = [outp for outp in all_outputs if outp in set(all_other_inputs)] - - return ext_inputs, ext_outputs - - -class DiGraph: - """ A DAG representation of caffe2 graph, each vertice is a versioned blob. """ - - def __init__(self): - self.vertices = set() - self.graph = collections.defaultdict(list) - - def add_edge(self, u, v): - self.graph[u].append(v) - self.vertices.add(u) - self.vertices.add(v) - - # grab from https://www.geeksforgeeks.org/find-paths-given-source-destination/ - def get_all_paths(self, s, d): - visited = {k: False for k in self.vertices} - path = [] - all_paths = [] - - def _get_all_paths_util(graph, u, d, visited, path): - visited[u] = True - path.append(u) - if u == d: - all_paths.append(copy.deepcopy(path)) - else: - for i in graph[u]: - if not visited[i]: - _get_all_paths_util(graph, i, d, visited, path) - path.pop() - visited[u] = False - - _get_all_paths_util(self.graph, s, d, visited, path) - return all_paths - - @staticmethod - def from_ssa(ssa): - graph = DiGraph() - for op_id in range(len(ssa)): - for inp in ssa[op_id][0]: - for outp in ssa[op_id][1]: - graph.add_edge(inp, outp) - return graph - - -def _get_dependency_chain(ssa, versioned_target, versioned_source): - """ - Return the index list of relevant operator to produce target blob from source blob, - if there's no dependency, return empty list. - """ - - # finding all paths between nodes can be O(N!), thus we can only search - # in the subgraph using the op starting from the first consumer of source blob - # to the producer of the target blob. 
- consumer_map = get_consumer_map(ssa) - producer_map = get_producer_map(ssa) - start_op = min(x[0] for x in consumer_map[versioned_source]) - 15 - end_op = ( - producer_map[versioned_target][0] + 15 if versioned_target in producer_map else start_op - ) - sub_graph_ssa = ssa[start_op : end_op + 1] - if len(sub_graph_ssa) > 30: - logger.warning( - "Subgraph bebetween {} and {} is large (from op#{} to op#{}), it" - " might take non-trival time to find all paths between them.".format( - versioned_source, versioned_target, start_op, end_op - ) - ) - - dag = DiGraph.from_ssa(sub_graph_ssa) - paths = dag.get_all_paths(versioned_source, versioned_target) # include two ends - ops_in_paths = [[producer_map[blob][0] for blob in path[1:]] for path in paths] - return sorted(set().union(*[set(ops) for ops in ops_in_paths])) - - -def identify_reshape_sub_graph(predict_net: caffe2_pb2.NetDef) -> List[List[int]]: - """ - Idenfity the reshape sub-graph in a protobuf. - The reshape sub-graph is defined as matching the following pattern: - - (input_blob) -> Op_1 -> ... -> Op_N -> (new_shape) -─┐ - └-------------------------------------------> Reshape -> (output_blob) - - Return: - List of sub-graphs, each sub-graph is represented as a list of indices - of the relavent ops, [Op_1, Op_2, ..., Op_N, Reshape] - """ - - ssa, _ = core.get_ssa(predict_net) - - ret = [] - for i, op in enumerate(predict_net.op): - if op.type == "Reshape": - assert len(op.input) == 2 - input_ssa = ssa[i][0] - data_source = input_ssa[0] - shape_source = input_ssa[1] - op_indices = _get_dependency_chain(ssa, shape_source, data_source) - ret.append(op_indices + [i]) - return ret - - -def remove_reshape_for_fc(predict_net, params): - """ - In PyTorch nn.Linear has to take 2D tensor, this often leads to reshape - a 4D tensor to 2D by calling .view(). However this (dynamic) reshaping - doesn't work well with ONNX and Int8 tools, and cause using extra - ops (eg. ExpandDims) that might not be available on mobile. - Luckily Caffe2 supports 4D tensor for FC, so we can remove those reshape - after exporting ONNX model. - """ - from caffe2.python import core - - # find all reshape sub-graph that can be removed, which is now all Reshape - # sub-graph whose output is only consumed by FC. - # TODO: to make it safer, we may need the actually value to better determine - # if a Reshape before FC is removable. - reshape_sub_graphs = identify_reshape_sub_graph(predict_net) - sub_graphs_to_remove = [] - for reshape_sub_graph in reshape_sub_graphs: - reshape_op_id = reshape_sub_graph[-1] - assert predict_net.op[reshape_op_id].type == "Reshape" - ssa, _ = core.get_ssa(predict_net) - reshape_output = ssa[reshape_op_id][1][0] - consumers = [i for i in range(len(ssa)) if reshape_output in ssa[i][0]] - if all(predict_net.op[consumer].type == "FC" for consumer in consumers): - # safety check if the sub-graph is isolated, for this reshape sub-graph, - # it means it has one non-param external input and one external output. - ext_inputs, ext_outputs = get_sub_graph_external_input_output( - predict_net, reshape_sub_graph - ) - non_params_ext_inputs = [inp for inp in ext_inputs if inp[1] != 0] - if len(non_params_ext_inputs) == 1 and len(ext_outputs) == 1: - sub_graphs_to_remove.append(reshape_sub_graph) - - # perform removing subgraph by: - # 1: rename the Reshape's output to its input, then the graph can be - # seen as in-place itentify, meaning whose external input/output are the same. - # 2: simply remove those ops. 
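The Reshape-before-FC pattern targeted here usually originates from the common PyTorch idiom of flattening a 4D feature map before `nn.Linear`; a toy module (not from this repo) showing where such a Reshape comes from:

```python
import torch
from torch import nn

class ToyHead(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(256 * 7 * 7, 10)

    def forward(self, x):                # x: [N, 256, 7, 7]
        x = x.flatten(start_dim=1)       # exported as a (dynamic) Reshape before the FC
        return self.fc(x)

out = ToyHead()(torch.rand(2, 256, 7, 7))
assert out.shape == (2, 10)
```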
- remove_op_ids = [] - params_to_remove = [] - for sub_graph in sub_graphs_to_remove: - logger.info( - "Remove Reshape sub-graph:\n{}".format( - "".join(["(#{:>4})\n{}".format(i, predict_net.op[i]) for i in sub_graph]) - ) - ) - reshape_op_id = sub_graph[-1] - new_reshap_output = predict_net.op[reshape_op_id].input[0] - rename_op_output(predict_net, reshape_op_id, 0, new_reshap_output) - ext_inputs, ext_outputs = get_sub_graph_external_input_output(predict_net, sub_graph) - non_params_ext_inputs = [inp for inp in ext_inputs if inp[1] != 0] - params_ext_inputs = [inp for inp in ext_inputs if inp[1] == 0] - assert len(non_params_ext_inputs) == 1 and len(ext_outputs) == 1 - assert ext_outputs[0][0] == non_params_ext_inputs[0][0] - assert ext_outputs[0][1] == non_params_ext_inputs[0][1] + 1 - remove_op_ids.extend(sub_graph) - params_to_remove.extend(params_ext_inputs) - - predict_net = copy.deepcopy(predict_net) - new_ops = [op for i, op in enumerate(predict_net.op) if i not in remove_op_ids] - del predict_net.op[:] - predict_net.op.extend(new_ops) - for versioned_params in params_to_remove: - name = versioned_params[0] - logger.info("Remove params: {} from init_net and predict_net.external_input".format(name)) - del params[name] - predict_net.external_input.remove(name) - - return predict_net, params - - -def fuse_copy_between_cpu_and_gpu(predict_net: caffe2_pb2.NetDef): - """ - In-place fuse extra copy ops between cpu/gpu for the following case: - a -CopyAToB-> b -CopyBToA> c1 -NextOp1-> d1 - -CopyBToA> c2 -NextOp2-> d2 - The fused network will look like: - a -NextOp1-> d1 - -NextOp2-> d2 - """ - - _COPY_OPS = ["CopyCPUToGPU", "CopyGPUToCPU"] - - def _fuse_once(predict_net): - ssa, blob_versions = core.get_ssa(predict_net) - consumer_map = get_consumer_map(ssa) - versioned_external_output = [ - (name, blob_versions[name]) for name in predict_net.external_output - ] - - for op_id, op in enumerate(predict_net.op): - if op.type in _COPY_OPS: - fw_copy_versioned_output = ssa[op_id][1][0] - consumer_ids = [x[0] for x in consumer_map[fw_copy_versioned_output]] - reverse_op_type = _COPY_OPS[1 - _COPY_OPS.index(op.type)] - - is_fusable = ( - len(consumer_ids) > 0 - and fw_copy_versioned_output not in versioned_external_output - and all( - predict_net.op[_op_id].type == reverse_op_type - and ssa[_op_id][1][0] not in versioned_external_output - for _op_id in consumer_ids - ) - ) - - if is_fusable: - for rv_copy_op_id in consumer_ids: - # making each NextOp uses "a" directly and removing Copy ops - rs_copy_versioned_output = ssa[rv_copy_op_id][1][0] - next_op_id, inp_id = consumer_map[rs_copy_versioned_output][0] - predict_net.op[next_op_id].input[inp_id] = op.input[0] - # remove CopyOps - new_ops = [ - op - for i, op in enumerate(predict_net.op) - if i != op_id and i not in consumer_ids - ] - del predict_net.op[:] - predict_net.op.extend(new_ops) - return True - - return False - - # _fuse_once returns False is nothing can be fused - while _fuse_once(predict_net): - pass - - -def remove_dead_end_ops(net_def: caffe2_pb2.NetDef): - """ remove ops if its output is not used or not in external_output """ - ssa, versions = core.get_ssa(net_def) - versioned_external_output = [(name, versions[name]) for name in net_def.external_output] - consumer_map = get_consumer_map(ssa) - removed_op_ids = set() - - def _is_dead_end(versioned_blob): - return not ( - versioned_blob in versioned_external_output - or ( - len(consumer_map[versioned_blob]) > 0 - and all(x[0] not in removed_op_ids for x in 
consumer_map[versioned_blob]) - ) - ) - - for i, ssa_i in reversed(list(enumerate(ssa))): - versioned_outputs = ssa_i[1] - if all(_is_dead_end(outp) for outp in versioned_outputs): - removed_op_ids.add(i) - - # simply removing those deadend ops should have no effect to external_output - new_ops = [op for i, op in enumerate(net_def.op) if i not in removed_op_ids] - del net_def.op[:] - net_def.op.extend(new_ops) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/__init__.py deleted file mode 100644 index 2753739..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .batch_norm import FrozenBatchNorm2d, get_norm, NaiveSyncBatchNorm -from .deform_conv import DeformConv, ModulatedDeformConv -from .mask_ops import paste_masks_in_image -from .nms import batched_nms, batched_nms_rotated, nms, nms_rotated -from .roi_align import ROIAlign, roi_align -from .roi_align_rotated import ROIAlignRotated, roi_align_rotated -from .shape_spec import ShapeSpec -from .wrappers import BatchNorm2d, Conv2d, ConvTranspose2d, cat, interpolate, Linear -from .blocks import CNNBlockBase - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/batch_norm.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/batch_norm.py deleted file mode 100644 index 1339c6e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/batch_norm.py +++ /dev/null @@ -1,242 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -import torch -import torch.distributed as dist -from torch import nn -from torch.autograd.function import Function -from torch.nn import functional as F - -from detectron2.utils import comm - -from .wrappers import BatchNorm2d - -TORCH_VERSION = tuple(int(x) for x in torch.__version__.split(".")[:2]) - - -class FrozenBatchNorm2d(nn.Module): - """ - BatchNorm2d where the batch statistics and the affine parameters are fixed. - - It contains non-trainable buffers called - "weight" and "bias", "running_mean", "running_var", - initialized to perform identity transformation. - - The pre-trained backbone models from Caffe2 only contain "weight" and "bias", - which are computed from the original four parameters of BN. - The affine transform `x * weight + bias` will perform the equivalent - computation of `(x - running_mean) / sqrt(running_var) * weight + bias`. - When loading a backbone model from Caffe2, "running_mean" and "running_var" - will be left unchanged as identity transformation. - - Other pre-trained backbone models may contain all 4 parameters. - - The forward is implemented by `F.batch_norm(..., training=False)`. - """ - - _version = 3 - - def __init__(self, num_features, eps=1e-5): - super().__init__() - self.num_features = num_features - self.eps = eps - self.register_buffer("weight", torch.ones(num_features)) - self.register_buffer("bias", torch.zeros(num_features)) - self.register_buffer("running_mean", torch.zeros(num_features)) - self.register_buffer("running_var", torch.ones(num_features) - eps) - - def forward(self, x): - if x.requires_grad: - # When gradients are needed, F.batch_norm will use extra memory - # because its backward op computes gradients for weight/bias as well. 
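The branch below folds the frozen statistics into a per-channel scale and bias; a quick self-contained check, on random toy values, that this folding matches `F.batch_norm` in eval mode:

```python
import torch
from torch.nn import functional as F

C, eps = 8, 1e-5
x = torch.randn(2, C, 4, 4)
weight, bias = torch.rand(C), torch.rand(C)
running_mean, running_var = torch.randn(C), torch.rand(C) + 0.5

# Fold (mean, var, weight, bias) into one affine transform per channel.
scale = weight * (running_var + eps).rsqrt()
shift = bias - running_mean * scale
folded = x * scale.reshape(1, -1, 1, 1) + shift.reshape(1, -1, 1, 1)

fused = F.batch_norm(x, running_mean, running_var, weight, bias, training=False, eps=eps)
assert torch.allclose(folded, fused, atol=1e-5)
```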
- scale = self.weight * (self.running_var + self.eps).rsqrt() - bias = self.bias - self.running_mean * scale - scale = scale.reshape(1, -1, 1, 1) - bias = bias.reshape(1, -1, 1, 1) - return x * scale + bias - else: - # When gradients are not needed, F.batch_norm is a single fused op - # and provide more optimization opportunities. - return F.batch_norm( - x, - self.running_mean, - self.running_var, - self.weight, - self.bias, - training=False, - eps=self.eps, - ) - - def _load_from_state_dict( - self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs - ): - version = local_metadata.get("version", None) - - if version is None or version < 2: - # No running_mean/var in early versions - # This will silent the warnings - if prefix + "running_mean" not in state_dict: - state_dict[prefix + "running_mean"] = torch.zeros_like(self.running_mean) - if prefix + "running_var" not in state_dict: - state_dict[prefix + "running_var"] = torch.ones_like(self.running_var) - - if version is not None and version < 3: - logger = logging.getLogger(__name__) - logger.info("FrozenBatchNorm {} is upgraded to version 3.".format(prefix.rstrip("."))) - # In version < 3, running_var are used without +eps. - state_dict[prefix + "running_var"] -= self.eps - - super()._load_from_state_dict( - state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs - ) - - def __repr__(self): - return "FrozenBatchNorm2d(num_features={}, eps={})".format(self.num_features, self.eps) - - @classmethod - def convert_frozen_batchnorm(cls, module): - """ - Convert BatchNorm/SyncBatchNorm in module into FrozenBatchNorm. - - Args: - module (torch.nn.Module): - - Returns: - If module is BatchNorm/SyncBatchNorm, returns a new module. - Otherwise, in-place convert module and return it. - - Similar to convert_sync_batchnorm in - https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/batchnorm.py - """ - bn_module = nn.modules.batchnorm - bn_module = (bn_module.BatchNorm2d, bn_module.SyncBatchNorm) - res = module - if isinstance(module, bn_module): - res = cls(module.num_features) - if module.affine: - res.weight.data = module.weight.data.clone().detach() - res.bias.data = module.bias.data.clone().detach() - res.running_mean.data = module.running_mean.data - res.running_var.data = module.running_var.data - res.eps = module.eps - else: - for name, child in module.named_children(): - new_child = cls.convert_frozen_batchnorm(child) - if new_child is not child: - res.add_module(name, new_child) - return res - - -def get_norm(norm, out_channels): - """ - Args: - norm (str or callable): either one of BN, SyncBN, FrozenBN, GN; - or a callable that takes a channel number and returns - the normalization layer as a nn.Module. - - Returns: - nn.Module or None: the normalization layer - """ - if isinstance(norm, str): - if len(norm) == 0: - return None - norm = { - "BN": BatchNorm2d, - # Fixed in https://github.com/pytorch/pytorch/pull/36382 - "SyncBN": NaiveSyncBatchNorm if TORCH_VERSION <= (1, 5) else nn.SyncBatchNorm, - "FrozenBN": FrozenBatchNorm2d, - "GN": lambda channels: nn.GroupNorm(32, channels), - # for debugging: - "nnSyncBN": nn.SyncBatchNorm, - "naiveSyncBN": NaiveSyncBatchNorm, - }[norm] - return norm(out_channels) - - -class AllReduce(Function): - @staticmethod - def forward(ctx, input): - input_list = [torch.zeros_like(input) for k in range(dist.get_world_size())] - # Use allgather instead of allreduce since I don't trust in-place operations .. 
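As a quick illustration of the get_norm factory defined above, the sketch below exercises the string-to-module mapping. It assumes an environment where the detectron2 package (and the layers/__init__.py shown earlier) is importable, which is precisely what this diff removes, so treat it as documentation of the deleted behaviour rather than working code for this repository:

from torch import nn
from detectron2.layers import FrozenBatchNorm2d, get_norm

gn = get_norm("GN", 64)             # GroupNorm with 32 groups over 64 channels
assert isinstance(gn, nn.GroupNorm) and gn.num_groups == 32

frozen = get_norm("FrozenBN", 64)   # fixed-statistics BN, buffers only
assert isinstance(frozen, FrozenBatchNorm2d)
assert len(list(frozen.parameters())) == 0

assert get_norm("", 64) is None     # empty string means "no normalization"

# Per the docstring, a callable taking the channel count is also accepted.
ln_like = get_norm(lambda c: nn.GroupNorm(1, c), 64)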
- dist.all_gather(input_list, input, async_op=False) - inputs = torch.stack(input_list, dim=0) - return torch.sum(inputs, dim=0) - - @staticmethod - def backward(ctx, grad_output): - dist.all_reduce(grad_output, async_op=False) - return grad_output - - -class NaiveSyncBatchNorm(BatchNorm2d): - """ - In PyTorch<=1.5, `nn.SyncBatchNorm` has incorrect gradient - when the batch size on each worker is different. - (e.g., when scale augmentation is used, or when it is applied to mask head). - - This is a slower but correct alternative to `nn.SyncBatchNorm`. - - Note: - There isn't a single definition of Sync BatchNorm. - - When ``stats_mode==""``, this module computes overall statistics by using - statistics of each worker with equal weight. The result is true statistics - of all samples (as if they are all on one worker) only when all workers - have the same (N, H, W). This mode does not support inputs with zero batch size. - - When ``stats_mode=="N"``, this module computes overall statistics by weighting - the statistics of each worker by their ``N``. The result is true statistics - of all samples (as if they are all on one worker) only when all workers - have the same (H, W). It is slower than ``stats_mode==""``. - - Even though the result of this module may not be the true statistics of all samples, - it may still be reasonable because it might be preferrable to assign equal weights - to all workers, regardless of their (H, W) dimension, instead of putting larger weight - on larger images. From preliminary experiments, little difference is found between such - a simplified implementation and an accurate computation of overall mean & variance. - """ - - def __init__(self, *args, stats_mode="", **kwargs): - super().__init__(*args, **kwargs) - assert stats_mode in ["", "N"] - self._stats_mode = stats_mode - - def forward(self, input): - if comm.get_world_size() == 1 or not self.training: - return super().forward(input) - - B, C = input.shape[0], input.shape[1] - - mean = torch.mean(input, dim=[0, 2, 3]) - meansqr = torch.mean(input * input, dim=[0, 2, 3]) - - if self._stats_mode == "": - assert B > 0, 'SyncBatchNorm(stats_mode="") does not support zero batch size.' 
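The NaiveSyncBatchNorm docstring above says that with stats_mode=="" each worker's statistics are averaged with equal weight, which equals the true global statistics only when every worker sees the same (N, H, W). A standalone sketch of that arithmetic (no torch.distributed; workers are simulated as a plain list), using the same mean / mean-of-squares decomposition as the branch that follows:

import torch

C = 8
workers = [torch.randn(4, C, 16, 16) for _ in range(3)]   # identical shape everywhere

means = torch.stack([w.mean(dim=[0, 2, 3]) for w in workers])
meansqrs = torch.stack([(w * w).mean(dim=[0, 2, 3]) for w in workers])

mean = means.mean(dim=0)        # what AllReduce.apply(vec) / world_size yields
meansqr = meansqrs.mean(dim=0)
var = meansqr - mean * mean     # biased variance, as used in the forward pass

everything = torch.cat(workers, dim=0)
assert torch.allclose(mean, everything.mean(dim=[0, 2, 3]), atol=1e-5)
assert torch.allclose(var, everything.var(dim=[0, 2, 3], unbiased=False), atol=1e-5)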
- vec = torch.cat([mean, meansqr], dim=0) - vec = AllReduce.apply(vec) * (1.0 / dist.get_world_size()) - mean, meansqr = torch.split(vec, C) - momentum = self.momentum - else: - if B == 0: - vec = torch.zeros([2 * C + 1], device=mean.device, dtype=mean.dtype) - vec = vec + input.sum() # make sure there is gradient w.r.t input - else: - vec = torch.cat( - [mean, meansqr, torch.ones([1], device=mean.device, dtype=mean.dtype)], dim=0 - ) - vec = AllReduce.apply(vec * B) - - total_batch = vec[-1].detach() - momentum = total_batch.clamp(max=1) * self.momentum # no update if total_batch is 0 - total_batch = torch.max(total_batch, torch.ones_like(total_batch)) # avoid div-by-zero - mean, meansqr, _ = torch.split(vec / total_batch, C) - - var = meansqr - mean * mean - invstd = torch.rsqrt(var + self.eps) - scale = self.weight * invstd - bias = self.bias - mean * scale - scale = scale.reshape(1, -1, 1, 1) - bias = bias.reshape(1, -1, 1, 1) - - self.running_mean += momentum * (mean.detach() - self.running_mean) - self.running_var += momentum * (var.detach() - self.running_var) - return input * scale + bias diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/blocks.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/blocks.py deleted file mode 100644 index 1d06fec..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/blocks.py +++ /dev/null @@ -1,48 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -from torch import nn - -from .batch_norm import FrozenBatchNorm2d - - -class CNNBlockBase(nn.Module): - """ - A CNN block is assumed to have input channels, output channels and a stride. - The input and output of `forward()` method must be NCHW tensors. - The method can perform arbitrary computation but must match the given - channels and stride specification. - - Attribute: - in_channels (int): - out_channels (int): - stride (int): - """ - - def __init__(self, in_channels, out_channels, stride): - """ - The `__init__` method of any subclass should also contain these arguments. - - Args: - in_channels (int): - out_channels (int): - stride (int): - """ - super().__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.stride = stride - - def freeze(self): - """ - Make this block not trainable. - This method sets all parameters to `requires_grad=False`, - and convert all BatchNorm layers to FrozenBatchNorm - - Returns: - the block itself - """ - for p in self.parameters(): - p.requires_grad = False - FrozenBatchNorm2d.convert_frozen_batchnorm(self) - return self diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/README.md b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/README.md deleted file mode 100644 index 778ed3d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/README.md +++ /dev/null @@ -1,7 +0,0 @@ - - -To add a new Op: - -1. Create a new directory -2. Implement new ops there -3. Delcare its Python interface in `vision.cpp`. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign.h b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign.h deleted file mode 100644 index 2d95eac..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign.h +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -#pragma once -#include - -namespace detectron2 { - -at::Tensor ROIAlign_forward_cpu( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - bool aligned); - -at::Tensor ROIAlign_backward_cpu( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio, - bool aligned); - -#ifdef WITH_CUDA -at::Tensor ROIAlign_forward_cuda( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - bool aligned); - -at::Tensor ROIAlign_backward_cuda( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio, - bool aligned); -#endif - -// Interface for Python -inline at::Tensor ROIAlign_forward( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - bool aligned) { - if (input.is_cuda()) { -#ifdef WITH_CUDA - return ROIAlign_forward_cuda( - input, - rois, - spatial_scale, - pooled_height, - pooled_width, - sampling_ratio, - aligned); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return ROIAlign_forward_cpu( - input, - rois, - spatial_scale, - pooled_height, - pooled_width, - sampling_ratio, - aligned); -} - -inline at::Tensor ROIAlign_backward( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio, - bool aligned) { - if (grad.is_cuda()) { -#ifdef WITH_CUDA - return ROIAlign_backward_cuda( - grad, - rois, - spatial_scale, - pooled_height, - pooled_width, - batch_size, - channels, - height, - width, - sampling_ratio, - aligned); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return ROIAlign_backward_cpu( - grad, - rois, - spatial_scale, - pooled_height, - pooled_width, - batch_size, - channels, - height, - width, - sampling_ratio, - aligned); -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign_cpu.cpp b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign_cpu.cpp deleted file mode 100644 index 52fc83f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign_cpu.cpp +++ /dev/null @@ -1,508 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -#include -#include "ROIAlign.h" - -namespace { - -// implementation taken from Caffe2 -template -struct PreCalc { - int pos1; - int pos2; - int pos3; - int pos4; - T w1; - T w2; - T w3; - T w4; -}; - -template -void pre_calc_for_bilinear_interpolate( - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int iy_upper, - const int ix_upper, - T roi_start_h, - T roi_start_w, - T bin_size_h, - T bin_size_w, - int roi_bin_grid_h, - int roi_bin_grid_w, - std::vector>& pre_calc) { - int pre_calc_index = 0; - for (int ph = 0; ph < pooled_height; ph++) { - for (int pw = 0; pw < pooled_width; pw++) { - for (int iy = 0; iy < iy_upper; iy++) { - const T yy = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < ix_upper; ix++) { - const T xx = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - T x = xx; - T y = yy; - // deal with: inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - PreCalc pc; - pc.pos1 = 0; - pc.pos2 = 0; - pc.pos3 = 0; - pc.pos4 = 0; - pc.w1 = 0; - pc.w2 = 0; - pc.w3 = 0; - pc.w4 = 0; - pre_calc[pre_calc_index] = pc; - pre_calc_index += 1; - continue; - } - - if (y <= 0) { - y = 0; - } - if (x <= 0) { - x = 0; - } - - int y_low = (int)y; - int x_low = (int)x; - int y_high; - int x_high; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. - lx; - T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - // save weights and indices - PreCalc pc; - pc.pos1 = y_low * width + x_low; - pc.pos2 = y_low * width + x_high; - pc.pos3 = y_high * width + x_low; - pc.pos4 = y_high * width + x_high; - pc.w1 = w1; - pc.w2 = w2; - pc.w3 = w3; - pc.w4 = w4; - pre_calc[pre_calc_index] = pc; - - pre_calc_index += 1; - } - } - } - } -} - -template -void ROIAlignForward( - const int nthreads, - const T* input, - const T& spatial_scale, - const int channels, - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - const T* rois, - T* output, - bool aligned) { - int n_rois = nthreads / channels / pooled_width / pooled_height; - // (n, c, ph, pw) is an element in the pooled output - // can be parallelized using omp - // #pragma omp parallel for num_threads(32) - for (int n = 0; n < n_rois; n++) { - int index_n = n * channels * pooled_width * pooled_height; - - const T* offset_rois = rois + n * 5; - int roi_batch_ind = offset_rois[0]; - - // Do not use rounding; this implementation detail is critical - T offset = aligned ? 
(T)0.5 : (T)0.0; - T roi_start_w = offset_rois[1] * spatial_scale - offset; - T roi_start_h = offset_rois[2] * spatial_scale - offset; - T roi_end_w = offset_rois[3] * spatial_scale - offset; - T roi_end_h = offset_rois[4] * spatial_scale - offset; - - T roi_width = roi_end_w - roi_start_w; - T roi_height = roi_end_h - roi_start_h; - if (aligned) { - AT_ASSERTM( - roi_width >= 0 && roi_height >= 0, - "ROIs in ROIAlign cannot have non-negative size!"); - } else { // for backward-compatibility only - roi_width = std::max(roi_width, (T)1.); - roi_height = std::max(roi_height, (T)1.); - } - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // We do average (integral) pooling inside a bin - // When the grid is empty, output zeros == 0/1, instead of NaN. - const T count = std::max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. = 4 - - // we want to precalculate indices and weights shared by all channels, - // this is the key point of optimization - std::vector> pre_calc( - roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height); - pre_calc_for_bilinear_interpolate( - height, - width, - pooled_height, - pooled_width, - roi_bin_grid_h, - roi_bin_grid_w, - roi_start_h, - roi_start_w, - bin_size_h, - bin_size_w, - roi_bin_grid_h, - roi_bin_grid_w, - pre_calc); - - for (int c = 0; c < channels; c++) { - int index_n_c = index_n + c * pooled_width * pooled_height; - const T* offset_input = - input + (roi_batch_ind * channels + c) * height * width; - int pre_calc_index = 0; - - for (int ph = 0; ph < pooled_height; ph++) { - for (int pw = 0; pw < pooled_width; pw++) { - int index = index_n_c + ph * pooled_width + pw; - - T output_val = 0.; - for (int iy = 0; iy < roi_bin_grid_h; iy++) { - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - PreCalc pc = pre_calc[pre_calc_index]; - output_val += pc.w1 * offset_input[pc.pos1] + - pc.w2 * offset_input[pc.pos2] + - pc.w3 * offset_input[pc.pos3] + pc.w4 * offset_input[pc.pos4]; - - pre_calc_index += 1; - } - } - output_val /= count; - - output[index] = output_val; - } // for pw - } // for ph - } // for c - } // for n -} - -template -void bilinear_interpolate_gradient( - const int height, - const int width, - T y, - T x, - T& w1, - T& w2, - T& w3, - T& w4, - int& x_low, - int& x_high, - int& y_low, - int& y_high, - const int index /* index for debug only*/) { - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - w1 = w2 = w3 = w4 = 0.; - x_low = x_high = y_low = y_high = -1; - return; - } - - if (y <= 0) - y = 0; - if (x <= 0) - x = 0; - - y_low = (int)y; - x_low = (int)x; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. 
- lx; - - // reference in forward - // T v1 = input[y_low * width + x_low]; - // T v2 = input[y_low * width + x_high]; - // T v3 = input[y_high * width + x_low]; - // T v4 = input[y_high * width + x_high]; - // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - - w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - return; -} - -template -inline void add(T* address, const T& val) { - *address += val; -} - -template -void ROIAlignBackward( - const int nthreads, - // may not be contiguous, and should be indexed using n_stride, etc - const T* grad_output, - const T& spatial_scale, - const int channels, - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - T* grad_input, - const T* rois, - const int n_stride, - const int c_stride, - const int h_stride, - const int w_stride, - bool aligned) { - for (int index = 0; index < nthreads; index++) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const T* offset_rois = rois + n * 5; - int roi_batch_ind = offset_rois[0]; - - // Do not use rounding; this implementation detail is critical - T offset = aligned ? (T)0.5 : (T)0.0; - T roi_start_w = offset_rois[1] * spatial_scale - offset; - T roi_start_h = offset_rois[2] * spatial_scale - offset; - T roi_end_w = offset_rois[3] * spatial_scale - offset; - T roi_end_h = offset_rois[4] * spatial_scale - offset; - - T roi_width = roi_end_w - roi_start_w; - T roi_height = roi_end_h - roi_start_h; - if (aligned) { - AT_ASSERTM( - roi_width >= 0 && roi_height >= 0, - "ROIs in ROIAlign do not have non-negative size!"); - } else { // for backward-compatibility only - roi_width = std::max(roi_width, (T)1.); - roi_height = std::max(roi_height, (T)1.); - } - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - T* offset_grad_input = - grad_input + ((roi_batch_ind * channels + c) * height * width); - - int output_offset = n * n_stride + c * c_stride; - const T* offset_grad_output = grad_output + output_offset; - const T grad_output_this_bin = - offset_grad_output[ph * h_stride + pw * w_stride]; - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // We do average (integral) pooling inside a bin - const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. 
= 4 - - for (int iy = 0; iy < roi_bin_grid_h; iy++) { - const T y = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - const T x = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - T w1, w2, w3, w4; - int x_low, x_high, y_low, y_high; - - bilinear_interpolate_gradient( - height, - width, - y, - x, - w1, - w2, - w3, - w4, - x_low, - x_high, - y_low, - y_high, - index); - - T g1 = grad_output_this_bin * w1 / count; - T g2 = grad_output_this_bin * w2 / count; - T g3 = grad_output_this_bin * w3 / count; - T g4 = grad_output_this_bin * w4 / count; - - if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { - // atomic add is not needed for now since it is single threaded - add(offset_grad_input + y_low * width + x_low, static_cast(g1)); - add(offset_grad_input + y_low * width + x_high, static_cast(g2)); - add(offset_grad_input + y_high * width + x_low, static_cast(g3)); - add(offset_grad_input + y_high * width + x_high, static_cast(g4)); - } // if - } // ix - } // iy - } // for -} // ROIAlignBackward - -} // namespace - -namespace detectron2 { - -at::Tensor ROIAlign_forward_cpu( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - bool aligned) { - AT_ASSERTM(input.device().is_cpu(), "input must be a CPU tensor"); - AT_ASSERTM(rois.device().is_cpu(), "rois must be a CPU tensor"); - - at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; - - at::CheckedFrom c = "ROIAlign_forward_cpu"; - at::checkAllSameType(c, {input_t, rois_t}); - - auto num_rois = rois.size(0); - auto channels = input.size(1); - auto height = input.size(2); - auto width = input.size(3); - - at::Tensor output = at::zeros( - {num_rois, channels, pooled_height, pooled_width}, input.options()); - - auto output_size = num_rois * pooled_height * pooled_width * channels; - - if (output.numel() == 0) - return output; - - auto input_ = input.contiguous(), rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - input.scalar_type(), "ROIAlign_forward", [&] { - ROIAlignForward( - output_size, - input_.data_ptr(), - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - sampling_ratio, - rois_.data_ptr(), - output.data_ptr(), - aligned); - }); - return output; -} - -at::Tensor ROIAlign_backward_cpu( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio, - bool aligned) { - AT_ASSERTM(grad.device().is_cpu(), "grad must be a CPU tensor"); - AT_ASSERTM(rois.device().is_cpu(), "rois must be a CPU tensor"); - - at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}; - - at::CheckedFrom c = "ROIAlign_backward_cpu"; - at::checkAllSameType(c, {grad_t, rois_t}); - - at::Tensor grad_input = - at::zeros({batch_size, channels, height, width}, grad.options()); - - // handle possibly empty gradients - if (grad.numel() == 0) { - return grad_input; - } - - // get stride values to ensure indexing into gradients is correct. 
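Both CPU kernels above reduce to one primitive: a sampling point (y, x) interacts with its four integer neighbours through bilinear weights w1 = hy*hx, w2 = hy*lx, w3 = ly*hx, w4 = ly*lx, which the forward pass uses to read values and the backward pass uses to scatter gradients. A small standalone sketch of that weight computation (simplified to strictly in-bounds points; the helper name is illustrative, not from the deleted file):

import torch

def bilinear_weights(y, x, height, width):
    # Simplified bilinear_interpolate_gradient for strictly in-bounds points:
    # clamp to 0, take the low/high neighbours, weight by fractional offsets.
    y, x = max(y, 0.0), max(x, 0.0)
    y_low, x_low = int(y), int(x)
    y_high, x_high = min(y_low + 1, height - 1), min(x_low + 1, width - 1)
    ly, lx = y - y_low, x - x_low
    hy, hx = 1.0 - ly, 1.0 - lx
    return (y_low, x_low, y_high, x_high), (hy * hx, hy * lx, ly * hx, ly * lx)

feat = torch.arange(25.0).reshape(5, 5)                  # feat[y, x] = 5*y + x
(yl, xl, yh, xh), (w1, w2, w3, w4) = bilinear_weights(2.3, 1.7, 5, 5)
assert abs(w1 + w2 + w3 + w4 - 1.0) < 1e-6

# Forward: weighted read of the four neighbours (what each bin sample adds).
val = w1 * feat[yl, xl] + w2 * feat[yl, xh] + w3 * feat[yh, xl] + w4 * feat[yh, xh]
assert abs(val.item() - (5 * 2.3 + 1.7)) < 1e-4          # exact for a linear map

# Backward: the same weights scatter the incoming gradient back to the
# four neighbours, as ROIAlignBackward does with g1..g4.
grad_in = torch.zeros_like(feat)
g = 1.0  # stands in for grad_output_this_bin / count
for (yy, xx), w in zip([(yl, xl), (yl, xh), (yh, xl), (yh, xh)], (w1, w2, w3, w4)):
    grad_in[yy, xx] += g * w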
- int n_stride = grad.stride(0); - int c_stride = grad.stride(1); - int h_stride = grad.stride(2); - int w_stride = grad.stride(3); - - auto rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - grad.scalar_type(), "ROIAlign_forward", [&] { - ROIAlignBackward( - grad.numel(), - grad.data_ptr(), - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - sampling_ratio, - grad_input.data_ptr(), - rois_.data_ptr(), - n_stride, - c_stride, - h_stride, - w_stride, - aligned); - }); - return grad_input; -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign_cuda.cu b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign_cuda.cu deleted file mode 100644 index 2e05953..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign_cuda.cu +++ /dev/null @@ -1,430 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -#include -#include -#include -#include - -// TODO make it in a common file -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ - i += blockDim.x * gridDim.x) - -template -__device__ T bilinear_interpolate( - const T* bottom_data, - const int height, - const int width, - T y, - T x, - const int index /* index for debug only*/) { - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - return 0; - } - - if (y <= 0) - y = 0; - if (x <= 0) - x = 0; - - int y_low = (int)y; - int x_low = (int)x; - int y_high; - int x_high; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. - lx; - // do bilinear interpolation - T v1 = bottom_data[y_low * width + x_low]; - T v2 = bottom_data[y_low * width + x_high]; - T v3 = bottom_data[y_high * width + x_low]; - T v4 = bottom_data[y_high * width + x_high]; - T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - - return val; -} - -template -__global__ void RoIAlignForward( - const int nthreads, - const T* bottom_data, - const T spatial_scale, - const int channels, - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - const T* bottom_rois, - T* top_data, - bool aligned) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const T* offset_bottom_rois = bottom_rois + n * 5; - int roi_batch_ind = offset_bottom_rois[0]; - - // Do not use rounding; this implementation detail is critical - T offset = aligned ? 
(T)0.5 : (T)0.0; - T roi_start_w = offset_bottom_rois[1] * spatial_scale - offset; - T roi_start_h = offset_bottom_rois[2] * spatial_scale - offset; - T roi_end_w = offset_bottom_rois[3] * spatial_scale - offset; - T roi_end_h = offset_bottom_rois[4] * spatial_scale - offset; - - T roi_width = roi_end_w - roi_start_w; - T roi_height = roi_end_h - roi_start_h; - if (!aligned) { // for backward-compatibility only - roi_width = max(roi_width, (T)1.); - roi_height = max(roi_height, (T)1.); - } - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - const T* offset_bottom_data = - bottom_data + (roi_batch_ind * channels + c) * height * width; - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // We do average (integral) pooling inside a bin - // When the grid is empty, output zeros == 0/1, instead of NaN. - const T count = max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. = 4 - - T output_val = 0.; - for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g., iy = 0, 1 - { - const T y = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - const T x = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - T val = bilinear_interpolate( - offset_bottom_data, height, width, y, x, index); - output_val += val; - } - } - output_val /= count; - - top_data[index] = output_val; - } -} - -template -__device__ void bilinear_interpolate_gradient( - const int height, - const int width, - T y, - T x, - T& w1, - T& w2, - T& w3, - T& w4, - int& x_low, - int& x_high, - int& y_low, - int& y_high, - const int index /* index for debug only*/) { - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - w1 = w2 = w3 = w4 = 0.; - x_low = x_high = y_low = y_high = -1; - return; - } - - if (y <= 0) - y = 0; - if (x <= 0) - x = 0; - - y_low = (int)y; - x_low = (int)x; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. 
- lx; - - // reference in forward - // T v1 = bottom_data[y_low * width + x_low]; - // T v2 = bottom_data[y_low * width + x_high]; - // T v3 = bottom_data[y_high * width + x_low]; - // T v4 = bottom_data[y_high * width + x_high]; - // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - - w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - return; -} - -template -__global__ void RoIAlignBackwardFeature( - const int nthreads, - const T* top_diff, - const int num_rois, - const T spatial_scale, - const int channels, - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - T* bottom_diff, - const T* bottom_rois, - bool aligned) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const T* offset_bottom_rois = bottom_rois + n * 5; - int roi_batch_ind = offset_bottom_rois[0]; - - // Do not use rounding; this implementation detail is critical - T offset = aligned ? (T)0.5 : (T)0.0; - T roi_start_w = offset_bottom_rois[1] * spatial_scale - offset; - T roi_start_h = offset_bottom_rois[2] * spatial_scale - offset; - T roi_end_w = offset_bottom_rois[3] * spatial_scale - offset; - T roi_end_h = offset_bottom_rois[4] * spatial_scale - offset; - - T roi_width = roi_end_w - roi_start_w; - T roi_height = roi_end_h - roi_start_h; - if (!aligned) { // for backward-compatibility only - roi_width = max(roi_width, (T)1.); - roi_height = max(roi_height, (T)1.); - } - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - T* offset_bottom_diff = - bottom_diff + (roi_batch_ind * channels + c) * height * width; - - int top_offset = (n * channels + c) * pooled_height * pooled_width; - const T* offset_top_diff = top_diff + top_offset; - const T top_diff_this_bin = offset_top_diff[ph * pooled_width + pw]; - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // We do average (integral) pooling inside a bin - const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. 
= 4 - - for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g., iy = 0, 1 - { - const T y = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - const T x = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - T w1, w2, w3, w4; - int x_low, x_high, y_low, y_high; - - bilinear_interpolate_gradient( - height, - width, - y, - x, - w1, - w2, - w3, - w4, - x_low, - x_high, - y_low, - y_high, - index); - - T g1 = top_diff_this_bin * w1 / count; - T g2 = top_diff_this_bin * w2 / count; - T g3 = top_diff_this_bin * w3 / count; - T g4 = top_diff_this_bin * w4 / count; - - if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { - atomicAdd( - offset_bottom_diff + y_low * width + x_low, static_cast(g1)); - atomicAdd( - offset_bottom_diff + y_low * width + x_high, static_cast(g2)); - atomicAdd( - offset_bottom_diff + y_high * width + x_low, static_cast(g3)); - atomicAdd( - offset_bottom_diff + y_high * width + x_high, static_cast(g4)); - } // if - } // ix - } // iy - } // CUDA_1D_KERNEL_LOOP -} // RoIAlignBackward - -namespace detectron2 { - -at::Tensor ROIAlign_forward_cuda( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - bool aligned) { - AT_ASSERTM(input.device().is_cuda(), "input must be a CUDA tensor"); - AT_ASSERTM(rois.device().is_cuda(), "rois must be a CUDA tensor"); - at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; - - at::CheckedFrom c = "ROIAlign_forward_cuda"; - at::checkAllSameGPU(c, {input_t, rois_t}); - at::checkAllSameType(c, {input_t, rois_t}); - at::cuda::CUDAGuard device_guard(input.device()); - - auto num_rois = rois.size(0); - auto channels = input.size(1); - auto height = input.size(2); - auto width = input.size(3); - - auto output = at::empty( - {num_rois, channels, pooled_height, pooled_width}, input.options()); - auto output_size = num_rois * pooled_height * pooled_width * channels; - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - dim3 grid(std::min( - at::cuda::ATenCeilDiv( - static_cast(output_size), static_cast(512)), - static_cast(4096))); - dim3 block(512); - - if (output.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return output; - } - - auto input_ = input.contiguous(), rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "ROIAlign_forward", [&] { - RoIAlignForward<<>>( - output_size, - input_.data_ptr(), - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - sampling_ratio, - rois_.data_ptr(), - output.data_ptr(), - aligned); - }); - cudaDeviceSynchronize(); - AT_CUDA_CHECK(cudaGetLastError()); - return output; -} - -// TODO remove the dependency on input and use instead its sizes -> save memory -at::Tensor ROIAlign_backward_cuda( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio, - bool aligned) { - AT_ASSERTM(grad.device().is_cuda(), "grad must be a CUDA tensor"); - AT_ASSERTM(rois.device().is_cuda(), "rois must be a CUDA tensor"); - - at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}; - at::CheckedFrom c = "ROIAlign_backward_cuda"; - at::checkAllSameGPU(c, {grad_t, rois_t}); - 
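The launch configuration used by ROIAlign_forward_cuda above, and repeated by the backward launcher that follows, is a plain grid-stride setup: 512 threads per block, the element count ceil-divided into blocks, and the grid capped at 4096 so that CUDA_1D_KERNEL_LOOP strides over any remainder. A tiny sketch of that arithmetic in Python (the function name is illustrative):

def launch_config(num_elements, threads_per_block=512, max_blocks=4096):
    # at::cuda::ATenCeilDiv(num_elements, 512), capped at 4096 blocks
    blocks = (num_elements + threads_per_block - 1) // threads_per_block
    return min(blocks, max_blocks), threads_per_block

# e.g. 1000 RoIs pooled to 256 x 7 x 7:
grid, block = launch_config(1000 * 256 * 7 * 7)
assert (grid, block) == (4096, 512)   # capped; each thread loops over ~6 elements

Capping the grid keeps the launch bounded for very large outputs, while the grid-stride loop inside the kernel preserves correctness for any element count.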
at::checkAllSameType(c, {grad_t, rois_t}); - at::cuda::CUDAGuard device_guard(grad.device()); - - auto num_rois = rois.size(0); - auto grad_input = - at::zeros({batch_size, channels, height, width}, grad.options()); - - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - dim3 grid(std::min( - at::cuda::ATenCeilDiv( - static_cast(grad.numel()), static_cast(512)), - static_cast(4096))); - dim3 block(512); - - // handle possibly empty gradients - if (grad.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return grad_input; - } - - auto grad_ = grad.contiguous(), rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES(grad.scalar_type(), "ROIAlign_backward", [&] { - RoIAlignBackwardFeature<<>>( - grad.numel(), - grad_.data_ptr(), - num_rois, - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - sampling_ratio, - grad_input.data_ptr(), - rois_.data_ptr(), - aligned); - }); - AT_CUDA_CHECK(cudaGetLastError()); - return grad_input; -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h deleted file mode 100644 index a99c8eb..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -#pragma once -#include - -namespace detectron2 { - -at::Tensor ROIAlignRotated_forward_cpu( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio); - -at::Tensor ROIAlignRotated_backward_cpu( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio); - -#ifdef WITH_CUDA -at::Tensor ROIAlignRotated_forward_cuda( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio); - -at::Tensor ROIAlignRotated_backward_cuda( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio); -#endif - -// Interface for Python -inline at::Tensor ROIAlignRotated_forward( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio) { - if (input.is_cuda()) { -#ifdef WITH_CUDA - return ROIAlignRotated_forward_cuda( - input, - rois, - spatial_scale, - pooled_height, - pooled_width, - sampling_ratio); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return ROIAlignRotated_forward_cpu( - input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); -} - -inline at::Tensor ROIAlignRotated_backward( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio) { - if (grad.is_cuda()) { -#ifdef WITH_CUDA - return ROIAlignRotated_backward_cuda( - grad, - 
rois, - spatial_scale, - pooled_height, - pooled_width, - batch_size, - channels, - height, - width, - sampling_ratio); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return ROIAlignRotated_backward_cpu( - grad, - rois, - spatial_scale, - pooled_height, - pooled_width, - batch_size, - channels, - height, - width, - sampling_ratio); -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp deleted file mode 100644 index 7e5e1ff..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp +++ /dev/null @@ -1,522 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -#include -#include "ROIAlignRotated.h" - -// Note: this implementation originates from the Caffe2 ROIAlignRotated Op -// and PyTorch ROIAlign (non-rotated) Op implementations. -// The key difference between this implementation and those ones is -// we don't do "legacy offset" in this version, as there aren't many previous -// works, if any, using the "legacy" ROIAlignRotated Op. -// This would make the interface a bit cleaner. - -namespace detectron2 { - -namespace { -template -struct PreCalc { - int pos1; - int pos2; - int pos3; - int pos4; - T w1; - T w2; - T w3; - T w4; -}; - -template -void pre_calc_for_bilinear_interpolate( - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int iy_upper, - const int ix_upper, - T roi_start_h, - T roi_start_w, - T bin_size_h, - T bin_size_w, - int roi_bin_grid_h, - int roi_bin_grid_w, - T roi_center_h, - T roi_center_w, - T cos_theta, - T sin_theta, - std::vector>& pre_calc) { - int pre_calc_index = 0; - for (int ph = 0; ph < pooled_height; ph++) { - for (int pw = 0; pw < pooled_width; pw++) { - for (int iy = 0; iy < iy_upper; iy++) { - const T yy = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < ix_upper; ix++) { - const T xx = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - // Rotate by theta around the center and translate - // In image space, (y, x) is the order for Right Handed System, - // and this is essentially multiplying the point by a rotation matrix - // to rotate it counterclockwise through angle theta. - T y = yy * cos_theta - xx * sin_theta + roi_center_h; - T x = yy * sin_theta + xx * cos_theta + roi_center_w; - // deal with: inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - PreCalc pc; - pc.pos1 = 0; - pc.pos2 = 0; - pc.pos3 = 0; - pc.pos4 = 0; - pc.w1 = 0; - pc.w2 = 0; - pc.w3 = 0; - pc.w4 = 0; - pre_calc[pre_calc_index] = pc; - pre_calc_index += 1; - continue; - } - - if (y < 0) { - y = 0; - } - if (x < 0) { - x = 0; - } - - int y_low = (int)y; - int x_low = (int)x; - int y_high; - int x_high; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. 
- lx; - T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - // save weights and indices - PreCalc pc; - pc.pos1 = y_low * width + x_low; - pc.pos2 = y_low * width + x_high; - pc.pos3 = y_high * width + x_low; - pc.pos4 = y_high * width + x_high; - pc.w1 = w1; - pc.w2 = w2; - pc.w3 = w3; - pc.w4 = w4; - pre_calc[pre_calc_index] = pc; - - pre_calc_index += 1; - } - } - } - } -} - -template -void bilinear_interpolate_gradient( - const int height, - const int width, - T y, - T x, - T& w1, - T& w2, - T& w3, - T& w4, - int& x_low, - int& x_high, - int& y_low, - int& y_high) { - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - w1 = w2 = w3 = w4 = 0.; - x_low = x_high = y_low = y_high = -1; - return; - } - - if (y < 0) { - y = 0; - } - - if (x < 0) { - x = 0; - } - - y_low = (int)y; - x_low = (int)x; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. - lx; - - // reference in forward - // T v1 = input[y_low * width + x_low]; - // T v2 = input[y_low * width + x_high]; - // T v3 = input[y_high * width + x_low]; - // T v4 = input[y_high * width + x_high]; - // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - - w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - return; -} - -template -inline void add(T* address, const T& val) { - *address += val; -} - -} // namespace - -template -void ROIAlignRotatedForward( - const int nthreads, - const T* input, - const T& spatial_scale, - const int channels, - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - const T* rois, - T* output) { - int n_rois = nthreads / channels / pooled_width / pooled_height; - // (n, c, ph, pw) is an element in the pooled output - // can be parallelized using omp - // #pragma omp parallel for num_threads(32) - for (int n = 0; n < n_rois; n++) { - int index_n = n * channels * pooled_width * pooled_height; - - const T* current_roi = rois + n * 6; - int roi_batch_ind = current_roi[0]; - - // Do not use rounding; this implementation detail is critical - // ROIAlignRotated supports align == true, i.e., continuous coordinate - // by default, thus the 0.5 offset - T offset = (T)0.5; - T roi_center_w = current_roi[1] * spatial_scale - offset; - T roi_center_h = current_roi[2] * spatial_scale - offset; - T roi_width = current_roi[3] * spatial_scale; - T roi_height = current_roi[4] * spatial_scale; - T theta = current_roi[5] * M_PI / 180.0; - T cos_theta = cos(theta); - T sin_theta = sin(theta); - - AT_ASSERTM( - roi_width >= 0 && roi_height >= 0, - "ROIs in ROIAlignRotated do not have non-negative size!"); - - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // We do average (integral) pooling inside a bin - const T count = std::max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. 
= 4 - - // we want to precalculate indices and weights shared by all channels, - // this is the key point of optimization - std::vector> pre_calc( - roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height); - - // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). - // Appropriate translation needs to be applied after. - T roi_start_h = -roi_height / 2.0; - T roi_start_w = -roi_width / 2.0; - - pre_calc_for_bilinear_interpolate( - height, - width, - pooled_height, - pooled_width, - roi_bin_grid_h, - roi_bin_grid_w, - roi_start_h, - roi_start_w, - bin_size_h, - bin_size_w, - roi_bin_grid_h, - roi_bin_grid_w, - roi_center_h, - roi_center_w, - cos_theta, - sin_theta, - pre_calc); - - for (int c = 0; c < channels; c++) { - int index_n_c = index_n + c * pooled_width * pooled_height; - const T* offset_input = - input + (roi_batch_ind * channels + c) * height * width; - int pre_calc_index = 0; - - for (int ph = 0; ph < pooled_height; ph++) { - for (int pw = 0; pw < pooled_width; pw++) { - int index = index_n_c + ph * pooled_width + pw; - - T output_val = 0.; - for (int iy = 0; iy < roi_bin_grid_h; iy++) { - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - PreCalc pc = pre_calc[pre_calc_index]; - output_val += pc.w1 * offset_input[pc.pos1] + - pc.w2 * offset_input[pc.pos2] + - pc.w3 * offset_input[pc.pos3] + pc.w4 * offset_input[pc.pos4]; - - pre_calc_index += 1; - } - } - output_val /= count; - - output[index] = output_val; - } // for pw - } // for ph - } // for c - } // for n -} - -template -void ROIAlignRotatedBackward( - const int nthreads, - // may not be contiguous. should index using n_stride, etc - const T* grad_output, - const T& spatial_scale, - const int channels, - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - T* grad_input, - const T* rois, - const int n_stride, - const int c_stride, - const int h_stride, - const int w_stride) { - for (int index = 0; index < nthreads; index++) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const T* current_roi = rois + n * 6; - int roi_batch_ind = current_roi[0]; - - // Do not use rounding; this implementation detail is critical - // ROIAlignRotated supports align == true, i.e., continuous coordinate - // by default, thus the 0.5 offset - T offset = (T)0.5; - T roi_center_w = current_roi[1] * spatial_scale - offset; - T roi_center_h = current_roi[2] * spatial_scale - offset; - T roi_width = current_roi[3] * spatial_scale; - T roi_height = current_roi[4] * spatial_scale; - T theta = current_roi[5] * M_PI / 180.0; - T cos_theta = cos(theta); - T sin_theta = sin(theta); - - AT_ASSERTM( - roi_width >= 0 && roi_height >= 0, - "ROIs in ROIAlignRotated do not have non-negative size!"); - - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - T* offset_grad_input = - grad_input + ((roi_batch_ind * channels + c) * height * width); - - int output_offset = n * n_stride + c * c_stride; - const T* offset_grad_output = grad_output + output_offset; - const T grad_output_this_bin = - offset_grad_output[ph * h_stride + pw * w_stride]; - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? 
sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). - // Appropriate translation needs to be applied after. - T roi_start_h = -roi_height / 2.0; - T roi_start_w = -roi_width / 2.0; - - // We do average (integral) pooling inside a bin - const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4 - - for (int iy = 0; iy < roi_bin_grid_h; iy++) { - const T yy = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - const T xx = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - // Rotate by theta around the center and translate - T y = yy * cos_theta - xx * sin_theta + roi_center_h; - T x = yy * sin_theta + xx * cos_theta + roi_center_w; - - T w1, w2, w3, w4; - int x_low, x_high, y_low, y_high; - - bilinear_interpolate_gradient( - height, width, y, x, w1, w2, w3, w4, x_low, x_high, y_low, y_high); - - T g1 = grad_output_this_bin * w1 / count; - T g2 = grad_output_this_bin * w2 / count; - T g3 = grad_output_this_bin * w3 / count; - T g4 = grad_output_this_bin * w4 / count; - - if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { - // atomic add is not needed for now since it is single threaded - add(offset_grad_input + y_low * width + x_low, static_cast(g1)); - add(offset_grad_input + y_low * width + x_high, static_cast(g2)); - add(offset_grad_input + y_high * width + x_low, static_cast(g3)); - add(offset_grad_input + y_high * width + x_high, static_cast(g4)); - } // if - } // ix - } // iy - } // for -} // ROIAlignRotatedBackward - -at::Tensor ROIAlignRotated_forward_cpu( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio) { - AT_ASSERTM(input.device().is_cpu(), "input must be a CPU tensor"); - AT_ASSERTM(rois.device().is_cpu(), "rois must be a CPU tensor"); - - at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; - - at::CheckedFrom c = "ROIAlign_forward_cpu"; - at::checkAllSameType(c, {input_t, rois_t}); - - auto num_rois = rois.size(0); - auto channels = input.size(1); - auto height = input.size(2); - auto width = input.size(3); - - at::Tensor output = at::zeros( - {num_rois, channels, pooled_height, pooled_width}, input.options()); - - auto output_size = num_rois * pooled_height * pooled_width * channels; - - if (output.numel() == 0) { - return output; - } - - auto input_ = input.contiguous(), rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - input.scalar_type(), "ROIAlignRotated_forward", [&] { - ROIAlignRotatedForward( - output_size, - input_.data_ptr(), - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - sampling_ratio, - rois_.data_ptr(), - output.data_ptr()); - }); - return output; -} - -at::Tensor ROIAlignRotated_backward_cpu( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio) { - AT_ASSERTM(grad.device().is_cpu(), "grad must be a CPU tensor"); - AT_ASSERTM(rois.device().is_cpu(), "rois must be a CPU tensor"); - - at::TensorArg grad_t{grad, "grad", 1}, 
rois_t{rois, "rois", 2}; - - at::CheckedFrom c = "ROIAlignRotated_backward_cpu"; - at::checkAllSameType(c, {grad_t, rois_t}); - - at::Tensor grad_input = - at::zeros({batch_size, channels, height, width}, grad.options()); - - // handle possibly empty gradients - if (grad.numel() == 0) { - return grad_input; - } - - // get stride values to ensure indexing into gradients is correct. - int n_stride = grad.stride(0); - int c_stride = grad.stride(1); - int h_stride = grad.stride(2); - int w_stride = grad.stride(3); - - auto rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - grad.scalar_type(), "ROIAlignRotated_forward", [&] { - ROIAlignRotatedBackward( - grad.numel(), - grad.data_ptr(), - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - sampling_ratio, - grad_input.data_ptr(), - rois_.data_ptr(), - n_stride, - c_stride, - h_stride, - w_stride); - }); - return grad_input; -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu deleted file mode 100644 index 9c376fc..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu +++ /dev/null @@ -1,443 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -#include -#include -#include -#include - -// TODO make it in a common file -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ - i += blockDim.x * gridDim.x) - -// Note: this implementation originates from the Caffe2 ROIAlignRotated Op -// and PyTorch ROIAlign (non-rotated) Op implementations. -// The key difference between this implementation and those ones is -// we don't do "legacy offset" in this version, as there aren't many previous -// works, if any, using the "legacy" ROIAlignRotated Op. -// This would make the interface a bit cleaner. - -namespace detectron2 { - -namespace { - -template -__device__ T bilinear_interpolate( - const T* input, - const int height, - const int width, - T y, - T x) { - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - return 0; - } - - if (y < 0) { - y = 0; - } - - if (x < 0) { - x = 0; - } - - int y_low = (int)y; - int x_low = (int)x; - int y_high; - int x_high; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. 
- lx; - // do bilinear interpolation - T v1 = input[y_low * width + x_low]; - T v2 = input[y_low * width + x_high]; - T v3 = input[y_high * width + x_low]; - T v4 = input[y_high * width + x_high]; - T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - - return val; -} - -template -__device__ void bilinear_interpolate_gradient( - const int height, - const int width, - T y, - T x, - T& w1, - T& w2, - T& w3, - T& w4, - int& x_low, - int& x_high, - int& y_low, - int& y_high) { - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - w1 = w2 = w3 = w4 = 0.; - x_low = x_high = y_low = y_high = -1; - return; - } - - if (y < 0) { - y = 0; - } - - if (x < 0) { - x = 0; - } - - y_low = (int)y; - x_low = (int)x; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. - lx; - - // reference in forward - // T v1 = input[y_low * width + x_low]; - // T v2 = input[y_low * width + x_high]; - // T v3 = input[y_high * width + x_low]; - // T v4 = input[y_high * width + x_high]; - // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - - w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - return; -} - -} // namespace - -template -__global__ void RoIAlignRotatedForward( - const int nthreads, - const T* input, - const T spatial_scale, - const int channels, - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - const T* rois, - T* top_data) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const T* current_roi = rois + n * 6; - int roi_batch_ind = current_roi[0]; - - // Do not use rounding; this implementation detail is critical - // ROIAlignRotated supports align == true, i.e., continuous coordinate - // by default, thus the 0.5 offset - T offset = (T)0.5; - T roi_center_w = current_roi[1] * spatial_scale - offset; - T roi_center_h = current_roi[2] * spatial_scale - offset; - T roi_width = current_roi[3] * spatial_scale; - T roi_height = current_roi[4] * spatial_scale; - T theta = current_roi[5] * M_PI / 180.0; - T cos_theta = cos(theta); - T sin_theta = sin(theta); - - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - const T* offset_input = - input + (roi_batch_ind * channels + c) * height * width; - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). - // Appropriate translation needs to be applied after. - T roi_start_h = -roi_height / 2.0; - T roi_start_w = -roi_width / 2.0; - - // We do average (inte gral) pooling inside a bin - const T count = max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. 
= 4 - - T output_val = 0.; - for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g., iy = 0, 1 - { - const T yy = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - const T xx = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - // Rotate by theta around the center and translate - T y = yy * cos_theta - xx * sin_theta + roi_center_h; - T x = yy * sin_theta + xx * cos_theta + roi_center_w; - - T val = bilinear_interpolate(offset_input, height, width, y, x); - output_val += val; - } - } - output_val /= count; - - top_data[index] = output_val; - } -} - -template -__global__ void RoIAlignRotatedBackwardFeature( - const int nthreads, - const T* top_diff, - const int num_rois, - const T spatial_scale, - const int channels, - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - T* bottom_diff, - const T* rois) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const T* current_roi = rois + n * 6; - int roi_batch_ind = current_roi[0]; - - // Do not use rounding; this implementation detail is critical - // ROIAlignRotated supports align == true, i.e., continuous coordinate - // by default, thus the 0.5 offset - T offset = (T)0.5; - T roi_center_w = current_roi[1] * spatial_scale - offset; - T roi_center_h = current_roi[2] * spatial_scale - offset; - T roi_width = current_roi[3] * spatial_scale; - T roi_height = current_roi[4] * spatial_scale; - T theta = current_roi[5] * M_PI / 180.0; - T cos_theta = cos(theta); - T sin_theta = sin(theta); - - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - T* offset_bottom_diff = - bottom_diff + (roi_batch_ind * channels + c) * height * width; - - int top_offset = (n * channels + c) * pooled_height * pooled_width; - const T* offset_top_diff = top_diff + top_offset; - const T top_diff_this_bin = offset_top_diff[ph * pooled_width + pw]; - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). - // Appropriate translation needs to be applied after. - T roi_start_h = -roi_height / 2.0; - T roi_start_w = -roi_width / 2.0; - - // We do average (integral) pooling inside a bin - const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. 
= 4 - - for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g., iy = 0, 1 - { - const T yy = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - const T xx = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - // Rotate by theta around the center and translate - T y = yy * cos_theta - xx * sin_theta + roi_center_h; - T x = yy * sin_theta + xx * cos_theta + roi_center_w; - - T w1, w2, w3, w4; - int x_low, x_high, y_low, y_high; - - bilinear_interpolate_gradient( - height, width, y, x, w1, w2, w3, w4, x_low, x_high, y_low, y_high); - - T g1 = top_diff_this_bin * w1 / count; - T g2 = top_diff_this_bin * w2 / count; - T g3 = top_diff_this_bin * w3 / count; - T g4 = top_diff_this_bin * w4 / count; - - if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { - atomicAdd( - offset_bottom_diff + y_low * width + x_low, static_cast(g1)); - atomicAdd( - offset_bottom_diff + y_low * width + x_high, static_cast(g2)); - atomicAdd( - offset_bottom_diff + y_high * width + x_low, static_cast(g3)); - atomicAdd( - offset_bottom_diff + y_high * width + x_high, static_cast(g4)); - } // if - } // ix - } // iy - } // CUDA_1D_KERNEL_LOOP -} // RoIAlignRotatedBackward - -at::Tensor ROIAlignRotated_forward_cuda( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio) { - AT_ASSERTM(input.device().is_cuda(), "input must be a CUDA tensor"); - AT_ASSERTM(rois.device().is_cuda(), "rois must be a CUDA tensor"); - at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; - - at::CheckedFrom c = "ROIAlignRotated_forward_cuda"; - at::checkAllSameGPU(c, {input_t, rois_t}); - at::checkAllSameType(c, {input_t, rois_t}); - at::cuda::CUDAGuard device_guard(input.device()); - - auto num_rois = rois.size(0); - auto channels = input.size(1); - auto height = input.size(2); - auto width = input.size(3); - - auto output = at::empty( - {num_rois, channels, pooled_height, pooled_width}, input.options()); - auto output_size = num_rois * pooled_height * pooled_width * channels; - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - dim3 grid(std::min( - at::cuda::ATenCeilDiv( - static_cast(output_size), static_cast(512)), - static_cast(4096))); - dim3 block(512); - - if (output.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return output; - } - - auto input_ = input.contiguous(), rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES( - input.scalar_type(), "ROIAlignRotated_forward", [&] { - RoIAlignRotatedForward<<>>( - output_size, - input_.data_ptr(), - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - sampling_ratio, - rois_.data_ptr(), - output.data_ptr()); - }); - cudaDeviceSynchronize(); - AT_CUDA_CHECK(cudaGetLastError()); - return output; -} - -// TODO remove the dependency on input and use instead its sizes -> save memory -at::Tensor ROIAlignRotated_backward_cuda( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio) { - AT_ASSERTM(grad.device().is_cuda(), "grad must be a CUDA tensor"); - AT_ASSERTM(rois.device().is_cuda(), "rois must be a CUDA tensor"); - - at::TensorArg grad_t{grad, "grad", 1}, 
rois_t{rois, "rois", 2}; - at::CheckedFrom c = "ROIAlign_backward_cuda"; - at::checkAllSameGPU(c, {grad_t, rois_t}); - at::checkAllSameType(c, {grad_t, rois_t}); - at::cuda::CUDAGuard device_guard(grad.device()); - - auto num_rois = rois.size(0); - auto grad_input = - at::zeros({batch_size, channels, height, width}, grad.options()); - - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - dim3 grid(std::min( - at::cuda::ATenCeilDiv( - static_cast(grad.numel()), static_cast(512)), - static_cast(4096))); - dim3 block(512); - - // handle possibly empty gradients - if (grad.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return grad_input; - } - - auto grad_ = grad.contiguous(), rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES( - grad.scalar_type(), "ROIAlignRotated_backward", [&] { - RoIAlignRotatedBackwardFeature<<>>( - grad.numel(), - grad_.data_ptr(), - num_rois, - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - sampling_ratio, - grad_input.data_ptr(), - rois_.data_ptr()); - }); - AT_CUDA_CHECK(cudaGetLastError()); - return grad_input; -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h deleted file mode 100644 index 7c389c6..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -#pragma once -#include - -namespace detectron2 { - -at::Tensor box_iou_rotated_cpu( - const at::Tensor& boxes1, - const at::Tensor& boxes2); - -#ifdef WITH_CUDA -at::Tensor box_iou_rotated_cuda( - const at::Tensor& boxes1, - const at::Tensor& boxes2); -#endif - -// Interface for Python -// inline is needed to prevent multiple function definitions when this header is -// included by different cpps -inline at::Tensor box_iou_rotated( - const at::Tensor& boxes1, - const at::Tensor& boxes2) { - assert(boxes1.device().is_cuda() == boxes2.device().is_cuda()); - if (boxes1.device().is_cuda()) { -#ifdef WITH_CUDA - return box_iou_rotated_cuda(boxes1.contiguous(), boxes2.contiguous()); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - - return box_iou_rotated_cpu(boxes1.contiguous(), boxes2.contiguous()); -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp deleted file mode 100644 index f2b02d1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -#include "box_iou_rotated.h" -#include "box_iou_rotated_utils.h" - -namespace detectron2 { - -template -void box_iou_rotated_cpu_kernel( - const at::Tensor& boxes1, - const at::Tensor& boxes2, - at::Tensor& ious) { - auto num_boxes1 = boxes1.size(0); - auto num_boxes2 = boxes2.size(0); - - for (int i = 0; i < num_boxes1; i++) { - for (int j = 0; j < num_boxes2; j++) { - ious[i * num_boxes2 + j] = single_box_iou_rotated( - boxes1[i].data_ptr(), boxes2[j].data_ptr()); - } - } -} - -at::Tensor box_iou_rotated_cpu( - // input must be contiguous: - const at::Tensor& boxes1, - const at::Tensor& boxes2) { - auto num_boxes1 = boxes1.size(0); - auto num_boxes2 = boxes2.size(0); - at::Tensor ious = - at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat)); - - box_iou_rotated_cpu_kernel(boxes1, boxes2, ious); - - // reshape from 1d array to 2d array - auto shape = std::vector{num_boxes1, num_boxes2}; - return ious.reshape(shape); -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu deleted file mode 100644 index e3403c1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -#include -#include -#include -#include -#include "box_iou_rotated_utils.h" - -namespace detectron2 { - -// 2D block with 32 * 16 = 512 threads per block -const int BLOCK_DIM_X = 32; -const int BLOCK_DIM_Y = 16; - -template -__global__ void box_iou_rotated_cuda_kernel( - const int n_boxes1, - const int n_boxes2, - const T* dev_boxes1, - const T* dev_boxes2, - T* dev_ious) { - const int row_start = blockIdx.x * blockDim.x; - const int col_start = blockIdx.y * blockDim.y; - - const int row_size = min(n_boxes1 - row_start, blockDim.x); - const int col_size = min(n_boxes2 - col_start, blockDim.y); - - __shared__ float block_boxes1[BLOCK_DIM_X * 5]; - __shared__ float block_boxes2[BLOCK_DIM_Y * 5]; - - // It's safe to copy using threadIdx.x since BLOCK_DIM_X >= BLOCK_DIM_Y - if (threadIdx.x < row_size && threadIdx.y == 0) { - block_boxes1[threadIdx.x * 5 + 0] = - dev_boxes1[(row_start + threadIdx.x) * 5 + 0]; - block_boxes1[threadIdx.x * 5 + 1] = - dev_boxes1[(row_start + threadIdx.x) * 5 + 1]; - block_boxes1[threadIdx.x * 5 + 2] = - dev_boxes1[(row_start + threadIdx.x) * 5 + 2]; - block_boxes1[threadIdx.x * 5 + 3] = - dev_boxes1[(row_start + threadIdx.x) * 5 + 3]; - block_boxes1[threadIdx.x * 5 + 4] = - dev_boxes1[(row_start + threadIdx.x) * 5 + 4]; - } - - if (threadIdx.x < col_size && threadIdx.y == 0) { - block_boxes2[threadIdx.x * 5 + 0] = - dev_boxes2[(col_start + threadIdx.x) * 5 + 0]; - block_boxes2[threadIdx.x * 5 + 1] = - dev_boxes2[(col_start + threadIdx.x) * 5 + 1]; - block_boxes2[threadIdx.x * 5 + 2] = - dev_boxes2[(col_start + threadIdx.x) * 5 + 2]; - block_boxes2[threadIdx.x * 5 + 3] = - dev_boxes2[(col_start + threadIdx.x) * 5 + 3]; - block_boxes2[threadIdx.x * 5 + 4] = - dev_boxes2[(col_start + threadIdx.x) * 5 + 4]; - } - __syncthreads(); - - if (threadIdx.x < row_size && threadIdx.y < col_size) { - int offset = (row_start + threadIdx.x) * n_boxes2 + col_start + threadIdx.y; - dev_ious[offset] = single_box_iou_rotated( - block_boxes1 + threadIdx.x * 5, block_boxes2 + threadIdx.y * 5); - } -} 
- -at::Tensor box_iou_rotated_cuda( - // input must be contiguous - const at::Tensor& boxes1, - const at::Tensor& boxes2) { - using scalar_t = float; - AT_ASSERTM( - boxes1.scalar_type() == at::kFloat, "boxes1 must be a float tensor"); - AT_ASSERTM( - boxes2.scalar_type() == at::kFloat, "boxes2 must be a float tensor"); - AT_ASSERTM(boxes1.is_cuda(), "boxes1 must be a CUDA tensor"); - AT_ASSERTM(boxes2.is_cuda(), "boxes2 must be a CUDA tensor"); - at::cuda::CUDAGuard device_guard(boxes1.device()); - - auto num_boxes1 = boxes1.size(0); - auto num_boxes2 = boxes2.size(0); - - at::Tensor ious = - at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat)); - - bool transpose = false; - if (num_boxes1 > 0 && num_boxes2 > 0) { - scalar_t *data1 = boxes1.data_ptr(), - *data2 = boxes2.data_ptr(); - - if (num_boxes2 > 65535 * BLOCK_DIM_Y) { - AT_ASSERTM( - num_boxes1 <= 65535 * BLOCK_DIM_Y, - "Too many boxes for box_iou_rotated_cuda!"); - // x dim is allowed to be large, but y dim cannot, - // so we transpose the two to avoid "invalid configuration argument" - // error. We assume one of them is small. Otherwise the result is hard to - // fit in memory anyway. - std::swap(num_boxes1, num_boxes2); - std::swap(data1, data2); - transpose = true; - } - - const int blocks_x = - at::cuda::ATenCeilDiv(static_cast(num_boxes1), BLOCK_DIM_X); - const int blocks_y = - at::cuda::ATenCeilDiv(static_cast(num_boxes2), BLOCK_DIM_Y); - - dim3 blocks(blocks_x, blocks_y); - dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - box_iou_rotated_cuda_kernel<<>>( - num_boxes1, - num_boxes2, - data1, - data2, - (scalar_t*)ious.data_ptr()); - - AT_CUDA_CHECK(cudaGetLastError()); - } - - // reshape from 1d array to 2d array - auto shape = std::vector{num_boxes1, num_boxes2}; - if (transpose) { - return ious.view(shape).t(); - } else { - return ious.view(shape); - } -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h deleted file mode 100644 index d8757ec..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h +++ /dev/null @@ -1,363 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -#pragma once - -#include -#include - -#ifdef __CUDACC__ -// Designates functions callable from the host (CPU) and the device (GPU) -#define HOST_DEVICE __host__ __device__ -#define HOST_DEVICE_INLINE HOST_DEVICE __forceinline__ -#else -#include -#define HOST_DEVICE -#define HOST_DEVICE_INLINE HOST_DEVICE inline -#endif - -namespace detectron2 { - -namespace { - -template -struct RotatedBox { - T x_ctr, y_ctr, w, h, a; -}; - -template -struct Point { - T x, y; - HOST_DEVICE_INLINE Point(const T& px = 0, const T& py = 0) : x(px), y(py) {} - HOST_DEVICE_INLINE Point operator+(const Point& p) const { - return Point(x + p.x, y + p.y); - } - HOST_DEVICE_INLINE Point& operator+=(const Point& p) { - x += p.x; - y += p.y; - return *this; - } - HOST_DEVICE_INLINE Point operator-(const Point& p) const { - return Point(x - p.x, y - p.y); - } - HOST_DEVICE_INLINE Point operator*(const T coeff) const { - return Point(x * coeff, y * coeff); - } -}; - -template -HOST_DEVICE_INLINE T dot_2d(const Point& A, const Point& B) { - return A.x * B.x + A.y * B.y; -} - -// R: result type. 
can be different from input type -template -HOST_DEVICE_INLINE R cross_2d(const Point& A, const Point& B) { - return static_cast(A.x) * static_cast(B.y) - - static_cast(B.x) * static_cast(A.y); -} - -template -HOST_DEVICE_INLINE void get_rotated_vertices( - const RotatedBox& box, - Point (&pts)[4]) { - // M_PI / 180. == 0.01745329251 - double theta = box.a * 0.01745329251; - T cosTheta2 = (T)cos(theta) * 0.5f; - T sinTheta2 = (T)sin(theta) * 0.5f; - - // y: top --> down; x: left --> right - pts[0].x = box.x_ctr + sinTheta2 * box.h + cosTheta2 * box.w; - pts[0].y = box.y_ctr + cosTheta2 * box.h - sinTheta2 * box.w; - pts[1].x = box.x_ctr - sinTheta2 * box.h + cosTheta2 * box.w; - pts[1].y = box.y_ctr - cosTheta2 * box.h - sinTheta2 * box.w; - pts[2].x = 2 * box.x_ctr - pts[0].x; - pts[2].y = 2 * box.y_ctr - pts[0].y; - pts[3].x = 2 * box.x_ctr - pts[1].x; - pts[3].y = 2 * box.y_ctr - pts[1].y; -} - -template -HOST_DEVICE_INLINE int get_intersection_points( - const Point (&pts1)[4], - const Point (&pts2)[4], - Point (&intersections)[24]) { - // Line vector - // A line from p1 to p2 is: p1 + (p2-p1)*t, t=[0,1] - Point vec1[4], vec2[4]; - for (int i = 0; i < 4; i++) { - vec1[i] = pts1[(i + 1) % 4] - pts1[i]; - vec2[i] = pts2[(i + 1) % 4] - pts2[i]; - } - - // Line test - test all line combos for intersection - int num = 0; // number of intersections - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 4; j++) { - // Solve for 2x2 Ax=b - T det = cross_2d(vec2[j], vec1[i]); - - // This takes care of parallel lines - if (fabs(det) <= 1e-14) { - continue; - } - - auto vec12 = pts2[j] - pts1[i]; - - T t1 = cross_2d(vec2[j], vec12) / det; - T t2 = cross_2d(vec1[i], vec12) / det; - - if (t1 >= 0.0f && t1 <= 1.0f && t2 >= 0.0f && t2 <= 1.0f) { - intersections[num++] = pts1[i] + vec1[i] * t1; - } - } - } - - // Check for vertices of rect1 inside rect2 - { - const auto& AB = vec2[0]; - const auto& DA = vec2[3]; - auto ABdotAB = dot_2d(AB, AB); - auto ADdotAD = dot_2d(DA, DA); - for (int i = 0; i < 4; i++) { - // assume ABCD is the rectangle, and P is the point to be judged - // P is inside ABCD iff. P's projection on AB lies within AB - // and P's projection on AD lies within AD - - auto AP = pts1[i] - pts2[0]; - - auto APdotAB = dot_2d(AP, AB); - auto APdotAD = -dot_2d(AP, DA); - - if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) && - (APdotAD <= ADdotAD)) { - intersections[num++] = pts1[i]; - } - } - } - - // Reverse the check - check for vertices of rect2 inside rect1 - { - const auto& AB = vec1[0]; - const auto& DA = vec1[3]; - auto ABdotAB = dot_2d(AB, AB); - auto ADdotAD = dot_2d(DA, DA); - for (int i = 0; i < 4; i++) { - auto AP = pts2[i] - pts1[0]; - - auto APdotAB = dot_2d(AP, AB); - auto APdotAD = -dot_2d(AP, DA); - - if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) && - (APdotAD <= ADdotAD)) { - intersections[num++] = pts2[i]; - } - } - } - - return num; -} - -template -HOST_DEVICE_INLINE int convex_hull_graham( - const Point (&p)[24], - const int& num_in, - Point (&q)[24], - bool shift_to_zero = false) { - assert(num_in >= 2); - - // Step 1: - // Find point with minimum y - // if more than 1 points have the same minimum y, - // pick the one with the minimum x. 
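As an aside on conventions: get_rotated_vertices above takes the angle in degrees and works in image coordinates (y grows downward), so a 90-degree rotation simply swaps the box's width and height. The short standalone sketch below re-states those formulas in plain C++ to make the convention easy to check; the struct and function names are invented for the example, and the expected corners are worked out by hand from the formulas above.

#include <cmath>
#include <cstdio>

// Standalone re-statement of the vertex formulas in get_rotated_vertices.
struct Pt { double x, y; };

void rotated_vertices(double cx, double cy, double w, double h, double a_deg,
                      Pt (&pts)[4]) {
  double theta = a_deg * 0.01745329251;  // M_PI / 180, as in the header above
  double cosTheta2 = std::cos(theta) * 0.5;
  double sinTheta2 = std::sin(theta) * 0.5;
  pts[0] = {cx + sinTheta2 * h + cosTheta2 * w, cy + cosTheta2 * h - sinTheta2 * w};
  pts[1] = {cx - sinTheta2 * h + cosTheta2 * w, cy - cosTheta2 * h - sinTheta2 * w};
  pts[2] = {2 * cx - pts[0].x, 2 * cy - pts[0].y};
  pts[3] = {2 * cx - pts[1].x, 2 * cy - pts[1].y};
}

int main() {
  Pt pts[4];
  // A 2 x 1 box at the origin rotated by 90 degrees becomes a 1 x 2 box:
  // the corners land at (+-0.5, -+1.0) up to floating-point rounding.
  rotated_vertices(0.0, 0.0, 2.0, 1.0, 90.0, pts);
  for (const Pt& p : pts)
    std::printf("(%.3f, %.3f)\n", p.x, p.y);
  return 0;
}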
- int t = 0; - for (int i = 1; i < num_in; i++) { - if (p[i].y < p[t].y || (p[i].y == p[t].y && p[i].x < p[t].x)) { - t = i; - } - } - auto& start = p[t]; // starting point - - // Step 2: - // Subtract starting point from every points (for sorting in the next step) - for (int i = 0; i < num_in; i++) { - q[i] = p[i] - start; - } - - // Swap the starting point to position 0 - auto tmp = q[0]; - q[0] = q[t]; - q[t] = tmp; - - // Step 3: - // Sort point 1 ~ num_in according to their relative cross-product values - // (essentially sorting according to angles) - // If the angles are the same, sort according to their distance to origin - T dist[24]; -#ifdef __CUDACC__ - // compute distance to origin before sort, and sort them together with the - // points - for (int i = 0; i < num_in; i++) { - dist[i] = dot_2d(q[i], q[i]); - } - - // CUDA version - // In the future, we can potentially use thrust - // for sorting here to improve speed (though not guaranteed) - for (int i = 1; i < num_in - 1; i++) { - for (int j = i + 1; j < num_in; j++) { - T crossProduct = cross_2d(q[i], q[j]); - if ((crossProduct < -1e-6) || - (fabs(crossProduct) < 1e-6 && dist[i] > dist[j])) { - auto q_tmp = q[i]; - q[i] = q[j]; - q[j] = q_tmp; - auto dist_tmp = dist[i]; - dist[i] = dist[j]; - dist[j] = dist_tmp; - } - } - } -#else - // CPU version - std::sort( - q + 1, q + num_in, [](const Point& A, const Point& B) -> bool { - T temp = cross_2d(A, B); - if (fabs(temp) < 1e-6) { - return dot_2d(A, A) < dot_2d(B, B); - } else { - return temp > 0; - } - }); - // compute distance to origin after sort, since the points are now different. - for (int i = 0; i < num_in; i++) { - dist[i] = dot_2d(q[i], q[i]); - } -#endif - - // Step 4: - // Make sure there are at least 2 points (that don't overlap with each other) - // in the stack - int k; // index of the non-overlapped second point - for (k = 1; k < num_in; k++) { - if (dist[k] > 1e-8) { - break; - } - } - if (k == num_in) { - // We reach the end, which means the convex hull is just one point - q[0] = p[t]; - return 1; - } - q[1] = q[k]; - int m = 2; // 2 points in the stack - // Step 5: - // Finally we can start the scanning process. - // When a non-convex relationship between the 3 points is found - // (either concave shape or duplicated points), - // we pop the previous point from the stack - // until the 3-point relationship is convex again, or - // until the stack only contains two points - for (int i = k + 1; i < num_in; i++) { - while (m > 1) { - auto q1 = q[i] - q[m - 2], q2 = q[m - 1] - q[m - 2]; - // cross_2d() uses FMA and therefore computes round(round(q1.x*q2.y) - - // q2.x*q1.y) So it may not return 0 even when q1==q2. Therefore we - // compare round(q1.x*q2.y) and round(q2.x*q1.y) directly. (round means - // round to nearest floating point). - if (q1.x * q2.y >= q2.x * q1.y) - m--; - else - break; - } - // Using double also helps, but float can solve the issue for now. - // while (m > 1 && cross_2d(q[i] - q[m - 2], q[m - 1] - q[m - 2]) - // >= 0) { - // m--; - // } - q[m++] = q[i]; - } - - // Step 6 (Optional): - // In general sense we need the original coordinates, so we - // need to shift the points back (reverting Step 2) - // But if we're only interested in getting the area/perimeter of the shape - // We can simply return. 
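After the scan finishes, polygon_area and single_box_iou_rotated at the end of this header combine the ordered hull into an IoU value. A quick sanity check of the whole pipeline, assuming this header is included, is the angle-zero case, where the rotated IoU must agree with the ordinary axis-aligned formula: two unit squares whose centers sit 0.5 apart overlap in area 0.5, so the expected IoU is 0.5 / (1 + 1 - 0.5) = 1/3. The sketch below is illustrative only and not part of the deleted sources.

#include <cassert>
#include <cmath>

// Sanity check: at angle 0 the rotated IoU reduces to the ordinary
// axis-aligned IoU. Assumes box_iou_rotated_utils.h is included.
int main() {
  // Boxes are (cx, cy, w, h, angle_in_degrees), matching RotatedBox above.
  const float box1[5] = {0.0f, 0.0f, 1.0f, 1.0f, 0.0f};
  const float box2[5] = {0.5f, 0.0f, 1.0f, 1.0f, 0.0f};
  float iou = detectron2::single_box_iou_rotated<float>(box1, box2);
  // Overlap area 0.5, union 1.5 -> IoU = 1/3.
  assert(std::fabs(iou - 1.0f / 3.0f) < 1e-4f);
  return 0;
}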
- if (!shift_to_zero) { - for (int i = 0; i < m; i++) { - q[i] += start; - } - } - - return m; -} - -template -HOST_DEVICE_INLINE T polygon_area(const Point (&q)[24], const int& m) { - if (m <= 2) { - return 0; - } - - T area = 0; - for (int i = 1; i < m - 1; i++) { - area += fabs(cross_2d(q[i] - q[0], q[i + 1] - q[0])); - } - - return area / 2.0; -} - -template -HOST_DEVICE_INLINE T rotated_boxes_intersection( - const RotatedBox& box1, - const RotatedBox& box2) { - // There are up to 4 x 4 + 4 + 4 = 24 intersections (including dups) returned - // from rotated_rect_intersection_pts - Point intersectPts[24], orderedPts[24]; - - Point pts1[4]; - Point pts2[4]; - get_rotated_vertices(box1, pts1); - get_rotated_vertices(box2, pts2); - - int num = get_intersection_points(pts1, pts2, intersectPts); - - if (num <= 2) { - return 0.0; - } - - // Convex Hull to order the intersection points in clockwise order and find - // the contour area. - int num_convex = convex_hull_graham(intersectPts, num, orderedPts, true); - return polygon_area(orderedPts, num_convex); -} - -} // namespace - -template -HOST_DEVICE_INLINE T -single_box_iou_rotated(T const* const box1_raw, T const* const box2_raw) { - // shift center to the middle point to achieve higher precision in result - RotatedBox box1, box2; - auto center_shift_x = (box1_raw[0] + box2_raw[0]) / 2.0; - auto center_shift_y = (box1_raw[1] + box2_raw[1]) / 2.0; - box1.x_ctr = box1_raw[0] - center_shift_x; - box1.y_ctr = box1_raw[1] - center_shift_y; - box1.w = box1_raw[2]; - box1.h = box1_raw[3]; - box1.a = box1_raw[4]; - box2.x_ctr = box2_raw[0] - center_shift_x; - box2.y_ctr = box2_raw[1] - center_shift_y; - box2.w = box2_raw[2]; - box2.h = box2_raw[3]; - box2.a = box2_raw[4]; - - T area1 = box1.w * box1.h; - T area2 = box2.w * box2.h; - if (area1 < 1e-14 || area2 < 1e-14) { - return 0.f; - } - - T intersection = rotated_boxes_intersection(box1, box2); - T iou = intersection / (area1 + area2 - intersection); - return iou; -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/cuda_version.cu b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/cuda_version.cu deleted file mode 100644 index af088e7..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/cuda_version.cu +++ /dev/null @@ -1,9 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -#include - -namespace detectron2 { -int get_cudart_version() { - return CUDART_VERSION; -} -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/deformable/deform_conv.h b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/deformable/deform_conv.h deleted file mode 100644 index 49ccd86..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/deformable/deform_conv.h +++ /dev/null @@ -1,377 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -#pragma once -#include - -namespace detectron2 { - -#ifdef WITH_CUDA -int deform_conv_forward_cuda( - at::Tensor input, - at::Tensor weight, - at::Tensor offset, - at::Tensor output, - at::Tensor columns, - at::Tensor ones, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - int im2col_step); - -int deform_conv_backward_input_cuda( - at::Tensor input, - at::Tensor offset, - at::Tensor gradOutput, - at::Tensor gradInput, - at::Tensor gradOffset, - at::Tensor weight, - at::Tensor columns, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - int im2col_step); - -int deform_conv_backward_parameters_cuda( - at::Tensor input, - at::Tensor offset, - at::Tensor gradOutput, - at::Tensor gradWeight, // at::Tensor gradBias, - at::Tensor columns, - at::Tensor ones, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - float scale, - int im2col_step); - -void modulated_deform_conv_cuda_forward( - at::Tensor input, - at::Tensor weight, - at::Tensor bias, - at::Tensor ones, - at::Tensor offset, - at::Tensor mask, - at::Tensor output, - at::Tensor columns, - int kernel_h, - int kernel_w, - const int stride_h, - const int stride_w, - const int pad_h, - const int pad_w, - const int dilation_h, - const int dilation_w, - const int group, - const int deformable_group, - const bool with_bias); - -void modulated_deform_conv_cuda_backward( - at::Tensor input, - at::Tensor weight, - at::Tensor bias, - at::Tensor ones, - at::Tensor offset, - at::Tensor mask, - at::Tensor columns, - at::Tensor grad_input, - at::Tensor grad_weight, - at::Tensor grad_bias, - at::Tensor grad_offset, - at::Tensor grad_mask, - at::Tensor grad_output, - int kernel_h, - int kernel_w, - int stride_h, - int stride_w, - int pad_h, - int pad_w, - int dilation_h, - int dilation_w, - int group, - int deformable_group, - const bool with_bias); - -#endif - -inline int deform_conv_forward( - at::Tensor input, - at::Tensor weight, - at::Tensor offset, - at::Tensor output, - at::Tensor columns, - at::Tensor ones, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - int im2col_step) { - if (input.is_cuda()) { -#ifdef WITH_CUDA - TORCH_CHECK(weight.is_cuda(), "weight tensor is not on GPU!"); - TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); - return deform_conv_forward_cuda( - input, - weight, - offset, - output, - columns, - ones, - kW, - kH, - dW, - dH, - padW, - padH, - dilationW, - dilationH, - group, - deformable_group, - im2col_step); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - AT_ERROR("Not implemented on the CPU"); -} - -inline int deform_conv_backward_input( - at::Tensor input, - at::Tensor offset, - at::Tensor gradOutput, - at::Tensor gradInput, - at::Tensor gradOffset, - at::Tensor weight, - at::Tensor columns, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - int im2col_step) { - if (gradOutput.is_cuda()) { -#ifdef WITH_CUDA - TORCH_CHECK(input.is_cuda(), "input tensor is not on GPU!"); - TORCH_CHECK(weight.is_cuda(), "weight tensor is not on GPU!"); - TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); - return deform_conv_backward_input_cuda( - input, - 
offset, - gradOutput, - gradInput, - gradOffset, - weight, - columns, - kW, - kH, - dW, - dH, - padW, - padH, - dilationW, - dilationH, - group, - deformable_group, - im2col_step); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - AT_ERROR("Not implemented on the CPU"); -} - -inline int deform_conv_backward_filter( - at::Tensor input, - at::Tensor offset, - at::Tensor gradOutput, - at::Tensor gradWeight, // at::Tensor gradBias, - at::Tensor columns, - at::Tensor ones, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - float scale, - int im2col_step) { - if (gradOutput.is_cuda()) { -#ifdef WITH_CUDA - TORCH_CHECK(input.is_cuda(), "input tensor is not on GPU!"); - TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); - return deform_conv_backward_parameters_cuda( - input, - offset, - gradOutput, - gradWeight, - columns, - ones, - kW, - kH, - dW, - dH, - padW, - padH, - dilationW, - dilationH, - group, - deformable_group, - scale, - im2col_step); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - AT_ERROR("Not implemented on the CPU"); -} - -inline void modulated_deform_conv_forward( - at::Tensor input, - at::Tensor weight, - at::Tensor bias, - at::Tensor ones, - at::Tensor offset, - at::Tensor mask, - at::Tensor output, - at::Tensor columns, - int kernel_h, - int kernel_w, - const int stride_h, - const int stride_w, - const int pad_h, - const int pad_w, - const int dilation_h, - const int dilation_w, - const int group, - const int deformable_group, - const bool with_bias) { - if (input.is_cuda()) { -#ifdef WITH_CUDA - TORCH_CHECK(weight.is_cuda(), "weight tensor is not on GPU!"); - TORCH_CHECK(bias.is_cuda(), "bias tensor is not on GPU!"); - TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); - return modulated_deform_conv_cuda_forward( - input, - weight, - bias, - ones, - offset, - mask, - output, - columns, - kernel_h, - kernel_w, - stride_h, - stride_w, - pad_h, - pad_w, - dilation_h, - dilation_w, - group, - deformable_group, - with_bias); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - AT_ERROR("Not implemented on the CPU"); -} - -inline void modulated_deform_conv_backward( - at::Tensor input, - at::Tensor weight, - at::Tensor bias, - at::Tensor ones, - at::Tensor offset, - at::Tensor mask, - at::Tensor columns, - at::Tensor grad_input, - at::Tensor grad_weight, - at::Tensor grad_bias, - at::Tensor grad_offset, - at::Tensor grad_mask, - at::Tensor grad_output, - int kernel_h, - int kernel_w, - int stride_h, - int stride_w, - int pad_h, - int pad_w, - int dilation_h, - int dilation_w, - int group, - int deformable_group, - const bool with_bias) { - if (grad_output.is_cuda()) { -#ifdef WITH_CUDA - TORCH_CHECK(input.is_cuda(), "input tensor is not on GPU!"); - TORCH_CHECK(weight.is_cuda(), "weight tensor is not on GPU!"); - TORCH_CHECK(bias.is_cuda(), "bias tensor is not on GPU!"); - TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); - return modulated_deform_conv_cuda_backward( - input, - weight, - bias, - ones, - offset, - mask, - columns, - grad_input, - grad_weight, - grad_bias, - grad_offset, - grad_mask, - grad_output, - kernel_h, - kernel_w, - stride_h, - stride_w, - pad_h, - pad_w, - dilation_h, - dilation_w, - group, - deformable_group, - with_bias); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - AT_ERROR("Not implemented on the CPU"); -} - -} // namespace detectron2 diff --git 
a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda.cu b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda.cu deleted file mode 100644 index 5376db0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda.cu +++ /dev/null @@ -1,1131 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -// modified from -// https://github.com/open-mmlab/mmdetection/blob/master/mmdet/ops/dcn/src/deform_conv_cuda.cpp -// Original license: Apache 2.0 - -// modify from -// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.c -// Original license: Apache 2.0 - -#include - -#include "deform_conv.h" - -#include -#include - -namespace detectron2 { - -void deformable_im2col( - const at::Tensor data_im, - const at::Tensor data_offset, - const int channels, - const int height, - const int width, - const int ksize_h, - const int ksize_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int parallel_imgs, - const int deformable_group, - at::Tensor data_col); - -void deformable_col2im( - const at::Tensor data_col, - const at::Tensor data_offset, - const int channels, - const int height, - const int width, - const int ksize_h, - const int ksize_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int parallel_imgs, - const int deformable_group, - at::Tensor grad_im); - -void deformable_col2im_coord( - const at::Tensor data_col, - const at::Tensor data_im, - const at::Tensor data_offset, - const int channels, - const int height, - const int width, - const int ksize_h, - const int ksize_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int parallel_imgs, - const int deformable_group, - at::Tensor grad_offset); - -void modulated_deformable_im2col_cuda( - const at::Tensor data_im, - const at::Tensor data_offset, - const at::Tensor data_mask, - const int batch_size, - const int channels, - const int height_im, - const int width_im, - const int height_col, - const int width_col, - const int kernel_h, - const int kenerl_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int deformable_group, - at::Tensor data_col); - -void modulated_deformable_col2im_cuda( - const at::Tensor data_col, - const at::Tensor data_offset, - const at::Tensor data_mask, - const int batch_size, - const int channels, - const int height_im, - const int width_im, - const int height_col, - const int width_col, - const int kernel_h, - const int kenerl_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int deformable_group, - at::Tensor grad_im); - -void modulated_deformable_col2im_coord_cuda( - const at::Tensor data_col, - const at::Tensor data_im, - const at::Tensor data_offset, - const at::Tensor data_mask, - const int batch_size, - const int channels, - const int height_im, - const int width_im, - const int height_col, - const int width_col, - const int kernel_h, - const int kenerl_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - 
const int dilation_h, - const int dilation_w, - const int deformable_group, - at::Tensor grad_offset, - at::Tensor grad_mask); - -void shape_check( - at::Tensor input, - at::Tensor offset, - at::Tensor* gradOutput, - at::Tensor weight, - int kH, - int kW, - int dH, - int dW, - int padH, - int padW, - int dilationH, - int dilationW, - int group, - int deformable_group) { - TORCH_CHECK( - weight.ndimension() == 4, - "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, " - "but got: %s", - weight.ndimension()); - - TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); - - TORCH_CHECK( - kW > 0 && kH > 0, - "kernel size should be greater than zero, but got kH: %d kW: %d", - kH, - kW); - - TORCH_CHECK( - (weight.size(2) == kH && weight.size(3) == kW), - "kernel size should be consistent with weight, ", - "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", - kH, - kW, - weight.size(2), - weight.size(3)); - - TORCH_CHECK( - dW > 0 && dH > 0, - "stride should be greater than zero, but got dH: %d dW: %d", - dH, - dW); - - TORCH_CHECK( - dilationW > 0 && dilationH > 0, - "dilation should be greater than 0, but got dilationH: %d dilationW: %d", - dilationH, - dilationW); - - int ndim = input.ndimension(); - int dimf = 0; - int dimh = 1; - int dimw = 2; - - if (ndim == 4) { - dimf++; - dimh++; - dimw++; - } - - TORCH_CHECK( - ndim == 3 || ndim == 4, - "3D or 4D input tensor expected but got: %s", - ndim); - - long nInputPlane = weight.size(1) * group; - long inputHeight = input.size(dimh); - long inputWidth = input.size(dimw); - long nOutputPlane = weight.size(0); - long outputHeight = - (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - long outputWidth = - (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; - - TORCH_CHECK( - nInputPlane % deformable_group == 0, - "input channels must divide deformable group size"); - - if (outputWidth < 1 || outputHeight < 1) - AT_ERROR( - "Given input size: (%ld x %ld x %ld). " - "Calculated output size: (%ld x %ld x %ld). 
Output size is too small", - nInputPlane, - inputHeight, - inputWidth, - nOutputPlane, - outputHeight, - outputWidth); - - TORCH_CHECK( - input.size(1) == nInputPlane, - "invalid number of input planes, expected: %d, but got: %d", - nInputPlane, - input.size(1)); - - TORCH_CHECK( - (inputHeight >= kH && inputWidth >= kW), - "input image is smaller than kernel"); - - TORCH_CHECK( - (offset.size(2) == outputHeight && offset.size(3) == outputWidth), - "invalid spatial size of offset, expected height: %d width: %d, but " - "got height: %d width: %d", - outputHeight, - outputWidth, - offset.size(2), - offset.size(3)); - - TORCH_CHECK( - (offset.size(1) == deformable_group * 2 * kH * kW), - "invalid number of channels of offset"); - - if (gradOutput != NULL) { - TORCH_CHECK( - gradOutput->size(dimf) == nOutputPlane, - "invalid number of gradOutput planes, expected: %d, but got: %d", - nOutputPlane, - gradOutput->size(dimf)); - - TORCH_CHECK( - (gradOutput->size(dimh) == outputHeight && - gradOutput->size(dimw) == outputWidth), - "invalid size of gradOutput, expected height: %d width: %d , but " - "got height: %d width: %d", - outputHeight, - outputWidth, - gradOutput->size(dimh), - gradOutput->size(dimw)); - } -} - -int deform_conv_forward_cuda( - at::Tensor input, - at::Tensor weight, - at::Tensor offset, - at::Tensor output, - at::Tensor columns, - at::Tensor ones, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - int im2col_step) { - // todo: resize columns to include im2col: done - // todo: add im2col_step as input - // todo: add new output buffer and transpose it to output (or directly - // transpose output) todo: possibly change data indexing because of - // parallel_imgs - - shape_check( - input, - offset, - NULL, - weight, - kH, - kW, - dH, - dW, - padH, - padW, - dilationH, - dilationW, - group, - deformable_group); - - input = input.contiguous(); - offset = offset.contiguous(); - weight = weight.contiguous(); - - int batch = 1; - if (input.ndimension() == 3) { - // Force batch - batch = 0; - input.unsqueeze_(0); - offset.unsqueeze_(0); - } - - // todo: assert batchsize dividable by im2col_step - - long batchSize = input.size(0); - long nInputPlane = input.size(1); - long inputHeight = input.size(2); - long inputWidth = input.size(3); - - long nOutputPlane = weight.size(0); - - long outputWidth = - (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; - long outputHeight = - (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - - TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); - - output = output.view({batchSize / im2col_step, - im2col_step, - nOutputPlane, - outputHeight, - outputWidth}); - columns = at::zeros( - {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, - input.options()); - - if (ones.ndimension() != 2 || - ones.size(0) * ones.size(1) < outputHeight * outputWidth) { - ones = at::ones({outputHeight, outputWidth}, input.options()); - } - - input = input.view({batchSize / im2col_step, - im2col_step, - nInputPlane, - inputHeight, - inputWidth}); - offset = offset.view({batchSize / im2col_step, - im2col_step, - deformable_group * 2 * kH * kW, - outputHeight, - outputWidth}); - - at::Tensor output_buffer = at::zeros( - {batchSize / im2col_step, - nOutputPlane, - im2col_step * outputHeight, - outputWidth}, - output.options()); - - output_buffer = output_buffer.view({output_buffer.size(0), - group, - output_buffer.size(1) / group, - 
output_buffer.size(2), - output_buffer.size(3)}); - - for (int elt = 0; elt < batchSize / im2col_step; elt++) { - deformable_im2col( - input[elt], - offset[elt], - nInputPlane, - inputHeight, - inputWidth, - kH, - kW, - padH, - padW, - dH, - dW, - dilationH, - dilationW, - im2col_step, - deformable_group, - columns); - - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - weight = weight.view({group, - weight.size(0) / group, - weight.size(1), - weight.size(2), - weight.size(3)}); - - for (int g = 0; g < group; g++) { - output_buffer[elt][g] = output_buffer[elt][g] - .flatten(1) - .addmm_(weight[g].flatten(1), columns[g]) - .view_as(output_buffer[elt][g]); - } - } - - output_buffer = - output_buffer.view({output_buffer.size(0), - output_buffer.size(1) * output_buffer.size(2), - output_buffer.size(3), - output_buffer.size(4)}); - - output_buffer = output_buffer.view({batchSize / im2col_step, - nOutputPlane, - im2col_step, - outputHeight, - outputWidth}); - output_buffer.transpose_(1, 2); - output.copy_(output_buffer); - output = output.view({batchSize, nOutputPlane, outputHeight, outputWidth}); - - input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); - offset = offset.view( - {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); - - if (batch == 0) { - output = output.view({nOutputPlane, outputHeight, outputWidth}); - input = input.view({nInputPlane, inputHeight, inputWidth}); - offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); - } - - return 1; -} - -int deform_conv_backward_input_cuda( - at::Tensor input, - at::Tensor offset, - at::Tensor gradOutput, - at::Tensor gradInput, - at::Tensor gradOffset, - at::Tensor weight, - at::Tensor columns, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - int im2col_step) { - shape_check( - input, - offset, - &gradOutput, - weight, - kH, - kW, - dH, - dW, - padH, - padW, - dilationH, - dilationW, - group, - deformable_group); - - input = input.contiguous(); - offset = offset.contiguous(); - gradOutput = gradOutput.contiguous(); - weight = weight.contiguous(); - - int batch = 1; - - if (input.ndimension() == 3) { - // Force batch - batch = 0; - input = input.view({1, input.size(0), input.size(1), input.size(2)}); - offset = offset.view({1, offset.size(0), offset.size(1), offset.size(2)}); - gradOutput = gradOutput.view( - {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); - } - - long batchSize = input.size(0); - long nInputPlane = input.size(1); - long inputHeight = input.size(2); - long inputWidth = input.size(3); - - long nOutputPlane = weight.size(0); - - long outputWidth = - (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; - long outputHeight = - (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - - TORCH_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset"); - gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); - columns = at::zeros( - {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, - input.options()); - - // change order of grad output - gradOutput = gradOutput.view({batchSize / im2col_step, - im2col_step, - nOutputPlane, - outputHeight, - outputWidth}); - gradOutput.transpose_(1, 2); - - gradInput = gradInput.view({batchSize / im2col_step, - im2col_step, - nInputPlane, - inputHeight, - inputWidth}); - input = input.view({batchSize / im2col_step, - im2col_step, - nInputPlane, 
- inputHeight, - inputWidth}); - gradOffset = gradOffset.view({batchSize / im2col_step, - im2col_step, - deformable_group * 2 * kH * kW, - outputHeight, - outputWidth}); - offset = offset.view({batchSize / im2col_step, - im2col_step, - deformable_group * 2 * kH * kW, - outputHeight, - outputWidth}); - - for (int elt = 0; elt < batchSize / im2col_step; elt++) { - // divide into groups - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - weight = weight.view({group, - weight.size(0) / group, - weight.size(1), - weight.size(2), - weight.size(3)}); - gradOutput = gradOutput.view({gradOutput.size(0), - group, - gradOutput.size(1) / group, - gradOutput.size(2), - gradOutput.size(3), - gradOutput.size(4)}); - - for (int g = 0; g < group; g++) { - columns[g] = columns[g].addmm_( - weight[g].flatten(1).transpose(0, 1), - gradOutput[elt][g].flatten(1), - 0.0f, - 1.0f); - } - - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - gradOutput = gradOutput.view({gradOutput.size(0), - gradOutput.size(1) * gradOutput.size(2), - gradOutput.size(3), - gradOutput.size(4), - gradOutput.size(5)}); - - deformable_col2im_coord( - columns, - input[elt], - offset[elt], - nInputPlane, - inputHeight, - inputWidth, - kH, - kW, - padH, - padW, - dH, - dW, - dilationH, - dilationW, - im2col_step, - deformable_group, - gradOffset[elt]); - - deformable_col2im( - columns, - offset[elt], - nInputPlane, - inputHeight, - inputWidth, - kH, - kW, - padH, - padW, - dH, - dW, - dilationH, - dilationW, - im2col_step, - deformable_group, - gradInput[elt]); - } - - gradOutput.transpose_(1, 2); - gradOutput = - gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); - - gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); - input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); - gradOffset = gradOffset.view( - {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); - offset = offset.view( - {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); - - if (batch == 0) { - gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); - input = input.view({nInputPlane, inputHeight, inputWidth}); - gradInput = gradInput.view({nInputPlane, inputHeight, inputWidth}); - offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); - gradOffset = - gradOffset.view({offset.size(1), offset.size(2), offset.size(3)}); - } - - return 1; -} - -int deform_conv_backward_parameters_cuda( - at::Tensor input, - at::Tensor offset, - at::Tensor gradOutput, - at::Tensor gradWeight, // at::Tensor gradBias, - at::Tensor columns, - at::Tensor ones, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - float scale, - int im2col_step) { - // todo: transpose and reshape outGrad - // todo: reshape columns - // todo: add im2col_step as input - - shape_check( - input, - offset, - &gradOutput, - gradWeight, - kH, - kW, - dH, - dW, - padH, - padW, - dilationH, - dilationW, - group, - deformable_group); - - input = input.contiguous(); - offset = offset.contiguous(); - gradOutput = gradOutput.contiguous(); - - int batch = 1; - - if (input.ndimension() == 3) { - // Force batch - batch = 0; - input = input.view( - at::IntList({1, input.size(0), input.size(1), input.size(2)})); - gradOutput = gradOutput.view( - {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); - } - - long batchSize = input.size(0); - long 
nInputPlane = input.size(1); - long inputHeight = input.size(2); - long inputWidth = input.size(3); - - long nOutputPlane = gradWeight.size(0); - - long outputWidth = - (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; - long outputHeight = - (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - - TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); - - columns = at::zeros( - {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, - input.options()); - - gradOutput = gradOutput.view({batchSize / im2col_step, - im2col_step, - nOutputPlane, - outputHeight, - outputWidth}); - gradOutput.transpose_(1, 2); - - at::Tensor gradOutputBuffer = at::zeros_like(gradOutput); - gradOutputBuffer = gradOutputBuffer.view({batchSize / im2col_step, - nOutputPlane, - im2col_step, - outputHeight, - outputWidth}); - gradOutputBuffer.copy_(gradOutput); - // gradOutput is not contiguous, so we do reshape (instead of view) next - gradOutputBuffer = gradOutputBuffer.reshape({batchSize / im2col_step, - nOutputPlane, - im2col_step * outputHeight, - outputWidth}); - - gradOutput.transpose_(1, 2); - gradOutput = - gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); - - input = input.view({batchSize / im2col_step, - im2col_step, - nInputPlane, - inputHeight, - inputWidth}); - offset = offset.view({batchSize / im2col_step, - im2col_step, - deformable_group * 2 * kH * kW, - outputHeight, - outputWidth}); - - for (int elt = 0; elt < batchSize / im2col_step; elt++) { - deformable_im2col( - input[elt], - offset[elt], - nInputPlane, - inputHeight, - inputWidth, - kH, - kW, - padH, - padW, - dH, - dW, - dilationH, - dilationW, - im2col_step, - deformable_group, - columns); - - // divide into group - gradOutputBuffer = gradOutputBuffer.view({gradOutputBuffer.size(0), - group, - gradOutputBuffer.size(1) / group, - gradOutputBuffer.size(2), - gradOutputBuffer.size(3)}); - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - gradWeight = gradWeight.view({group, - gradWeight.size(0) / group, - gradWeight.size(1), - gradWeight.size(2), - gradWeight.size(3)}); - - for (int g = 0; g < group; g++) { - gradWeight[g] = gradWeight[g] - .flatten(1) - .addmm_( - gradOutputBuffer[elt][g].flatten(1), - columns[g].transpose(1, 0), - 1.0, - scale) - .view_as(gradWeight[g]); - } - gradOutputBuffer = gradOutputBuffer.view( - {gradOutputBuffer.size(0), - gradOutputBuffer.size(1) * gradOutputBuffer.size(2), - gradOutputBuffer.size(3), - gradOutputBuffer.size(4)}); - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - gradWeight = gradWeight.view({gradWeight.size(0) * gradWeight.size(1), - gradWeight.size(2), - gradWeight.size(3), - gradWeight.size(4)}); - } - - input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); - offset = offset.view( - {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); - - if (batch == 0) { - gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); - input = input.view({nInputPlane, inputHeight, inputWidth}); - } - - return 1; -} - -void modulated_deform_conv_cuda_forward( - at::Tensor input, - at::Tensor weight, - at::Tensor bias, - at::Tensor ones, - at::Tensor offset, - at::Tensor mask, - at::Tensor output, - at::Tensor columns, - int kernel_h, - int kernel_w, - const int stride_h, - const int stride_w, - const int pad_h, - const int pad_w, - const int dilation_h, - const int dilation_w, - const int group, - const int deformable_group, - 
const bool with_bias) { - TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); - TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); - - const int batch = input.size(0); - const int channels = input.size(1); - const int height = input.size(2); - const int width = input.size(3); - - const int channels_out = weight.size(0); - const int channels_kernel = weight.size(1); - const int kernel_h_ = weight.size(2); - const int kernel_w_ = weight.size(3); - - if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) - AT_ERROR( - "Input shape and kernel shape wont match: (%d x %d vs %d x %d).", - kernel_h_, - kernel_w, - kernel_h_, - kernel_w_); - if (channels != channels_kernel * group) - AT_ERROR( - "Input shape and kernel channels wont match: (%d vs %d).", - channels, - channels_kernel * group); - - const int height_out = - (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; - const int width_out = - (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; - - if (ones.ndimension() != 2 || - ones.size(0) * ones.size(1) < height_out * width_out) { - // Resize plane and fill with ones... - ones = at::ones({height_out, width_out}, input.options()); - } - - // resize output - output = output.view({batch, channels_out, height_out, width_out}).zero_(); - // resize temporary columns - columns = at::zeros( - {channels * kernel_h * kernel_w, 1 * height_out * width_out}, - input.options()); - - output = output.view({output.size(0), - group, - output.size(1) / group, - output.size(2), - output.size(3)}); - - for (int b = 0; b < batch; b++) { - modulated_deformable_im2col_cuda( - input[b], - offset[b], - mask[b], - 1, - channels, - height, - width, - height_out, - width_out, - kernel_h, - kernel_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - deformable_group, - columns); - - // divide into group - weight = weight.view({group, - weight.size(0) / group, - weight.size(1), - weight.size(2), - weight.size(3)}); - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - - for (int g = 0; g < group; g++) { - output[b][g] = output[b][g] - .flatten(1) - .addmm_(weight[g].flatten(1), columns[g]) - .view_as(output[b][g]); - } - - weight = weight.view({weight.size(0) * weight.size(1), - weight.size(2), - weight.size(3), - weight.size(4)}); - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - } - - output = output.view({output.size(0), - output.size(1) * output.size(2), - output.size(3), - output.size(4)}); - - if (with_bias) { - output += bias.view({1, bias.size(0), 1, 1}); - } -} - -void modulated_deform_conv_cuda_backward( - at::Tensor input, - at::Tensor weight, - at::Tensor bias, - at::Tensor ones, - at::Tensor offset, - at::Tensor mask, - at::Tensor columns, - at::Tensor grad_input, - at::Tensor grad_weight, - at::Tensor grad_bias, - at::Tensor grad_offset, - at::Tensor grad_mask, - at::Tensor grad_output, - int kernel_h, - int kernel_w, - int stride_h, - int stride_w, - int pad_h, - int pad_w, - int dilation_h, - int dilation_w, - int group, - int deformable_group, - const bool with_bias) { - TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); - TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); - - const int batch = input.size(0); - const int channels = input.size(1); - const int height = input.size(2); - const int width = input.size(3); - - const int channels_kernel = weight.size(1); - const int kernel_h_ = 
weight.size(2); - const int kernel_w_ = weight.size(3); - if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) - AT_ERROR( - "Input shape and kernel shape wont match: (%d x %d vs %d x %d).", - kernel_h_, - kernel_w, - kernel_h_, - kernel_w_); - if (channels != channels_kernel * group) - AT_ERROR( - "Input shape and kernel channels wont match: (%d vs %d).", - channels, - channels_kernel * group); - - const int height_out = - (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; - const int width_out = - (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; - - if (ones.ndimension() != 2 || - ones.size(0) * ones.size(1) < height_out * width_out) { - // Resize plane and fill with ones... - ones = at::ones({height_out, width_out}, input.options()); - } - - grad_input = grad_input.view({batch, channels, height, width}); - columns = at::zeros( - {channels * kernel_h * kernel_w, height_out * width_out}, - input.options()); - - grad_output = grad_output.view({grad_output.size(0), - group, - grad_output.size(1) / group, - grad_output.size(2), - grad_output.size(3)}); - - for (int b = 0; b < batch; b++) { - // divide int group - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - weight = weight.view({group, - weight.size(0) / group, - weight.size(1), - weight.size(2), - weight.size(3)}); - - for (int g = 0; g < group; g++) { - columns[g].addmm_( - weight[g].flatten(1).transpose(0, 1), - grad_output[b][g].flatten(1), - 0.0f, - 1.0f); - } - - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - weight = weight.view({weight.size(0) * weight.size(1), - weight.size(2), - weight.size(3), - weight.size(4)}); - - // gradient w.r.t. input coordinate data - modulated_deformable_col2im_coord_cuda( - columns, - input[b], - offset[b], - mask[b], - 1, - channels, - height, - width, - height_out, - width_out, - kernel_h, - kernel_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - deformable_group, - grad_offset[b], - grad_mask[b]); - // gradient w.r.t. input data - modulated_deformable_col2im_cuda( - columns, - offset[b], - mask[b], - 1, - channels, - height, - width, - height_out, - width_out, - kernel_h, - kernel_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - deformable_group, - grad_input[b]); - - // gradient w.r.t. 
weight, dWeight should accumulate across the batch and - // group - modulated_deformable_im2col_cuda( - input[b], - offset[b], - mask[b], - 1, - channels, - height, - width, - height_out, - width_out, - kernel_h, - kernel_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - deformable_group, - columns); - - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - grad_weight = grad_weight.view({group, - grad_weight.size(0) / group, - grad_weight.size(1), - grad_weight.size(2), - grad_weight.size(3)}); - if (with_bias) - grad_bias = grad_bias.view({group, grad_bias.size(0) / group}); - - for (int g = 0; g < group; g++) { - grad_weight[g] = - grad_weight[g] - .flatten(1) - .addmm_(grad_output[b][g].flatten(1), columns[g].transpose(0, 1)) - .view_as(grad_weight[g]); - if (with_bias) { - grad_bias[g] = - grad_bias[g] - .view({-1, 1}) - .addmm_(grad_output[b][g].flatten(1), ones.view({-1, 1})) - .view(-1); - } - } - - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - grad_weight = grad_weight.view({grad_weight.size(0) * grad_weight.size(1), - grad_weight.size(2), - grad_weight.size(3), - grad_weight.size(4)}); - if (with_bias) - grad_bias = grad_bias.view({grad_bias.size(0) * grad_bias.size(1)}); - } - grad_output = grad_output.view({grad_output.size(0) * grad_output.size(1), - grad_output.size(2), - grad_output.size(3), - grad_output.size(4)}); -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.cu b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.cu deleted file mode 100644 index 841f316..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.cu +++ /dev/null @@ -1,1288 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -// modified from -// https://github.com/open-mmlab/mmdetection/blob/master/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu -// Original license: Apache 2.0 -// clang-format off - -// modify from -// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu - -/*! - ******************* BEGIN Caffe Copyright Notice and Disclaimer ***************** - * - * COPYRIGHT - * - * All contributions by the University of California: - * Copyright (c) 2014-2017 The Regents of the University of California (Regents) - * All rights reserved. - * - * All other contributions: - * Copyright (c) 2014-2017, the respective contributors - * All rights reserved. - * - * Caffe uses a shared copyright model: each contributor holds copyright over - * their contributions to Caffe. The project versioning records all such - * contribution and copyright details. If a contributor wants to further mark - * their specific copyright on a particular contribution, they should indicate - * their copyright solely in the commit message of the change when it is - * committed. - * - * LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - *AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - *IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE - *FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - *DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - *SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - *CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - *OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - *OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * CONTRIBUTION AGREEMENT - * - * By contributing to the BVLC/caffe repository through pull-request, comment, - * or otherwise, the contributor releases their content to the - * license and copyright terms herein. - * - ***************** END Caffe Copyright Notice and Disclaimer ********************* - * - * Copyright (c) 2018 Microsoft - * Licensed under The MIT License [see LICENSE for details] - * \file modulated_deformable_im2col.cuh - * \brief Function definitions of converting an image to - * column matrix based on kernel, padding, dilation, and offset. - * These functions are mainly used in deformable convolution operators. - * \ref: https://arxiv.org/abs/1703.06211 - * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng - */ - -#include -#include -#include -#include -#include -#include - -using namespace at; - -#define CUDA_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ - i += blockDim.x * gridDim.x) - - -namespace { - -const int CUDA_NUM_THREADS = 1024; -const int kMaxGridNum = 65535; - -inline int GET_BLOCKS(const int N) { - return std::min(kMaxGridNum, (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS); -} - -} - -template -__device__ scalar_t deformable_im2col_bilinear( - const scalar_t* bottom_data, - const int data_width, - const int height, - const int width, - scalar_t h, - scalar_t w) { - int h_low = floor(h); - int w_low = floor(w); - int h_high = h_low + 1; - int w_high = w_low + 1; - - scalar_t lh = h - h_low; - scalar_t lw = w - w_low; - scalar_t hh = 1 - lh, hw = 1 - lw; - - scalar_t v1 = 0; - if (h_low >= 0 && w_low >= 0) - v1 = bottom_data[h_low * data_width + w_low]; - scalar_t v2 = 0; - if (h_low >= 0 && w_high <= width - 1) - v2 = bottom_data[h_low * data_width + w_high]; - scalar_t v3 = 0; - if (h_high <= height - 1 && w_low >= 0) - v3 = bottom_data[h_high * data_width + w_low]; - scalar_t v4 = 0; - if (h_high <= height - 1 && w_high <= width - 1) - v4 = bottom_data[h_high * data_width + w_high]; - - scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; - - scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - return val; -} - -template -__device__ scalar_t get_gradient_weight( - scalar_t argmax_h, - scalar_t argmax_w, - const int h, - const int w, - const int height, - const int width) { - if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || - argmax_w >= width) { - // empty - return 0; - } - - int argmax_h_low = floor(argmax_h); - int argmax_w_low = floor(argmax_w); - int argmax_h_high = 
argmax_h_low + 1; - int argmax_w_high = argmax_w_low + 1; - - scalar_t weight = 0; - if (h == argmax_h_low && w == argmax_w_low) - weight = (h + 1 - argmax_h) * (w + 1 - argmax_w); - if (h == argmax_h_low && w == argmax_w_high) - weight = (h + 1 - argmax_h) * (argmax_w + 1 - w); - if (h == argmax_h_high && w == argmax_w_low) - weight = (argmax_h + 1 - h) * (w + 1 - argmax_w); - if (h == argmax_h_high && w == argmax_w_high) - weight = (argmax_h + 1 - h) * (argmax_w + 1 - w); - return weight; -} - -template -__device__ scalar_t get_coordinate_weight( - scalar_t argmax_h, - scalar_t argmax_w, - const int height, - const int width, - const scalar_t* im_data, - const int data_width, - const int bp_dir) { - if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || - argmax_w >= width) { - // empty - return 0; - } - - int argmax_h_low = floor(argmax_h); - int argmax_w_low = floor(argmax_w); - int argmax_h_high = argmax_h_low + 1; - int argmax_w_high = argmax_w_low + 1; - - scalar_t weight = 0; - - if (bp_dir == 0) { - if (argmax_h_low >= 0 && argmax_w_low >= 0) - weight += -1 * (argmax_w_low + 1 - argmax_w) * - im_data[argmax_h_low * data_width + argmax_w_low]; - if (argmax_h_low >= 0 && argmax_w_high <= width - 1) - weight += -1 * (argmax_w - argmax_w_low) * - im_data[argmax_h_low * data_width + argmax_w_high]; - if (argmax_h_high <= height - 1 && argmax_w_low >= 0) - weight += (argmax_w_low + 1 - argmax_w) * - im_data[argmax_h_high * data_width + argmax_w_low]; - if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) - weight += (argmax_w - argmax_w_low) * - im_data[argmax_h_high * data_width + argmax_w_high]; - } else if (bp_dir == 1) { - if (argmax_h_low >= 0 && argmax_w_low >= 0) - weight += -1 * (argmax_h_low + 1 - argmax_h) * - im_data[argmax_h_low * data_width + argmax_w_low]; - if (argmax_h_low >= 0 && argmax_w_high <= width - 1) - weight += (argmax_h_low + 1 - argmax_h) * - im_data[argmax_h_low * data_width + argmax_w_high]; - if (argmax_h_high <= height - 1 && argmax_w_low >= 0) - weight += -1 * (argmax_h - argmax_h_low) * - im_data[argmax_h_high * data_width + argmax_w_low]; - if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) - weight += (argmax_h - argmax_h_low) * - im_data[argmax_h_high * data_width + argmax_w_high]; - } - - return weight; -} - -template -__global__ void deformable_im2col_gpu_kernel( - const int n, - const scalar_t* data_im, - const scalar_t* data_offset, - const int height, - const int width, - const int kernel_h, - const int kernel_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int channel_per_deformable_group, - const int batch_size, - const int num_channels, - const int deformable_group, - const int height_col, - const int width_col, - scalar_t* data_col) { - CUDA_KERNEL_LOOP(index, n) { - // index index of output matrix - const int w_col = index % width_col; - const int h_col = (index / width_col) % height_col; - const int b_col = (index / width_col / height_col) % batch_size; - const int c_im = (index / width_col / height_col) / batch_size; - const int c_col = c_im * kernel_h * kernel_w; - - // compute deformable group index - const int deformable_group_index = c_im / channel_per_deformable_group; - - const int h_in = h_col * stride_h - pad_h; - const int w_in = w_col * stride_w - pad_w; - scalar_t* data_col_ptr = data_col + - ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col; - // const scalar_t* data_im_ptr = data_im + 
((b_col * num_channels + c_im) * - // height + h_in) * width + w_in; - const scalar_t* data_im_ptr = - data_im + (b_col * num_channels + c_im) * height * width; - const scalar_t* data_offset_ptr = data_offset + - (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * - kernel_w * height_col * width_col; - - for (int i = 0; i < kernel_h; ++i) { - for (int j = 0; j < kernel_w; ++j) { - const int data_offset_h_ptr = - ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col; - const int data_offset_w_ptr = - ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + - w_col; - const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; - const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; - scalar_t val = static_cast(0); - const scalar_t h_im = h_in + i * dilation_h + offset_h; - const scalar_t w_im = w_in + j * dilation_w + offset_w; - if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) { - // const scalar_t map_h = i * dilation_h + offset_h; - // const scalar_t map_w = j * dilation_w + offset_w; - // const int cur_height = height - h_in; - // const int cur_width = width - w_in; - // val = deformable_im2col_bilinear(data_im_ptr, width, cur_height, - // cur_width, map_h, map_w); - val = deformable_im2col_bilinear( - data_im_ptr, width, height, width, h_im, w_im); - } - *data_col_ptr = val; - data_col_ptr += batch_size * height_col * width_col; - } - } - } -} - - -template -__global__ void deformable_col2im_gpu_kernel( - const int n, - const scalar_t* data_col, - const scalar_t* data_offset, - const int channels, - const int height, - const int width, - const int kernel_h, - const int kernel_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int channel_per_deformable_group, - const int batch_size, - const int deformable_group, - const int height_col, - const int width_col, - scalar_t* grad_im) { - CUDA_KERNEL_LOOP(index, n) { - const int j = (index / width_col / height_col / batch_size) % kernel_w; - const int i = - (index / width_col / height_col / batch_size / kernel_w) % kernel_h; - const int c = - index / width_col / height_col / batch_size / kernel_w / kernel_h; - // compute the start and end of the output - - const int deformable_group_index = c / channel_per_deformable_group; - - int w_out = index % width_col; - int h_out = (index / width_col) % height_col; - int b = (index / width_col / height_col) % batch_size; - int w_in = w_out * stride_w - pad_w; - int h_in = h_out * stride_h - pad_h; - - const scalar_t* data_offset_ptr = data_offset + - (b * deformable_group + deformable_group_index) * 2 * kernel_h * - kernel_w * height_col * width_col; - const int data_offset_h_ptr = - ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out; - const int data_offset_w_ptr = - ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out; - const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; - const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; - const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h; - const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w; - - const scalar_t cur_top_grad = data_col[index]; - const int cur_h = (int)cur_inv_h_data; - const int cur_w = (int)cur_inv_w_data; - for (int dy = -2; dy <= 2; dy++) { - for (int dx = -2; dx <= 2; dx++) { - if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 && - cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 && - 
abs(cur_inv_w_data - (cur_w + dx)) < 1) { - int cur_bottom_grad_pos = - ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx; - scalar_t weight = get_gradient_weight( - cur_inv_h_data, - cur_inv_w_data, - cur_h + dy, - cur_w + dx, - height, - width); - atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad); - } - } - } - } -} - - -template -__global__ void deformable_col2im_coord_gpu_kernel( - const int n, - const scalar_t* data_col, - const scalar_t* data_im, - const scalar_t* data_offset, - const int channels, - const int height, - const int width, - const int kernel_h, - const int kernel_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int channel_per_deformable_group, - const int batch_size, - const int offset_channels, - const int deformable_group, - const int height_col, - const int width_col, - scalar_t* grad_offset) { - CUDA_KERNEL_LOOP(index, n) { - scalar_t val = 0; - int w = index % width_col; - int h = (index / width_col) % height_col; - int c = (index / width_col / height_col) % offset_channels; - int b = (index / width_col / height_col) / offset_channels; - // compute the start and end of the output - - const int deformable_group_index = c / (2 * kernel_h * kernel_w); - const int col_step = kernel_h * kernel_w; - int cnt = 0; - const scalar_t* data_col_ptr = data_col + - deformable_group_index * channel_per_deformable_group * batch_size * - width_col * height_col; - const scalar_t* data_im_ptr = data_im + - (b * deformable_group + deformable_group_index) * - channel_per_deformable_group / kernel_h / kernel_w * height * width; - const scalar_t* data_offset_ptr = data_offset + - (b * deformable_group + deformable_group_index) * 2 * kernel_h * - kernel_w * height_col * width_col; - - const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; - - for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; - col_c += col_step) { - const int col_pos = - (((col_c * batch_size + b) * height_col) + h) * width_col + w; - const int bp_dir = offset_c % 2; - - int j = (col_pos / width_col / height_col / batch_size) % kernel_w; - int i = - (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h; - int w_out = col_pos % width_col; - int h_out = (col_pos / width_col) % height_col; - int w_in = w_out * stride_w - pad_w; - int h_in = h_out * stride_h - pad_h; - const int data_offset_h_ptr = - (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out); - const int data_offset_w_ptr = - (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + - w_out); - const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; - const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; - scalar_t inv_h = h_in + i * dilation_h + offset_h; - scalar_t inv_w = w_in + j * dilation_w + offset_w; - if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) { - inv_h = inv_w = -2; - } - const scalar_t weight = get_coordinate_weight( - inv_h, - inv_w, - height, - width, - data_im_ptr + cnt * height * width, - width, - bp_dir); - val += weight * data_col_ptr[col_pos]; - cnt += 1; - } - - grad_offset[index] = val; - } -} - - -namespace detectron2 { - -void deformable_im2col( - const at::Tensor data_im, - const at::Tensor data_offset, - const int channels, - const int height, - const int width, - const int ksize_h, - const int ksize_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int 
dilation_h, - const int dilation_w, - const int parallel_imgs, - const int deformable_group, - at::Tensor data_col) { - // num_axes should be smaller than block size - // todo: check parallel_imgs is correctly passed in - int height_col = - (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; - int width_col = - (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; - int num_kernels = channels * height_col * width_col * parallel_imgs; - int channel_per_deformable_group = channels / deformable_group; - - at::cuda::CUDAGuard device_guard(data_im.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - data_im.scalar_type(), "deformable_im2col_gpu", ([&] { - const scalar_t* data_im_ = data_im.data_ptr(); - const scalar_t* data_offset_ = data_offset.data_ptr(); - scalar_t* data_col_ = data_col.data_ptr(); - - deformable_im2col_gpu_kernel<<< - GET_BLOCKS(num_kernels), - CUDA_NUM_THREADS, - 0, - stream>>>( - num_kernels, - data_im_, - data_offset_, - height, - width, - ksize_h, - ksize_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - channel_per_deformable_group, - parallel_imgs, - channels, - deformable_group, - height_col, - width_col, - data_col_); - })); - - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) { - printf("error in deformable_im2col: %s\n", cudaGetErrorString(err)); - } -} - - -void deformable_col2im( - const at::Tensor data_col, - const at::Tensor data_offset, - const int channels, - const int height, - const int width, - const int ksize_h, - const int ksize_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int parallel_imgs, - const int deformable_group, - at::Tensor grad_im) { - // todo: make sure parallel_imgs is passed in correctly - int height_col = - (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; - int width_col = - (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; - int num_kernels = - channels * ksize_h * ksize_w * height_col * width_col * parallel_imgs; - int channel_per_deformable_group = channels / deformable_group; - - at::cuda::CUDAGuard device_guard(data_col.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - data_col.scalar_type(), "deformable_col2im_gpu", ([&] { - const scalar_t* data_col_ = data_col.data_ptr(); - const scalar_t* data_offset_ = data_offset.data_ptr(); - scalar_t* grad_im_ = grad_im.data_ptr(); - - deformable_col2im_gpu_kernel<<< - GET_BLOCKS(num_kernels), - CUDA_NUM_THREADS, - 0, - stream>>>( - num_kernels, - data_col_, - data_offset_, - channels, - height, - width, - ksize_h, - ksize_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - channel_per_deformable_group, - parallel_imgs, - deformable_group, - height_col, - width_col, - grad_im_); - })); - - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) { - printf("error in deformable_col2im: %s\n", cudaGetErrorString(err)); - } -} - - -void deformable_col2im_coord( - const at::Tensor data_col, - const at::Tensor data_im, - const at::Tensor data_offset, - const int channels, - const int height, - const int width, - const int ksize_h, - const int ksize_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int parallel_imgs, - const int deformable_group, - 
at::Tensor grad_offset) { - int height_col = - (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; - int width_col = - (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; - int num_kernels = height_col * width_col * 2 * ksize_h * ksize_w * - deformable_group * parallel_imgs; - int channel_per_deformable_group = - channels * ksize_h * ksize_w / deformable_group; - - at::cuda::CUDAGuard device_guard(data_col.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - data_col.scalar_type(), "deformable_col2im_coord_gpu", ([&] { - const scalar_t* data_col_ = data_col.data_ptr(); - const scalar_t* data_im_ = data_im.data_ptr(); - const scalar_t* data_offset_ = data_offset.data_ptr(); - scalar_t* grad_offset_ = grad_offset.data_ptr(); - - deformable_col2im_coord_gpu_kernel<<< - GET_BLOCKS(num_kernels), - CUDA_NUM_THREADS, - 0, - stream>>>( - num_kernels, - data_col_, - data_im_, - data_offset_, - channels, - height, - width, - ksize_h, - ksize_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - channel_per_deformable_group, - parallel_imgs, - 2 * ksize_h * ksize_w * deformable_group, - deformable_group, - height_col, - width_col, - grad_offset_); - })); -} - -} // namespace detectron2 - - -template -__device__ scalar_t dmcn_im2col_bilinear( - const scalar_t* bottom_data, - const int data_width, - const int height, - const int width, - scalar_t h, - scalar_t w) { - int h_low = floor(h); - int w_low = floor(w); - int h_high = h_low + 1; - int w_high = w_low + 1; - - scalar_t lh = h - h_low; - scalar_t lw = w - w_low; - scalar_t hh = 1 - lh, hw = 1 - lw; - - scalar_t v1 = 0; - if (h_low >= 0 && w_low >= 0) - v1 = bottom_data[h_low * data_width + w_low]; - scalar_t v2 = 0; - if (h_low >= 0 && w_high <= width - 1) - v2 = bottom_data[h_low * data_width + w_high]; - scalar_t v3 = 0; - if (h_high <= height - 1 && w_low >= 0) - v3 = bottom_data[h_high * data_width + w_low]; - scalar_t v4 = 0; - if (h_high <= height - 1 && w_high <= width - 1) - v4 = bottom_data[h_high * data_width + w_high]; - - scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; - - scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - return val; -} - -template -__device__ scalar_t dmcn_get_gradient_weight( - scalar_t argmax_h, - scalar_t argmax_w, - const int h, - const int w, - const int height, - const int width) { - if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || - argmax_w >= width) { - // empty - return 0; - } - - int argmax_h_low = floor(argmax_h); - int argmax_w_low = floor(argmax_w); - int argmax_h_high = argmax_h_low + 1; - int argmax_w_high = argmax_w_low + 1; - - scalar_t weight = 0; - if (h == argmax_h_low && w == argmax_w_low) - weight = (h + 1 - argmax_h) * (w + 1 - argmax_w); - if (h == argmax_h_low && w == argmax_w_high) - weight = (h + 1 - argmax_h) * (argmax_w + 1 - w); - if (h == argmax_h_high && w == argmax_w_low) - weight = (argmax_h + 1 - h) * (w + 1 - argmax_w); - if (h == argmax_h_high && w == argmax_w_high) - weight = (argmax_h + 1 - h) * (argmax_w + 1 - w); - return weight; -} - -template -__device__ scalar_t dmcn_get_coordinate_weight( - scalar_t argmax_h, - scalar_t argmax_w, - const int height, - const int width, - const scalar_t* im_data, - const int data_width, - const int bp_dir) { - if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || - argmax_w >= width) { - // empty - return 0; - } - - int argmax_h_low = floor(argmax_h); - int argmax_w_low = 
floor(argmax_w); - int argmax_h_high = argmax_h_low + 1; - int argmax_w_high = argmax_w_low + 1; - - scalar_t weight = 0; - - if (bp_dir == 0) { - if (argmax_h_low >= 0 && argmax_w_low >= 0) - weight += -1 * (argmax_w_low + 1 - argmax_w) * - im_data[argmax_h_low * data_width + argmax_w_low]; - if (argmax_h_low >= 0 && argmax_w_high <= width - 1) - weight += -1 * (argmax_w - argmax_w_low) * - im_data[argmax_h_low * data_width + argmax_w_high]; - if (argmax_h_high <= height - 1 && argmax_w_low >= 0) - weight += (argmax_w_low + 1 - argmax_w) * - im_data[argmax_h_high * data_width + argmax_w_low]; - if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) - weight += (argmax_w - argmax_w_low) * - im_data[argmax_h_high * data_width + argmax_w_high]; - } else if (bp_dir == 1) { - if (argmax_h_low >= 0 && argmax_w_low >= 0) - weight += -1 * (argmax_h_low + 1 - argmax_h) * - im_data[argmax_h_low * data_width + argmax_w_low]; - if (argmax_h_low >= 0 && argmax_w_high <= width - 1) - weight += (argmax_h_low + 1 - argmax_h) * - im_data[argmax_h_low * data_width + argmax_w_high]; - if (argmax_h_high <= height - 1 && argmax_w_low >= 0) - weight += -1 * (argmax_h - argmax_h_low) * - im_data[argmax_h_high * data_width + argmax_w_low]; - if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) - weight += (argmax_h - argmax_h_low) * - im_data[argmax_h_high * data_width + argmax_w_high]; - } - - return weight; -} - -template -__global__ void modulated_deformable_im2col_gpu_kernel( - const int n, - const scalar_t* data_im, - const scalar_t* data_offset, - const scalar_t* data_mask, - const int height, - const int width, - const int kernel_h, - const int kernel_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int channel_per_deformable_group, - const int batch_size, - const int num_channels, - const int deformable_group, - const int height_col, - const int width_col, - scalar_t* data_col) { - CUDA_KERNEL_LOOP(index, n) { - // index index of output matrix - const int w_col = index % width_col; - const int h_col = (index / width_col) % height_col; - const int b_col = (index / width_col / height_col) % batch_size; - const int c_im = (index / width_col / height_col) / batch_size; - const int c_col = c_im * kernel_h * kernel_w; - - // compute deformable group index - const int deformable_group_index = c_im / channel_per_deformable_group; - - const int h_in = h_col * stride_h - pad_h; - const int w_in = w_col * stride_w - pad_w; - - scalar_t* data_col_ptr = data_col + - ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col; - // const float* data_im_ptr = data_im + ((b_col * num_channels + c_im) * - // height + h_in) * width + w_in; - const scalar_t* data_im_ptr = - data_im + (b_col * num_channels + c_im) * height * width; - const scalar_t* data_offset_ptr = data_offset + - (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * - kernel_w * height_col * width_col; - - const scalar_t* data_mask_ptr = data_mask + - (b_col * deformable_group + deformable_group_index) * kernel_h * - kernel_w * height_col * width_col; - - for (int i = 0; i < kernel_h; ++i) { - for (int j = 0; j < kernel_w; ++j) { - const int data_offset_h_ptr = - ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col; - const int data_offset_w_ptr = - ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + - w_col; - const int data_mask_hw_ptr = - ((i * kernel_w + j) * height_col + h_col) * 
width_col + w_col; - const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; - const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; - const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; - scalar_t val = static_cast(0); - const scalar_t h_im = h_in + i * dilation_h + offset_h; - const scalar_t w_im = w_in + j * dilation_w + offset_w; - // if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) { - if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) { - // const float map_h = i * dilation_h + offset_h; - // const float map_w = j * dilation_w + offset_w; - // const int cur_height = height - h_in; - // const int cur_width = width - w_in; - // val = dmcn_im2col_bilinear(data_im_ptr, width, cur_height, - // cur_width, map_h, map_w); - val = dmcn_im2col_bilinear( - data_im_ptr, width, height, width, h_im, w_im); - } - *data_col_ptr = val * mask; - data_col_ptr += batch_size * height_col * width_col; - // data_col_ptr += height_col * width_col; - } - } - } -} - -template -__global__ void modulated_deformable_col2im_gpu_kernel( - const int n, - const scalar_t* data_col, - const scalar_t* data_offset, - const scalar_t* data_mask, - const int channels, - const int height, - const int width, - const int kernel_h, - const int kernel_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int channel_per_deformable_group, - const int batch_size, - const int deformable_group, - const int height_col, - const int width_col, - scalar_t* grad_im) { - CUDA_KERNEL_LOOP(index, n) { - const int j = (index / width_col / height_col / batch_size) % kernel_w; - const int i = - (index / width_col / height_col / batch_size / kernel_w) % kernel_h; - const int c = - index / width_col / height_col / batch_size / kernel_w / kernel_h; - // compute the start and end of the output - - const int deformable_group_index = c / channel_per_deformable_group; - - int w_out = index % width_col; - int h_out = (index / width_col) % height_col; - int b = (index / width_col / height_col) % batch_size; - int w_in = w_out * stride_w - pad_w; - int h_in = h_out * stride_h - pad_h; - - const scalar_t* data_offset_ptr = data_offset + - (b * deformable_group + deformable_group_index) * 2 * kernel_h * - kernel_w * height_col * width_col; - const scalar_t* data_mask_ptr = data_mask + - (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * - height_col * width_col; - const int data_offset_h_ptr = - ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out; - const int data_offset_w_ptr = - ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out; - const int data_mask_hw_ptr = - ((i * kernel_w + j) * height_col + h_out) * width_col + w_out; - const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; - const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; - const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; - const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h; - const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w; - - const scalar_t cur_top_grad = data_col[index] * mask; - const int cur_h = (int)cur_inv_h_data; - const int cur_w = (int)cur_inv_w_data; - for (int dy = -2; dy <= 2; dy++) { - for (int dx = -2; dx <= 2; dx++) { - if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 && - cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 && - abs(cur_inv_w_data - (cur_w + dx)) < 1) { - int cur_bottom_grad_pos = - ((b * channels + c) 
* height + cur_h + dy) * width + cur_w + dx; - scalar_t weight = dmcn_get_gradient_weight( - cur_inv_h_data, - cur_inv_w_data, - cur_h + dy, - cur_w + dx, - height, - width); - atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad); - } - } - } - } -} - -template -__global__ void modulated_deformable_col2im_coord_gpu_kernel( - const int n, - const scalar_t* data_col, - const scalar_t* data_im, - const scalar_t* data_offset, - const scalar_t* data_mask, - const int channels, - const int height, - const int width, - const int kernel_h, - const int kernel_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int channel_per_deformable_group, - const int batch_size, - const int offset_channels, - const int deformable_group, - const int height_col, - const int width_col, - scalar_t* grad_offset, - scalar_t* grad_mask) { - CUDA_KERNEL_LOOP(index, n) { - scalar_t val = 0, mval = 0; - int w = index % width_col; - int h = (index / width_col) % height_col; - int c = (index / width_col / height_col) % offset_channels; - int b = (index / width_col / height_col) / offset_channels; - // compute the start and end of the output - - const int deformable_group_index = c / (2 * kernel_h * kernel_w); - const int col_step = kernel_h * kernel_w; - int cnt = 0; - const scalar_t* data_col_ptr = data_col + - deformable_group_index * channel_per_deformable_group * batch_size * - width_col * height_col; - const scalar_t* data_im_ptr = data_im + - (b * deformable_group + deformable_group_index) * - channel_per_deformable_group / kernel_h / kernel_w * height * width; - const scalar_t* data_offset_ptr = data_offset + - (b * deformable_group + deformable_group_index) * 2 * kernel_h * - kernel_w * height_col * width_col; - const scalar_t* data_mask_ptr = data_mask + - (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * - height_col * width_col; - - const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; - - for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; - col_c += col_step) { - const int col_pos = - (((col_c * batch_size + b) * height_col) + h) * width_col + w; - const int bp_dir = offset_c % 2; - - int j = (col_pos / width_col / height_col / batch_size) % kernel_w; - int i = - (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h; - int w_out = col_pos % width_col; - int h_out = (col_pos / width_col) % height_col; - int w_in = w_out * stride_w - pad_w; - int h_in = h_out * stride_h - pad_h; - const int data_offset_h_ptr = - (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out); - const int data_offset_w_ptr = - (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + - w_out); - const int data_mask_hw_ptr = - (((i * kernel_w + j) * height_col + h_out) * width_col + w_out); - const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; - const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; - const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; - scalar_t inv_h = h_in + i * dilation_h + offset_h; - scalar_t inv_w = w_in + j * dilation_w + offset_w; - if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) { - inv_h = inv_w = -2; - } else { - mval += data_col_ptr[col_pos] * - dmcn_im2col_bilinear( - data_im_ptr + cnt * height * width, - width, - height, - width, - inv_h, - inv_w); - } - const scalar_t weight = dmcn_get_coordinate_weight( - inv_h, - inv_w, - height, - width, - data_im_ptr + cnt * 
height * width, - width, - bp_dir); - val += weight * data_col_ptr[col_pos] * mask; - cnt += 1; - } - // KERNEL_ASSIGN(grad_offset[index], offset_req, val); - grad_offset[index] = val; - if (offset_c % 2 == 0) - // KERNEL_ASSIGN(grad_mask[(((b * deformable_group + - // deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * - // height_col + h) * width_col + w], mask_req, mval); - grad_mask - [(((b * deformable_group + deformable_group_index) * kernel_h * - kernel_w + - offset_c / 2) * - height_col + - h) * - width_col + - w] = mval; - } -} - - -namespace detectron2 { - -void modulated_deformable_im2col_cuda( - const at::Tensor data_im, - const at::Tensor data_offset, - const at::Tensor data_mask, - const int batch_size, - const int channels, - const int height_im, - const int width_im, - const int height_col, - const int width_col, - const int kernel_h, - const int kenerl_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int deformable_group, - at::Tensor data_col) { - // num_axes should be smaller than block size - const int channel_per_deformable_group = channels / deformable_group; - const int num_kernels = channels * batch_size * height_col * width_col; - - at::cuda::CUDAGuard device_guard(data_im.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - data_im.scalar_type(), "modulated_deformable_im2col_gpu", ([&] { - const scalar_t* data_im_ = data_im.data_ptr(); - const scalar_t* data_offset_ = data_offset.data_ptr(); - const scalar_t* data_mask_ = data_mask.data_ptr(); - scalar_t* data_col_ = data_col.data_ptr(); - - modulated_deformable_im2col_gpu_kernel<<< - GET_BLOCKS(num_kernels), - CUDA_NUM_THREADS, - 0, - stream>>>( - num_kernels, - data_im_, - data_offset_, - data_mask_, - height_im, - width_im, - kernel_h, - kenerl_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - channel_per_deformable_group, - batch_size, - channels, - deformable_group, - height_col, - width_col, - data_col_); - })); - - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) { - printf( - "error in modulated_deformable_im2col_cuda: %s\n", - cudaGetErrorString(err)); - } -} - -void modulated_deformable_col2im_cuda( - const at::Tensor data_col, - const at::Tensor data_offset, - const at::Tensor data_mask, - const int batch_size, - const int channels, - const int height_im, - const int width_im, - const int height_col, - const int width_col, - const int kernel_h, - const int kernel_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int deformable_group, - at::Tensor grad_im) { - const int channel_per_deformable_group = channels / deformable_group; - const int num_kernels = - channels * kernel_h * kernel_w * batch_size * height_col * width_col; - - at::cuda::CUDAGuard device_guard(data_col.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - data_col.scalar_type(), "modulated_deformable_col2im_gpu", ([&] { - const scalar_t* data_col_ = data_col.data_ptr(); - const scalar_t* data_offset_ = data_offset.data_ptr(); - const scalar_t* data_mask_ = data_mask.data_ptr(); - scalar_t* grad_im_ = grad_im.data_ptr(); - - modulated_deformable_col2im_gpu_kernel<<< - GET_BLOCKS(num_kernels), - CUDA_NUM_THREADS, - 0, - stream>>>( - num_kernels, - data_col_, - data_offset_, - data_mask_, - 
channels, - height_im, - width_im, - kernel_h, - kernel_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - channel_per_deformable_group, - batch_size, - deformable_group, - height_col, - width_col, - grad_im_); - })); - - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) { - printf( - "error in modulated_deformable_col2im_cuda: %s\n", - cudaGetErrorString(err)); - } -} - -void modulated_deformable_col2im_coord_cuda( - const at::Tensor data_col, - const at::Tensor data_im, - const at::Tensor data_offset, - const at::Tensor data_mask, - const int batch_size, - const int channels, - const int height_im, - const int width_im, - const int height_col, - const int width_col, - const int kernel_h, - const int kernel_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int deformable_group, - at::Tensor grad_offset, - at::Tensor grad_mask) { - const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * - kernel_w * deformable_group; - const int channel_per_deformable_group = - channels * kernel_h * kernel_w / deformable_group; - - at::cuda::CUDAGuard device_guard(data_col.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - data_col.scalar_type(), "modulated_deformable_col2im_coord_gpu", ([&] { - const scalar_t* data_col_ = data_col.data_ptr(); - const scalar_t* data_im_ = data_im.data_ptr(); - const scalar_t* data_offset_ = data_offset.data_ptr(); - const scalar_t* data_mask_ = data_mask.data_ptr(); - scalar_t* grad_offset_ = grad_offset.data_ptr(); - scalar_t* grad_mask_ = grad_mask.data_ptr(); - - modulated_deformable_col2im_coord_gpu_kernel<<< - GET_BLOCKS(num_kernels), - CUDA_NUM_THREADS, - 0, - stream>>>( - num_kernels, - data_col_, - data_im_, - data_offset_, - data_mask_, - channels, - height_im, - width_im, - kernel_h, - kernel_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - channel_per_deformable_group, - batch_size, - 2 * kernel_h * kernel_w * deformable_group, - deformable_group, - height_col, - width_col, - grad_offset_, - grad_mask_); - })); - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) { - printf( - "error in modulated_deformable_col2im_coord_cuda: %s\n", - cudaGetErrorString(err)); - } -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated.h b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated.h deleted file mode 100644 index 9c86c8d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated.h +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved
-#pragma once
-#include <torch/types.h>
-
-namespace detectron2 {
-
-at::Tensor nms_rotated_cpu(
-    const at::Tensor& dets,
-    const at::Tensor& scores,
-    const float iou_threshold);
-
-#ifdef WITH_CUDA
-at::Tensor nms_rotated_cuda(
-    const at::Tensor& dets,
-    const at::Tensor& scores,
-    const float iou_threshold);
-#endif
-
-// Interface for Python
-// inline is needed to prevent multiple function definitions when this header is
-// included by different cpps
-inline at::Tensor nms_rotated(
-    const at::Tensor& dets,
-    const at::Tensor& scores,
-    const float iou_threshold) {
-  assert(dets.device().is_cuda() == scores.device().is_cuda());
-  if (dets.device().is_cuda()) {
-#ifdef WITH_CUDA
-    return nms_rotated_cuda(
-        dets.contiguous(), scores.contiguous(), iou_threshold);
-#else
-    AT_ERROR("Not compiled with GPU support");
-#endif
-  }
-
-  return nms_rotated_cpu(dets.contiguous(), scores.contiguous(), iou_threshold);
-}
-
-} // namespace detectron2
diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp
deleted file mode 100644
index 0658e38..0000000
--- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
-#include "../box_iou_rotated/box_iou_rotated_utils.h"
-#include "nms_rotated.h"
-
-namespace detectron2 {
-
-template <typename scalar_t>
-at::Tensor nms_rotated_cpu_kernel(
-    const at::Tensor& dets,
-    const at::Tensor& scores,
-    const float iou_threshold) {
-  // nms_rotated_cpu_kernel is modified from torchvision's nms_cpu_kernel,
-  // however, the code in this function is much shorter because
-  // we delegate the IoU computation for rotated boxes to
-  // the single_box_iou_rotated function in box_iou_rotated_utils.h
-  AT_ASSERTM(dets.device().is_cpu(), "dets must be a CPU tensor");
-  AT_ASSERTM(scores.device().is_cpu(), "scores must be a CPU tensor");
-  AT_ASSERTM(
-      dets.scalar_type() == scores.scalar_type(),
-      "dets should have the same type as scores");
-
-  if (dets.numel() == 0) {
-    return at::empty({0}, dets.options().dtype(at::kLong));
-  }
-
-  auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
-
-  auto ndets = dets.size(0);
-  at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte));
-  at::Tensor keep_t = at::zeros({ndets}, dets.options().dtype(at::kLong));
-
-  auto suppressed = suppressed_t.data_ptr<uint8_t>();
-  auto keep = keep_t.data_ptr<int64_t>();
-  auto order = order_t.data_ptr<int64_t>();
-
-  int64_t num_to_keep = 0;
-
-  for (int64_t _i = 0; _i < ndets; _i++) {
-    auto i = order[_i];
-    if (suppressed[i] == 1) {
-      continue;
-    }
-
-    keep[num_to_keep++] = i;
-
-    for (int64_t _j = _i + 1; _j < ndets; _j++) {
-      auto j = order[_j];
-      if (suppressed[j] == 1) {
-        continue;
-      }
-
-      auto ovr = single_box_iou_rotated<scalar_t>(
-          dets[i].data_ptr<scalar_t>(), dets[j].data_ptr<scalar_t>());
-      if (ovr >= iou_threshold) {
-        suppressed[j] = 1;
-      }
-    }
-  }
-  return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep);
-}
-
-at::Tensor nms_rotated_cpu(
-    // input must be contiguous
-    const at::Tensor& dets,
-    const at::Tensor& scores,
-    const float iou_threshold) {
-  auto result = at::empty({0}, dets.options());
-
-  AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms_rotated", [&] {
-    result = nms_rotated_cpu_kernel<scalar_t>(dets, scores, iou_threshold);
-  });
-  return result;
-}
-
-} // namespace detectron2
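
The suppression loop in nms_rotated_cpu_kernel above is the standard greedy NMS recipe: sort boxes by score, keep the highest-scoring box, and mark every later box whose rotated IoU with it reaches the threshold as suppressed. A minimal Python sketch of the same logic follows, assuming a hypothetical rotated_iou(box_a, box_b) helper that stands in for single_box_iou_rotated from box_iou_rotated_utils.h; boxes are (x_center, y_center, width, height, angle_degrees), matching the layout described in the CUDA kernel below.

    import numpy as np

    def rotated_nms_reference(dets, scores, iou_threshold, rotated_iou):
        """Greedy rotated NMS mirroring nms_rotated_cpu_kernel.

        dets: (N, 5) array of (x_center, y_center, width, height, angle_degrees).
        scores: (N,) array of confidences.
        rotated_iou: callable(box_a, box_b) -> float, a stand-in for
        single_box_iou_rotated (not implemented here).
        Returns indices of kept boxes, highest score first.
        """
        order = np.argsort(-scores)              # process boxes by descending score
        suppressed = np.zeros(len(dets), dtype=bool)
        keep = []
        for _i, i in enumerate(order):
            if suppressed[i]:
                continue
            keep.append(i)                       # current box survives
            for j in order[_i + 1:]:             # suppress lower-scoring overlaps
                if suppressed[j]:
                    continue
                if rotated_iou(dets[i], dets[j]) >= iou_threshold:
                    suppressed[j] = True
        return np.asarray(keep, dtype=np.int64)

In the extension itself, the CPU/CUDA dispatch happens in the inline nms_rotated wrapper from nms_rotated.h above, and vision.cpp (further down in this diff) exposes it to Python via m.def("nms_rotated", ...).
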
diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu deleted file mode 100644 index 40977a0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -#include -#include -#include -#include -#include "../box_iou_rotated/box_iou_rotated_utils.h" - -using namespace detectron2; - -namespace { -int const threadsPerBlock = sizeof(unsigned long long) * 8; -} - -template -__global__ void nms_rotated_cuda_kernel( - const int n_boxes, - const float iou_threshold, - const T* dev_boxes, - unsigned long long* dev_mask) { - // nms_rotated_cuda_kernel is modified from torchvision's nms_cuda_kernel - - const int row_start = blockIdx.y; - const int col_start = blockIdx.x; - - // if (row_start > col_start) return; - - const int row_size = - min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); - const int col_size = - min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); - - // Compared to nms_cuda_kernel, where each box is represented with 4 values - // (x1, y1, x2, y2), each rotated box is represented with 5 values - // (x_center, y_center, width, height, angle_degrees) here. - __shared__ T block_boxes[threadsPerBlock * 5]; - if (threadIdx.x < col_size) { - block_boxes[threadIdx.x * 5 + 0] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; - block_boxes[threadIdx.x * 5 + 1] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; - block_boxes[threadIdx.x * 5 + 2] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; - block_boxes[threadIdx.x * 5 + 3] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; - block_boxes[threadIdx.x * 5 + 4] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; - } - __syncthreads(); - - if (threadIdx.x < row_size) { - const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; - const T* cur_box = dev_boxes + cur_box_idx * 5; - int i = 0; - unsigned long long t = 0; - int start = 0; - if (row_start == col_start) { - start = threadIdx.x + 1; - } - for (i = start; i < col_size; i++) { - // Instead of devIoU used by original horizontal nms, here - // we use the single_box_iou_rotated function from box_iou_rotated_utils.h - if (single_box_iou_rotated(cur_box, block_boxes + i * 5) > - iou_threshold) { - t |= 1ULL << i; - } - } - const int col_blocks = at::cuda::ATenCeilDiv(n_boxes, threadsPerBlock); - dev_mask[cur_box_idx * col_blocks + col_start] = t; - } -} - -namespace detectron2 { - -at::Tensor nms_rotated_cuda( - // input must be contiguous - const at::Tensor& dets, - const at::Tensor& scores, - float iou_threshold) { - // using scalar_t = float; - AT_ASSERTM(dets.is_cuda(), "dets must be a CUDA tensor"); - AT_ASSERTM(scores.is_cuda(), "scores must be a CUDA tensor"); - at::cuda::CUDAGuard device_guard(dets.device()); - - auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); - auto dets_sorted = dets.index_select(0, order_t); - - auto dets_num = dets.size(0); - - const int col_blocks = - at::cuda::ATenCeilDiv(static_cast(dets_num), threadsPerBlock); - - at::Tensor mask = - at::empty({dets_num * col_blocks}, dets.options().dtype(at::kLong)); - - dim3 blocks(col_blocks, col_blocks); - dim3 threads(threadsPerBlock); - cudaStream_t stream = 
at::cuda::getCurrentCUDAStream(); - - AT_DISPATCH_FLOATING_TYPES( - dets_sorted.scalar_type(), "nms_rotated_kernel_cuda", [&] { - nms_rotated_cuda_kernel<<>>( - dets_num, - iou_threshold, - dets_sorted.data_ptr(), - (unsigned long long*)mask.data_ptr()); - }); - - at::Tensor mask_cpu = mask.to(at::kCPU); - unsigned long long* mask_host = - (unsigned long long*)mask_cpu.data_ptr(); - - std::vector remv(col_blocks); - memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); - - at::Tensor keep = - at::empty({dets_num}, dets.options().dtype(at::kLong).device(at::kCPU)); - int64_t* keep_out = keep.data_ptr(); - - int num_to_keep = 0; - for (int i = 0; i < dets_num; i++) { - int nblock = i / threadsPerBlock; - int inblock = i % threadsPerBlock; - - if (!(remv[nblock] & (1ULL << inblock))) { - keep_out[num_to_keep++] = i; - unsigned long long* p = mask_host + i * col_blocks; - for (int j = nblock; j < col_blocks; j++) { - remv[j] |= p[j]; - } - } - } - - AT_CUDA_CHECK(cudaGetLastError()); - return order_t.index( - {keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep) - .to(order_t.device(), keep.scalar_type())}); -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/vision.cpp b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/vision.cpp deleted file mode 100644 index fa7942e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/vision.cpp +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -#include -#include "ROIAlign/ROIAlign.h" -#include "ROIAlignRotated/ROIAlignRotated.h" -#include "box_iou_rotated/box_iou_rotated.h" -#include "deformable/deform_conv.h" -#include "nms_rotated/nms_rotated.h" - -namespace detectron2 { - -#ifdef WITH_CUDA -extern int get_cudart_version(); -#endif - -std::string get_cuda_version() { -#ifdef WITH_CUDA - std::ostringstream oss; - - // copied from - // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 - auto printCudaStyleVersion = [&](int v) { - oss << (v / 1000) << "." << (v / 10 % 100); - if (v % 10 != 0) { - oss << "." << (v % 10); - } - }; - printCudaStyleVersion(get_cudart_version()); - return oss.str(); -#else - return std::string("not available"); -#endif -} - -// similar to -// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp -std::string get_compiler_version() { - std::ostringstream ss; -#if defined(__GNUC__) -#ifndef __clang__ - -#if ((__GNUC__ <= 4) && (__GNUC_MINOR__ <= 8)) -#error "GCC >= 4.9 is required!" -#endif - - { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } -#endif -#endif - -#if defined(__clang_major__) - { - ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 
- << __clang_patchlevel__; - } -#endif - -#if defined(_MSC_VER) - { ss << "MSVC " << _MSC_FULL_VER; } -#endif - return ss.str(); -} - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("get_compiler_version", &get_compiler_version, "get_compiler_version"); - m.def("get_cuda_version", &get_cuda_version, "get_cuda_version"); - - m.def("box_iou_rotated", &box_iou_rotated, "IoU for rotated boxes"); - - m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); - m.def( - "deform_conv_backward_input", - &deform_conv_backward_input, - "deform_conv_backward_input"); - m.def( - "deform_conv_backward_filter", - &deform_conv_backward_filter, - "deform_conv_backward_filter"); - m.def( - "modulated_deform_conv_forward", - &modulated_deform_conv_forward, - "modulated_deform_conv_forward"); - m.def( - "modulated_deform_conv_backward", - &modulated_deform_conv_backward, - "modulated_deform_conv_backward"); - - m.def("nms_rotated", &nms_rotated, "NMS for rotated boxes"); - - m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); - m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); - - m.def( - "roi_align_rotated_forward", - &ROIAlignRotated_forward, - "Forward pass for Rotated ROI-Align Operator"); - m.def( - "roi_align_rotated_backward", - &ROIAlignRotated_backward, - "Backward pass for Rotated ROI-Align Operator"); -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/deform_conv.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/deform_conv.py deleted file mode 100644 index ba8c649..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/deform_conv.py +++ /dev/null @@ -1,494 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import math -from functools import lru_cache -import torch -from torch import nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable -from torch.nn.modules.utils import _pair - -from detectron2 import _C - -from .wrappers import _NewEmptyTensorOp - - -class _DeformConv(Function): - @staticmethod - def forward( - ctx, - input, - offset, - weight, - stride=1, - padding=0, - dilation=1, - groups=1, - deformable_groups=1, - im2col_step=64, - ): - if input is not None and input.dim() != 4: - raise ValueError( - "Expected 4D tensor as input, got {}D tensor instead.".format(input.dim()) - ) - ctx.stride = _pair(stride) - ctx.padding = _pair(padding) - ctx.dilation = _pair(dilation) - ctx.groups = groups - ctx.deformable_groups = deformable_groups - ctx.im2col_step = im2col_step - - ctx.save_for_backward(input, offset, weight) - - output = input.new_empty( - _DeformConv._output_size(input, weight, ctx.padding, ctx.dilation, ctx.stride) - ) - - ctx.bufs_ = [input.new_empty(0), input.new_empty(0)] # columns, ones - - if not input.is_cuda: - raise NotImplementedError - else: - cur_im2col_step = _DeformConv._cal_im2col_step(input.shape[0], ctx.im2col_step) - assert (input.shape[0] % cur_im2col_step) == 0, "im2col step must divide batchsize" - - _C.deform_conv_forward( - input, - weight, - offset, - output, - ctx.bufs_[0], - ctx.bufs_[1], - weight.size(3), - weight.size(2), - ctx.stride[1], - ctx.stride[0], - ctx.padding[1], - ctx.padding[0], - ctx.dilation[1], - ctx.dilation[0], - ctx.groups, - ctx.deformable_groups, - cur_im2col_step, - ) - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - input, offset, weight = ctx.saved_tensors - - grad_input = grad_offset = grad_weight = None - - if not grad_output.is_cuda: - raise NotImplementedError - else: - cur_im2col_step = _DeformConv._cal_im2col_step(input.shape[0], ctx.im2col_step) - assert (input.shape[0] % cur_im2col_step) == 0, "im2col step must divide batchsize" - - if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]: - grad_input = torch.zeros_like(input) - grad_offset = torch.zeros_like(offset) - _C.deform_conv_backward_input( - input, - offset, - grad_output, - grad_input, - grad_offset, - weight, - ctx.bufs_[0], - weight.size(3), - weight.size(2), - ctx.stride[1], - ctx.stride[0], - ctx.padding[1], - ctx.padding[0], - ctx.dilation[1], - ctx.dilation[0], - ctx.groups, - ctx.deformable_groups, - cur_im2col_step, - ) - - if ctx.needs_input_grad[2]: - grad_weight = torch.zeros_like(weight) - _C.deform_conv_backward_filter( - input, - offset, - grad_output, - grad_weight, - ctx.bufs_[0], - ctx.bufs_[1], - weight.size(3), - weight.size(2), - ctx.stride[1], - ctx.stride[0], - ctx.padding[1], - ctx.padding[0], - ctx.dilation[1], - ctx.dilation[0], - ctx.groups, - ctx.deformable_groups, - 1, - cur_im2col_step, - ) - - return grad_input, grad_offset, grad_weight, None, None, None, None, None, None - - @staticmethod - def _output_size(input, weight, padding, dilation, stride): - channels = weight.size(0) - output_size = (input.size(0), channels) - for d in range(input.dim() - 2): - in_size = input.size(d + 2) - pad = padding[d] - kernel = dilation[d] * (weight.size(d + 2) - 1) + 1 - stride_ = stride[d] - output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1,) - if not all(map(lambda s: s > 0, output_size)): - raise ValueError( - "convolution input is too small (output would be {})".format( - "x".join(map(str, output_size)) - ) - ) - return 
output_size - - @staticmethod - @lru_cache(maxsize=128) - def _cal_im2col_step(input_size, default_size): - """ - Calculate proper im2col step size, which should be divisible by input_size and not larger - than prefer_size. Meanwhile the step size should be as large as possible to be more - efficient. So we choose the largest one among all divisors of input_size which are smaller - than prefer_size. - :param input_size: input batch size . - :param default_size: default preferred im2col step size. - :return: the largest proper step size. - """ - if input_size <= default_size: - return input_size - best_step = 1 - for step in range(2, min(int(math.sqrt(input_size)) + 1, default_size)): - if input_size % step == 0: - if input_size // step <= default_size: - return input_size // step - best_step = step - - return best_step - - -class _ModulatedDeformConv(Function): - @staticmethod - def forward( - ctx, - input, - offset, - mask, - weight, - bias=None, - stride=1, - padding=0, - dilation=1, - groups=1, - deformable_groups=1, - ): - ctx.stride = stride - ctx.padding = padding - ctx.dilation = dilation - ctx.groups = groups - ctx.deformable_groups = deformable_groups - ctx.with_bias = bias is not None - if not ctx.with_bias: - bias = input.new_empty(1) # fake tensor - if not input.is_cuda: - raise NotImplementedError - if ( - weight.requires_grad - or mask.requires_grad - or offset.requires_grad - or input.requires_grad - ): - ctx.save_for_backward(input, offset, mask, weight, bias) - output = input.new_empty(_ModulatedDeformConv._infer_shape(ctx, input, weight)) - ctx._bufs = [input.new_empty(0), input.new_empty(0)] - _C.modulated_deform_conv_forward( - input, - weight, - bias, - ctx._bufs[0], - offset, - mask, - output, - ctx._bufs[1], - weight.shape[2], - weight.shape[3], - ctx.stride, - ctx.stride, - ctx.padding, - ctx.padding, - ctx.dilation, - ctx.dilation, - ctx.groups, - ctx.deformable_groups, - ctx.with_bias, - ) - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - if not grad_output.is_cuda: - raise NotImplementedError - input, offset, mask, weight, bias = ctx.saved_tensors - grad_input = torch.zeros_like(input) - grad_offset = torch.zeros_like(offset) - grad_mask = torch.zeros_like(mask) - grad_weight = torch.zeros_like(weight) - grad_bias = torch.zeros_like(bias) - _C.modulated_deform_conv_backward( - input, - weight, - bias, - ctx._bufs[0], - offset, - mask, - ctx._bufs[1], - grad_input, - grad_weight, - grad_bias, - grad_offset, - grad_mask, - grad_output, - weight.shape[2], - weight.shape[3], - ctx.stride, - ctx.stride, - ctx.padding, - ctx.padding, - ctx.dilation, - ctx.dilation, - ctx.groups, - ctx.deformable_groups, - ctx.with_bias, - ) - if not ctx.with_bias: - grad_bias = None - - return ( - grad_input, - grad_offset, - grad_mask, - grad_weight, - grad_bias, - None, - None, - None, - None, - None, - ) - - @staticmethod - def _infer_shape(ctx, input, weight): - n = input.size(0) - channels_out = weight.size(0) - height, width = input.shape[2:4] - kernel_h, kernel_w = weight.shape[2:4] - height_out = ( - height + 2 * ctx.padding - (ctx.dilation * (kernel_h - 1) + 1) - ) // ctx.stride + 1 - width_out = ( - width + 2 * ctx.padding - (ctx.dilation * (kernel_w - 1) + 1) - ) // ctx.stride + 1 - return n, channels_out, height_out, width_out - - -deform_conv = _DeformConv.apply -modulated_deform_conv = _ModulatedDeformConv.apply - - -class DeformConv(nn.Module): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - 
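The _cal_im2col_step helper above returns the largest divisor of the batch size that does not exceed the preferred step (64 by default), which is what makes the `input.shape[0] % cur_im2col_step == 0` assertions hold. A standalone restatement of the same search, written only so it can be sanity-checked in isolation (the function name here is illustrative, not from the file):

# Illustration only: the same divisor search as _cal_im2col_step above.
def pick_im2col_step(batch_size: int, prefer: int = 64) -> int:
    if batch_size <= prefer:
        return batch_size
    best = 1
    for step in range(2, min(int(batch_size ** 0.5) + 1, prefer)):
        if batch_size % step == 0:
            if batch_size // step <= prefer:
                return batch_size // step
            best = step
    return best

assert pick_im2col_step(32) == 32    # small batch: use it whole
assert pick_im2col_step(128) == 64   # 128 // 2 == 64, still within the preferred step
assert pick_im2col_step(96) == 48    # largest divisor of 96 not exceeding 64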
padding=0, - dilation=1, - groups=1, - deformable_groups=1, - bias=False, - norm=None, - activation=None, - ): - """ - Deformable convolution from :paper:`deformconv`. - - Arguments are similar to :class:`Conv2D`. Extra arguments: - - Args: - deformable_groups (int): number of groups used in deformable convolution. - norm (nn.Module, optional): a normalization layer - activation (callable(Tensor) -> Tensor): a callable activation function - """ - super(DeformConv, self).__init__() - - assert not bias - assert in_channels % groups == 0, "in_channels {} cannot be divisible by groups {}".format( - in_channels, groups - ) - assert ( - out_channels % groups == 0 - ), "out_channels {} cannot be divisible by groups {}".format(out_channels, groups) - - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = _pair(kernel_size) - self.stride = _pair(stride) - self.padding = _pair(padding) - self.dilation = _pair(dilation) - self.groups = groups - self.deformable_groups = deformable_groups - self.norm = norm - self.activation = activation - - self.weight = nn.Parameter( - torch.Tensor(out_channels, in_channels // self.groups, *self.kernel_size) - ) - self.bias = None - - nn.init.kaiming_uniform_(self.weight, nonlinearity="relu") - - def forward(self, x, offset): - if x.numel() == 0: - # When input is empty, we want to return a empty tensor with "correct" shape, - # So that the following operations will not panic - # if they check for the shape of the tensor. - # This computes the height and width of the output tensor - output_shape = [ - (i + 2 * p - (di * (k - 1) + 1)) // s + 1 - for i, p, di, k, s in zip( - x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride - ) - ] - output_shape = [x.shape[0], self.weight.shape[0]] + output_shape - return _NewEmptyTensorOp.apply(x, output_shape) - - x = deform_conv( - x, - offset, - self.weight, - self.stride, - self.padding, - self.dilation, - self.groups, - self.deformable_groups, - ) - if self.norm is not None: - x = self.norm(x) - if self.activation is not None: - x = self.activation(x) - return x - - def extra_repr(self): - tmpstr = "in_channels=" + str(self.in_channels) - tmpstr += ", out_channels=" + str(self.out_channels) - tmpstr += ", kernel_size=" + str(self.kernel_size) - tmpstr += ", stride=" + str(self.stride) - tmpstr += ", padding=" + str(self.padding) - tmpstr += ", dilation=" + str(self.dilation) - tmpstr += ", groups=" + str(self.groups) - tmpstr += ", deformable_groups=" + str(self.deformable_groups) - tmpstr += ", bias=False" - return tmpstr - - -class ModulatedDeformConv(nn.Module): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - deformable_groups=1, - bias=True, - norm=None, - activation=None, - ): - """ - Modulated deformable convolution from :paper:`deformconv2`. - - Arguments are similar to :class:`Conv2D`. Extra arguments: - - Args: - deformable_groups (int): number of groups used in deformable convolution. 
- norm (nn.Module, optional): a normalization layer - activation (callable(Tensor) -> Tensor): a callable activation function - """ - super(ModulatedDeformConv, self).__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = _pair(kernel_size) - self.stride = stride - self.padding = padding - self.dilation = dilation - self.groups = groups - self.deformable_groups = deformable_groups - self.with_bias = bias - self.norm = norm - self.activation = activation - - self.weight = nn.Parameter( - torch.Tensor(out_channels, in_channels // groups, *self.kernel_size) - ) - if bias: - self.bias = nn.Parameter(torch.Tensor(out_channels)) - else: - self.bias = None - - nn.init.kaiming_uniform_(self.weight, nonlinearity="relu") - if self.bias is not None: - nn.init.constant_(self.bias, 0) - - def forward(self, x, offset, mask): - if x.numel() == 0: - output_shape = [ - (i + 2 * p - (di * (k - 1) + 1)) // s + 1 - for i, p, di, k, s in zip( - x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride - ) - ] - output_shape = [x.shape[0], self.weight.shape[0]] + output_shape - return _NewEmptyTensorOp.apply(x, output_shape) - - x = modulated_deform_conv( - x, - offset, - mask, - self.weight, - self.bias, - self.stride, - self.padding, - self.dilation, - self.groups, - self.deformable_groups, - ) - if self.norm is not None: - x = self.norm(x) - if self.activation is not None: - x = self.activation(x) - return x - - def extra_repr(self): - tmpstr = "in_channels=" + str(self.in_channels) - tmpstr += ", out_channels=" + str(self.out_channels) - tmpstr += ", kernel_size=" + str(self.kernel_size) - tmpstr += ", stride=" + str(self.stride) - tmpstr += ", padding=" + str(self.padding) - tmpstr += ", dilation=" + str(self.dilation) - tmpstr += ", groups=" + str(self.groups) - tmpstr += ", deformable_groups=" + str(self.deformable_groups) - tmpstr += ", bias=" + str(self.with_bias) - return tmpstr diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/mask_ops.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/mask_ops.py deleted file mode 100644 index 0fe115d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/mask_ops.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import numpy as np -import torch -from PIL import Image -from torch.nn import functional as F - -__all__ = ["paste_masks_in_image"] - - -BYTES_PER_FLOAT = 4 -# TODO: This memory limit may be too much or too little. It would be better to -# determine it based on available resources. -GPU_MEM_LIMIT = 1024 ** 3 # 1 GB memory limit - - -def _do_paste_mask(masks, boxes, img_h, img_w, skip_empty=True): - """ - Args: - masks: N, 1, H, W - boxes: N, 4 - img_h, img_w (int): - skip_empty (bool): only paste masks within the region that - tightly bound all boxes, and returns the results this region only. - An important optimization for CPU. - - Returns: - if skip_empty == False, a mask of shape (N, img_h, img_w) - if skip_empty == True, a mask of shape (N, h', w'), and the slice - object for the corresponding region. - """ - # On GPU, paste all masks together (up to chunk size) - # by using the entire image to sample the masks - # Compared to pasting them one by one, - # this has more operations but is faster on COCO-scale dataset. 
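_do_paste_mask below pastes masks by building a sampling grid for F.grid_sample: image pixel centers (x + 0.5, y + 0.5) are expressed in each box's own frame and normalized to [-1, 1], so pixels outside the box sample zeros. A toy single-box sketch of that normalization (variable names are illustrative, not from the file):

# Toy illustration of the grid normalization used in _do_paste_mask below.
import torch
import torch.nn.functional as F

mask = torch.rand(1, 1, 28, 28)                 # one soft 28x28 mask
box = torch.tensor([10.0, 20.0, 50.0, 100.0])   # x0, y0, x1, y1 in image coordinates
img_h, img_w = 120, 80

ys = torch.arange(0, img_h, dtype=torch.float32) + 0.5   # pixel centers
xs = torch.arange(0, img_w, dtype=torch.float32) + 0.5
gy = (ys - box[1]) / (box[3] - box[1]) * 2 - 1           # [-1, 1] inside the box
gx = (xs - box[0]) / (box[2] - box[0]) * 2 - 1

GX = gx[None, :].expand(img_h, img_w)                    # x varies along width
GY = gy[:, None].expand(img_h, img_w)                    # y varies along height
grid = torch.stack([GX, GY], dim=-1)[None]               # (1, H, W, 2), last dim = (x, y)
pasted = F.grid_sample(mask, grid, align_corners=False)[0, 0]   # (img_h, img_w) soft mask
# Locations outside the box fall outside [-1, 1] and receive zeros.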
- device = masks.device - if skip_empty: - x0_int, y0_int = torch.clamp(boxes.min(dim=0).values.floor()[:2] - 1, min=0).to( - dtype=torch.int32 - ) - x1_int = torch.clamp(boxes[:, 2].max().ceil() + 1, max=img_w).to(dtype=torch.int32) - y1_int = torch.clamp(boxes[:, 3].max().ceil() + 1, max=img_h).to(dtype=torch.int32) - else: - x0_int, y0_int = 0, 0 - x1_int, y1_int = img_w, img_h - x0, y0, x1, y1 = torch.split(boxes, 1, dim=1) # each is Nx1 - - N = masks.shape[0] - - img_y = torch.arange(y0_int, y1_int, device=device, dtype=torch.float32) + 0.5 - img_x = torch.arange(x0_int, x1_int, device=device, dtype=torch.float32) + 0.5 - img_y = (img_y - y0) / (y1 - y0) * 2 - 1 - img_x = (img_x - x0) / (x1 - x0) * 2 - 1 - # img_x, img_y have shapes (N, w), (N, h) - - gx = img_x[:, None, :].expand(N, img_y.size(1), img_x.size(1)) - gy = img_y[:, :, None].expand(N, img_y.size(1), img_x.size(1)) - grid = torch.stack([gx, gy], dim=3) - - img_masks = F.grid_sample(masks.to(dtype=torch.float32), grid, align_corners=False) - - if skip_empty: - return img_masks[:, 0], (slice(y0_int, y1_int), slice(x0_int, x1_int)) - else: - return img_masks[:, 0], () - - -def paste_masks_in_image(masks, boxes, image_shape, threshold=0.5): - """ - Paste a set of masks that are of a fixed resolution (e.g., 28 x 28) into an image. - The location, height, and width for pasting each mask is determined by their - corresponding bounding boxes in boxes. - - Note: - This is a complicated but more accurate implementation. In actual deployment, it is - often enough to use a faster but less accurate implementation. - See :func:`paste_mask_in_image_old` in this file for an alternative implementation. - - Args: - masks (tensor): Tensor of shape (Bimg, Hmask, Wmask), where Bimg is the number of - detected object instances in the image and Hmask, Wmask are the mask width and mask - height of the predicted mask (e.g., Hmask = Wmask = 28). Values are in [0, 1]. - boxes (Boxes or Tensor): A Boxes of length Bimg or Tensor of shape (Bimg, 4). - boxes[i] and masks[i] correspond to the same object instance. - image_shape (tuple): height, width - threshold (float): A threshold in [0, 1] for converting the (soft) masks to - binary masks. - - Returns: - img_masks (Tensor): A tensor of shape (Bimg, Himage, Wimage), where Bimg is the - number of detected object instances and Himage, Wimage are the image width - and height. img_masks[i] is a binary mask for object instance i. - """ - - assert masks.shape[-1] == masks.shape[-2], "Only square mask predictions are supported" - N = len(masks) - if N == 0: - return masks.new_empty((0,) + image_shape, dtype=torch.uint8) - if not isinstance(boxes, torch.Tensor): - boxes = boxes.tensor - device = boxes.device - assert len(boxes) == N, boxes.shape - - img_h, img_w = image_shape - - # The actual implementation split the input into chunks, - # and paste them chunk by chunk. - if device.type == "cpu": - # CPU is most efficient when they are pasted one by one with skip_empty=True - # so that it performs minimal number of operations. 
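# (Worked example of the chunking arithmetic just below, added for illustration:
#  with BYTES_PER_FLOAT = 4 and GPU_MEM_LIMIT = 1024 ** 3, pasting N = 500 masks
#  onto a 1333 x 800 image needs about 500 * 1333 * 800 * 4 bytes ~ 1.99 GiB of
#  float storage, so the GPU branch uses num_chunks = ceil(1.99) = 2, while the
#  CPU branch below simply uses one chunk per mask.)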
- num_chunks = N - else: - # GPU benefits from parallelism for larger chunks, but may have memory issue - # int(img_h) because shape may be tensors in tracing - num_chunks = int(np.ceil(N * int(img_h) * int(img_w) * BYTES_PER_FLOAT / GPU_MEM_LIMIT)) - assert ( - num_chunks <= N - ), "Default GPU_MEM_LIMIT in mask_ops.py is too small; try increasing it" - chunks = torch.chunk(torch.arange(N, device=device), num_chunks) - - img_masks = torch.zeros( - N, img_h, img_w, device=device, dtype=torch.bool if threshold >= 0 else torch.uint8 - ) - for inds in chunks: - masks_chunk, spatial_inds = _do_paste_mask( - masks[inds, None, :, :], boxes[inds], img_h, img_w, skip_empty=device.type == "cpu" - ) - - if threshold >= 0: - masks_chunk = (masks_chunk >= threshold).to(dtype=torch.bool) - else: - # for visualization and debugging - masks_chunk = (masks_chunk * 255).to(dtype=torch.uint8) - - img_masks[(inds,) + spatial_inds] = masks_chunk - return img_masks - - -# The below are the original paste function (from Detectron1) which has -# larger quantization error. -# It is faster on CPU, while the aligned one is faster on GPU thanks to grid_sample. - - -def paste_mask_in_image_old(mask, box, img_h, img_w, threshold): - """ - Paste a single mask in an image. - This is a per-box implementation of :func:`paste_masks_in_image`. - This function has larger quantization error due to incorrect pixel - modeling and is not used any more. - - Args: - mask (Tensor): A tensor of shape (Hmask, Wmask) storing the mask of a single - object instance. Values are in [0, 1]. - box (Tensor): A tensor of shape (4, ) storing the x0, y0, x1, y1 box corners - of the object instance. - img_h, img_w (int): Image height and width. - threshold (float): Mask binarization threshold in [0, 1]. - - Returns: - im_mask (Tensor): - The resized and binarized object mask pasted into the original - image plane (a tensor of shape (img_h, img_w)). - """ - # Conversion from continuous box coordinates to discrete pixel coordinates - # via truncation (cast to int32). This determines which pixels to paste the - # mask onto. - box = box.to(dtype=torch.int32) # Continuous to discrete coordinate conversion - # An example (1D) box with continuous coordinates (x0=0.7, x1=4.3) will map to - # a discrete coordinates (x0=0, x1=4). Note that box is mapped to 5 = x1 - x0 + 1 - # pixels (not x1 - x0 pixels). - samples_w = box[2] - box[0] + 1 # Number of pixel samples, *not* geometric width - samples_h = box[3] - box[1] + 1 # Number of pixel samples, *not* geometric height - - # Resample the mask from it's original grid to the new samples_w x samples_h grid - mask = Image.fromarray(mask.cpu().numpy()) - mask = mask.resize((samples_w, samples_h), resample=Image.BILINEAR) - mask = np.array(mask, copy=False) - - if threshold >= 0: - mask = np.array(mask > threshold, dtype=np.uint8) - mask = torch.from_numpy(mask) - else: - # for visualization and debugging, we also - # allow it to return an unmodified mask - mask = torch.from_numpy(mask * 255).to(torch.uint8) - - im_mask = torch.zeros((img_h, img_w), dtype=torch.uint8) - x_0 = max(box[0], 0) - x_1 = min(box[2] + 1, img_w) - y_0 = max(box[1], 0) - y_1 = min(box[3] + 1, img_h) - - im_mask[y_0:y_1, x_0:x_1] = mask[ - (y_0 - box[1]) : (y_1 - box[1]), (x_0 - box[0]) : (x_1 - box[0]) - ] - return im_mask - - -# Our pixel modeling requires extrapolation for any continuous -# coordinate < 0.5 or > length - 0.5. 
When sampling pixels on the masks, -# we would like this extrapolation to be an interpolation between boundary values and zero, -# instead of using absolute zero or boundary values. -# Therefore `paste_mask_in_image_old` is often used with zero padding around the masks like this: -# masks, scale = pad_masks(masks[:, 0, :, :], 1) -# boxes = scale_boxes(boxes.tensor, scale) - - -def pad_masks(masks, padding): - """ - Args: - masks (tensor): A tensor of shape (B, M, M) representing B masks. - padding (int): Number of cells to pad on all sides. - - Returns: - The padded masks and the scale factor of the padding size / original size. - """ - B = masks.shape[0] - M = masks.shape[-1] - pad2 = 2 * padding - scale = float(M + pad2) / M - padded_masks = masks.new_zeros((B, M + pad2, M + pad2)) - padded_masks[:, padding:-padding, padding:-padding] = masks - return padded_masks, scale - - -def scale_boxes(boxes, scale): - """ - Args: - boxes (tensor): A tensor of shape (B, 4) representing B boxes with 4 - coords representing the corners x0, y0, x1, y1, - scale (float): The box scaling factor. - - Returns: - Scaled boxes. - """ - w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5 - h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5 - x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5 - y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5 - - w_half *= scale - h_half *= scale - - scaled_boxes = torch.zeros_like(boxes) - scaled_boxes[:, 0] = x_c - w_half - scaled_boxes[:, 2] = x_c + w_half - scaled_boxes[:, 1] = y_c - h_half - scaled_boxes[:, 3] = y_c + h_half - return scaled_boxes diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/nms.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/nms.py deleted file mode 100644 index aafe29b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/nms.py +++ /dev/null @@ -1,146 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import torch -from torchvision.ops import boxes as box_ops -from torchvision.ops import nms # BC-compat - - -def batched_nms(boxes, scores, idxs, iou_threshold): - """ - Same as torchvision.ops.boxes.batched_nms, but safer. - """ - assert boxes.shape[-1] == 4 - # TODO may need better strategy. - # Investigate after having a fully-cuda NMS op. - if len(boxes) < 40000: - return box_ops.batched_nms(boxes, scores, idxs, iou_threshold) - - result_mask = scores.new_zeros(scores.size(), dtype=torch.bool) - for id in torch.unique(idxs).cpu().tolist(): - mask = (idxs == id).nonzero().view(-1) - keep = nms(boxes[mask], scores[mask], iou_threshold) - result_mask[mask[keep]] = True - keep = result_mask.nonzero().view(-1) - keep = keep[scores[keep].argsort(descending=True)] - return keep - - -# Note: this function (nms_rotated) might be moved into -# torchvision/ops/boxes.py in the future -def nms_rotated(boxes, scores, iou_threshold): - """ - Performs non-maximum suppression (NMS) on the rotated boxes according - to their intersection-over-union (IoU). - - Rotated NMS iteratively removes lower scoring rotated boxes which have an - IoU greater than iou_threshold with another (higher scoring) rotated box. - - Note that RotatedBox (5, 3, 4, 2, -90) covers exactly the same region as - RotatedBox (5, 3, 4, 2, 90) does, and their IoU will be 1. However, they - can be representing completely different objects in certain tasks, e.g., OCR. 
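Per the note above, RotatedBox (5, 3, 4, 2, -90) and (5, 3, 4, 2, 90) cover exactly the same region, so their IoU is 1 and plain rotated NMS keeps only the higher-scoring one. A hedged usage sketch of nms_rotated as defined above; it assumes detectron2 was built with its compiled extension, since the call dispatches to _C.nms_rotated:

# Sketch only: exercising nms_rotated from the module above.
import torch
from detectron2.layers.nms import nms_rotated

boxes = torch.tensor([[5.0, 3.0, 4.0, 2.0, -90.0],
                      [5.0, 3.0, 4.0, 2.0,  90.0]])   # (x_ctr, y_ctr, w, h, angle_degrees)
scores = torch.tensor([0.9, 0.8])

keep = nms_rotated(boxes, scores, iou_threshold=0.5)
# The two boxes cover the same region (IoU == 1), so only the higher-scoring
# index survives: keep == tensor([0])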
- - As for the question of whether rotated-NMS should treat them as faraway boxes - even though their IOU is 1, it depends on the application and/or ground truth annotation. - - As an extreme example, consider a single character v and the square box around it. - - If the angle is 0 degree, the object (text) would be read as 'v'; - - If the angle is 90 degrees, the object (text) would become '>'; - - If the angle is 180 degrees, the object (text) would become '^'; - - If the angle is 270/-90 degrees, the object (text) would become '<' - - All of these cases have IoU of 1 to each other, and rotated NMS that only - uses IoU as criterion would only keep one of them with the highest score - - which, practically, still makes sense in most cases because typically - only one of theses orientations is the correct one. Also, it does not matter - as much if the box is only used to classify the object (instead of transcribing - them with a sequential OCR recognition model) later. - - On the other hand, when we use IoU to filter proposals that are close to the - ground truth during training, we should definitely take the angle into account if - we know the ground truth is labeled with the strictly correct orientation (as in, - upside-down words are annotated with -180 degrees even though they can be covered - with a 0/90/-90 degree box, etc.) - - The way the original dataset is annotated also matters. For example, if the dataset - is a 4-point polygon dataset that does not enforce ordering of vertices/orientation, - we can estimate a minimum rotated bounding box to this polygon, but there's no way - we can tell the correct angle with 100% confidence (as shown above, there could be 4 different - rotated boxes, with angles differed by 90 degrees to each other, covering the exactly - same region). In that case we have to just use IoU to determine the box - proximity (as many detection benchmarks (even for text) do) unless there're other - assumptions we can make (like width is always larger than height, or the object is not - rotated by more than 90 degrees CCW/CW, etc.) - - In summary, not considering angles in rotated NMS seems to be a good option for now, - but we should be aware of its implications. - - Args: - boxes (Tensor[N, 5]): Rotated boxes to perform NMS on. They are expected to be in - (x_center, y_center, width, height, angle_degrees) format. - scores (Tensor[N]): Scores for each one of the rotated boxes - iou_threshold (float): Discards all overlapping rotated boxes with IoU < iou_threshold - - Returns: - keep (Tensor): int64 tensor with the indices of the elements that have been kept - by Rotated NMS, sorted in decreasing order of scores - """ - from detectron2 import _C - - return _C.nms_rotated(boxes, scores, iou_threshold) - - -# Note: this function (batched_nms_rotated) might be moved into -# torchvision/ops/boxes.py in the future -def batched_nms_rotated(boxes, scores, idxs, iou_threshold): - """ - Performs non-maximum suppression in a batched fashion. - - Each index value correspond to a category, and NMS - will not be applied between elements of different categories. - - Args: - boxes (Tensor[N, 5]): - boxes where NMS will be performed. They - are expected to be in (x_ctr, y_ctr, width, height, angle_degrees) format - scores (Tensor[N]): - scores for each one of the boxes - idxs (Tensor[N]): - indices of the categories for each one of the boxes. 
- iou_threshold (float): - discards all overlapping boxes - with IoU < iou_threshold - - Returns: - Tensor: - int64 tensor with the indices of the elements that have been kept - by NMS, sorted in decreasing order of scores - """ - assert boxes.shape[-1] == 5 - - if boxes.numel() == 0: - return torch.empty((0,), dtype=torch.int64, device=boxes.device) - # Strategy: in order to perform NMS independently per class, - # we add an offset to all the boxes. The offset is dependent - # only on the class idx, and is large enough so that boxes - # from different classes do not overlap - - # Note that batched_nms in torchvision/ops/boxes.py only uses max_coordinate, - # which won't handle negative coordinates correctly. - # Here by using min_coordinate we can make sure the negative coordinates are - # correctly handled. - max_coordinate = ( - torch.max(boxes[:, 0], boxes[:, 1]) + torch.max(boxes[:, 2], boxes[:, 3]) / 2 - ).max() - min_coordinate = ( - torch.min(boxes[:, 0], boxes[:, 1]) - torch.max(boxes[:, 2], boxes[:, 3]) / 2 - ).min() - offsets = idxs.to(boxes) * (max_coordinate - min_coordinate + 1) - boxes_for_nms = boxes.clone() # avoid modifying the original values in boxes - boxes_for_nms[:, :2] += offsets[:, None] - keep = nms_rotated(boxes_for_nms, scores, iou_threshold) - return keep diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/roi_align.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/roi_align.py deleted file mode 100644 index f8c4ce1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/roi_align.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from torch import nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable -from torch.nn.modules.utils import _pair - -from detectron2 import _C - - -class _ROIAlign(Function): - @staticmethod - def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio, aligned): - ctx.save_for_backward(roi) - ctx.output_size = _pair(output_size) - ctx.spatial_scale = spatial_scale - ctx.sampling_ratio = sampling_ratio - ctx.input_shape = input.size() - ctx.aligned = aligned - output = _C.roi_align_forward( - input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio, aligned - ) - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - (rois,) = ctx.saved_tensors - output_size = ctx.output_size - spatial_scale = ctx.spatial_scale - sampling_ratio = ctx.sampling_ratio - bs, ch, h, w = ctx.input_shape - grad_input = _C.roi_align_backward( - grad_output, - rois, - spatial_scale, - output_size[0], - output_size[1], - bs, - ch, - h, - w, - sampling_ratio, - ctx.aligned, - ) - return grad_input, None, None, None, None, None - - -roi_align = _ROIAlign.apply - - -class ROIAlign(nn.Module): - def __init__(self, output_size, spatial_scale, sampling_ratio, aligned=True): - """ - Args: - output_size (tuple): h, w - spatial_scale (float): scale the input boxes by this number - sampling_ratio (int): number of inputs samples to take for each output - sample. 0 to take samples densely. - aligned (bool): if False, use the legacy implementation in - Detectron. If True, align the results more perfectly. - - Note: - The meaning of aligned=True: - - Given a continuous coordinate c, its two neighboring pixel indices (in our - pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). 
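The class-offset trick in batched_nms_rotated above shifts every box center by idxs * (max_coordinate - min_coordinate + 1), so boxes from different categories can never overlap and a single NMS call handles all classes at once. A small numeric illustration of just that offset arithmetic (plain tensor math, no compiled ops):

# Illustration of the per-class offset used by batched_nms_rotated above.
import torch

boxes = torch.tensor([[10., 10., 4., 2., 0.],    # class 0
                      [12., 11., 4., 2., 0.],    # class 0, overlaps the first box
                      [11., 10., 4., 2., 0.]])   # class 1, also overlaps spatially
idxs = torch.tensor([0, 0, 1])

max_coordinate = (torch.max(boxes[:, 0], boxes[:, 1]) + torch.max(boxes[:, 2], boxes[:, 3]) / 2).max()
min_coordinate = (torch.min(boxes[:, 0], boxes[:, 1]) - torch.max(boxes[:, 2], boxes[:, 3]) / 2).min()
offsets = idxs.to(boxes) * (max_coordinate - min_coordinate + 1)   # unit offset = 14 - 8 + 1 = 7

shifted = boxes.clone()
shifted[:, :2] += offsets[:, None]
# Class-0 centers stay near (10, 10) and (12, 11); the class-1 center moves to (18, 17),
# far enough away that NMS can no longer suppress it with a class-0 box.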
For example, - c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled - from the underlying signal at continuous coordinates 0.5 and 1.5). But the original - roi_align (aligned=False) does not subtract the 0.5 when computing neighboring - pixel indices and therefore it uses pixels with a slightly incorrect alignment - (relative to our pixel model) when performing bilinear interpolation. - - With `aligned=True`, - we first appropriately scale the ROI and then shift it by -0.5 - prior to calling roi_align. This produces the correct neighbors; see - detectron2/tests/test_roi_align.py for verification. - - The difference does not make a difference to the model's performance if - ROIAlign is used together with conv layers. - """ - super(ROIAlign, self).__init__() - self.output_size = output_size - self.spatial_scale = spatial_scale - self.sampling_ratio = sampling_ratio - self.aligned = aligned - - def forward(self, input, rois): - """ - Args: - input: NCHW images - rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy. - """ - assert rois.dim() == 2 and rois.size(1) == 5 - return roi_align( - input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.aligned - ) - - def __repr__(self): - tmpstr = self.__class__.__name__ + "(" - tmpstr += "output_size=" + str(self.output_size) - tmpstr += ", spatial_scale=" + str(self.spatial_scale) - tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) - tmpstr += ", aligned=" + str(self.aligned) - tmpstr += ")" - return tmpstr diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/roi_align_rotated.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/roi_align_rotated.py deleted file mode 100644 index 6ed87e6..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/roi_align_rotated.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from torch import nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable -from torch.nn.modules.utils import _pair - -from detectron2 import _C - - -class _ROIAlignRotated(Function): - @staticmethod - def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): - ctx.save_for_backward(roi) - ctx.output_size = _pair(output_size) - ctx.spatial_scale = spatial_scale - ctx.sampling_ratio = sampling_ratio - ctx.input_shape = input.size() - output = _C.roi_align_rotated_forward( - input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio - ) - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - (rois,) = ctx.saved_tensors - output_size = ctx.output_size - spatial_scale = ctx.spatial_scale - sampling_ratio = ctx.sampling_ratio - bs, ch, h, w = ctx.input_shape - grad_input = _C.roi_align_rotated_backward( - grad_output, - rois, - spatial_scale, - output_size[0], - output_size[1], - bs, - ch, - h, - w, - sampling_ratio, - ) - return grad_input, None, None, None, None, None - - -roi_align_rotated = _ROIAlignRotated.apply - - -class ROIAlignRotated(nn.Module): - def __init__(self, output_size, spatial_scale, sampling_ratio): - """ - Args: - output_size (tuple): h, w - spatial_scale (float): scale the input boxes by this number - sampling_ratio (int): number of inputs samples to take for each output - sample. 0 to take samples densely. 
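The aligned=True note above (in ROIAlign) uses a pixel model in which a continuous coordinate c has neighboring pixel indices floor(c - 0.5) and ceil(c - 0.5). A two-line check of the c = 1.3 example given there:

# Checking the pixel-model example from the ROIAlign note above.
import math

def neighbor_indices(c: float):
    # neighbors of a continuous coordinate under the "aligned" pixel model
    return math.floor(c - 0.5), math.ceil(c - 0.5)

assert neighbor_indices(1.3) == (0, 1)   # sampled at continuous coordinates 0.5 and 1.5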
- - Note: - ROIAlignRotated supports continuous coordinate by default: - Given a continuous coordinate c, its two neighboring pixel indices (in our - pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, - c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled - from the underlying signal at continuous coordinates 0.5 and 1.5). - """ - super(ROIAlignRotated, self).__init__() - self.output_size = output_size - self.spatial_scale = spatial_scale - self.sampling_ratio = sampling_ratio - - def forward(self, input, rois): - """ - Args: - input: NCHW images - rois: Bx6 boxes. First column is the index into N. - The other 5 columns are (x_ctr, y_ctr, width, height, angle_degrees). - """ - assert rois.dim() == 2 and rois.size(1) == 6 - return roi_align_rotated( - input, rois, self.output_size, self.spatial_scale, self.sampling_ratio - ) - - def __repr__(self): - tmpstr = self.__class__.__name__ + "(" - tmpstr += "output_size=" + str(self.output_size) - tmpstr += ", spatial_scale=" + str(self.spatial_scale) - tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) - tmpstr += ")" - return tmpstr diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/rotated_boxes.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/rotated_boxes.py deleted file mode 100644 index ea9b085..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/rotated_boxes.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from __future__ import absolute_import, division, print_function, unicode_literals - -from detectron2 import _C - - -def pairwise_iou_rotated(boxes1, boxes2): - """ - Return intersection-over-union (Jaccard index) of boxes. - - Both sets of boxes are expected to be in - (x_center, y_center, width, height, angle) format. - - Arguments: - boxes1 (Tensor[N, 5]) - boxes2 (Tensor[M, 5]) - - Returns: - iou (Tensor[N, M]): the NxM matrix containing the pairwise - IoU values for every element in boxes1 and boxes2 - """ - return _C.box_iou_rotated(boxes1, boxes2) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/shape_spec.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/shape_spec.py deleted file mode 100644 index ed7f0d0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/shape_spec.py +++ /dev/null @@ -1,20 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from collections import namedtuple - - -class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])): - """ - A simple structure that contains basic shape specification about a tensor. - It is often used as the auxiliary inputs/outputs of models, - to obtain the shape inference ability among pytorch modules. - - Attributes: - channels: - height: - width: - stride: - """ - - def __new__(cls, *, channels=None, height=None, width=None, stride=None): - return super().__new__(cls, channels, height, width, stride) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/wrappers.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/wrappers.py deleted file mode 100644 index 7e3935e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/wrappers.py +++ /dev/null @@ -1,215 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -""" -Wrappers around on some nn functions, mainly to support empty tensors. - -Ideally, add support directly in PyTorch to empty tensors in those functions. - -These can be removed once https://github.com/pytorch/pytorch/issues/12013 -is implemented -""" - -import math -import torch -from torch.nn.modules.utils import _ntuple - -TORCH_VERSION = tuple(int(x) for x in torch.__version__.split(".")[:2]) - - -def cat(tensors, dim=0): - """ - Efficient version of torch.cat that avoids a copy if there is only a single element in a list - """ - assert isinstance(tensors, (list, tuple)) - if len(tensors) == 1: - return tensors[0] - return torch.cat(tensors, dim) - - -class _NewEmptyTensorOp(torch.autograd.Function): - @staticmethod - def forward(ctx, x, new_shape): - ctx.shape = x.shape - return x.new_empty(new_shape) - - @staticmethod - def backward(ctx, grad): - shape = ctx.shape - return _NewEmptyTensorOp.apply(grad, shape), None - - -class Conv2d(torch.nn.Conv2d): - """ - A wrapper around :class:`torch.nn.Conv2d` to support empty inputs and more features. - """ - - def __init__(self, *args, **kwargs): - """ - Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`: - - Args: - norm (nn.Module, optional): a normalization layer - activation (callable(Tensor) -> Tensor): a callable activation function - - It assumes that norm layer is used before activation. - """ - norm = kwargs.pop("norm", None) - activation = kwargs.pop("activation", None) - super().__init__(*args, **kwargs) - - self.norm = norm - self.activation = activation - - def forward(self, x): - if x.numel() == 0 and self.training: - # https://github.com/pytorch/pytorch/issues/12013 - assert not isinstance( - self.norm, torch.nn.SyncBatchNorm - ), "SyncBatchNorm does not support empty inputs!" - - if x.numel() == 0 and TORCH_VERSION <= (1, 4): - assert not isinstance( - self.norm, torch.nn.GroupNorm - ), "GroupNorm does not support empty inputs in PyTorch <=1.4!" - # When input is empty, we want to return a empty tensor with "correct" shape, - # So that the following operations will not panic - # if they check for the shape of the tensor. - # This computes the height and width of the output tensor - output_shape = [ - (i + 2 * p - (di * (k - 1) + 1)) // s + 1 - for i, p, di, k, s in zip( - x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride - ) - ] - output_shape = [x.shape[0], self.weight.shape[0]] + output_shape - empty = _NewEmptyTensorOp.apply(x, output_shape) - if self.training: - # This is to make DDP happy. - # DDP expects all workers to have gradient w.r.t the same set of parameters. - _dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 - return empty + _dummy - else: - return empty - - x = super().forward(x) - if self.norm is not None: - x = self.norm(x) - if self.activation is not None: - x = self.activation(x) - return x - - -if TORCH_VERSION > (1, 4): - ConvTranspose2d = torch.nn.ConvTranspose2d -else: - - class ConvTranspose2d(torch.nn.ConvTranspose2d): - """ - A wrapper around :class:`torch.nn.ConvTranspose2d` to support zero-size tensor. - """ - - def forward(self, x): - if x.numel() > 0: - return super(ConvTranspose2d, self).forward(x) - # get output shape - - # When input is empty, we want to return a empty tensor with "correct" shape, - # So that the following operations will not panic - # if they check for the shape of the tensor. 
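The empty-input branches in these wrappers (and in DeformConv above) compute the would-be output size with the usual convolution formula, and ConvTranspose2d just below uses its inverse. A quick numeric check of both, for illustration (helper names are mine, not from the file):

# Worked check of the output-shape formulas used in the empty-input branches.
def conv_out(i, p, d, k, s):
    # forward conv, as in the Conv2d / DeformConv branches above
    return (i + 2 * p - (d * (k - 1) + 1)) // s + 1

def conv_transpose_out(i, p, d, k, s, op):
    # transposed conv, as in the ConvTranspose2d branch just below
    return (i - 1) * s - 2 * p + (d * (k - 1) + 1) + op

assert conv_out(56, p=1, d=1, k=3, s=2) == 28                       # stride-2 3x3 conv halves 56
assert conv_transpose_out(28, p=1, d=1, k=3, s=2, op=1) == 56       # and its transpose undoes it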
- # This computes the height and width of the output tensor - output_shape = [ - (i - 1) * d - 2 * p + (di * (k - 1) + 1) + op - for i, p, di, k, d, op in zip( - x.shape[-2:], - self.padding, - self.dilation, - self.kernel_size, - self.stride, - self.output_padding, - ) - ] - output_shape = [x.shape[0], self.out_channels] + output_shape - # This is to make DDP happy. - # DDP expects all workers to have gradient w.r.t the same set of parameters. - _dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 - return _NewEmptyTensorOp.apply(x, output_shape) + _dummy - - -if TORCH_VERSION > (1, 4): - BatchNorm2d = torch.nn.BatchNorm2d -else: - - class BatchNorm2d(torch.nn.BatchNorm2d): - """ - A wrapper around :class:`torch.nn.BatchNorm2d` to support zero-size tensor. - """ - - def forward(self, x): - if x.numel() > 0: - return super(BatchNorm2d, self).forward(x) - # get output shape - output_shape = x.shape - return _NewEmptyTensorOp.apply(x, output_shape) - - -if TORCH_VERSION > (1, 5): - Linear = torch.nn.Linear -else: - - class Linear(torch.nn.Linear): - """ - A wrapper around :class:`torch.nn.Linear` to support empty inputs and more features. - Because of https://github.com/pytorch/pytorch/issues/34202 - """ - - def forward(self, x): - if x.numel() == 0: - output_shape = [x.shape[0], self.weight.shape[0]] - - empty = _NewEmptyTensorOp.apply(x, output_shape) - if self.training: - # This is to make DDP happy. - # DDP expects all workers to have gradient w.r.t the same set of parameters. - _dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 - return empty + _dummy - else: - return empty - - x = super().forward(x) - return x - - -def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners=None): - """ - A wrapper around :func:`torch.nn.functional.interpolate` to support zero-size tensor. - """ - if TORCH_VERSION > (1, 4) or input.numel() > 0: - return torch.nn.functional.interpolate( - input, size, scale_factor, mode, align_corners=align_corners - ) - - def _check_size_scale_factor(dim): - if size is None and scale_factor is None: - raise ValueError("either size or scale_factor should be defined") - if size is not None and scale_factor is not None: - raise ValueError("only one of size or scale_factor should be defined") - if ( - scale_factor is not None - and isinstance(scale_factor, tuple) - and len(scale_factor) != dim - ): - raise ValueError( - "scale_factor shape must match input shape. " - "Input is {}D, scale_factor size is {}".format(dim, len(scale_factor)) - ) - - def _output_size(dim): - _check_size_scale_factor(dim) - if size is not None: - return size - scale_factors = _ntuple(dim)(scale_factor) - # math.floor might return float in py2.7 - return [int(math.floor(input.size(i + 2) * scale_factors[i])) for i in range(dim)] - - output_shape = tuple(_output_size(2)) - output_shape = input.shape[:-2] + output_shape - return _NewEmptyTensorOp.apply(input, output_shape) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/model_zoo/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/model_zoo/__init__.py deleted file mode 100644 index 886616f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/model_zoo/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -""" -Model Zoo API for Detectron2: a collection of functions to create common model architectures and -optionally load pre-trained weights as released in -`MODEL_ZOO.md `_. 
-""" -from .model_zoo import get, get_config_file, get_checkpoint_url - -__all__ = ["get_checkpoint_url", "get", "get_config_file"] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/model_zoo/model_zoo.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/model_zoo/model_zoo.py deleted file mode 100644 index 68d0ce5..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/model_zoo/model_zoo.py +++ /dev/null @@ -1,150 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import os -import pkg_resources -import torch - -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.modeling import build_model - - -class _ModelZooUrls(object): - """ - Mapping from names to officially released Detectron2 pre-trained models. - """ - - S3_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/" - - # format: {config_path.yaml} -> model_id/model_final_{commit}.pkl - CONFIG_PATH_TO_URL_SUFFIX = { - # COCO Detection with Faster R-CNN - "COCO-Detection/faster_rcnn_R_50_C4_1x.yaml": "137257644/model_final_721ade.pkl", - "COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml": "137847829/model_final_51d356.pkl", - "COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml": "137257794/model_final_b275ba.pkl", - "COCO-Detection/faster_rcnn_R_50_C4_3x.yaml": "137849393/model_final_f97cb7.pkl", - "COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml": "137849425/model_final_68d202.pkl", - "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml": "137849458/model_final_280758.pkl", - "COCO-Detection/faster_rcnn_R_101_C4_3x.yaml": "138204752/model_final_298dad.pkl", - "COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml": "138204841/model_final_3e0943.pkl", - "COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml": "137851257/model_final_f6e8b1.pkl", - "COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml": "139173657/model_final_68b088.pkl", - # COCO Detection with RetinaNet - "COCO-Detection/retinanet_R_50_FPN_1x.yaml": "137593951/model_final_b796dc.pkl", - "COCO-Detection/retinanet_R_50_FPN_3x.yaml": "137849486/model_final_4cafe0.pkl", - "COCO-Detection/retinanet_R_101_FPN_3x.yaml": "138363263/model_final_59f53c.pkl", - # COCO Detection with RPN and Fast R-CNN - "COCO-Detection/rpn_R_50_C4_1x.yaml": "137258005/model_final_450694.pkl", - "COCO-Detection/rpn_R_50_FPN_1x.yaml": "137258492/model_final_02ce48.pkl", - "COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml": "137635226/model_final_e5f7ce.pkl", - # COCO Instance Segmentation Baselines with Mask R-CNN - "COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml": "137259246/model_final_9243eb.pkl", - "COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml": "137260150/model_final_4f86c3.pkl", - "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml": "137260431/model_final_a54504.pkl", - "COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml": "137849525/model_final_4ce675.pkl", - "COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml": "137849551/model_final_84107b.pkl", - "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml": "137849600/model_final_f10217.pkl", - "COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml": "138363239/model_final_a2914c.pkl", - "COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml": "138363294/model_final_0464b7.pkl", - "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml": "138205316/model_final_a3ec72.pkl", - "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml": "139653917/model_final_2d9806.pkl", # noqa - # COCO Person Keypoint Detection Baselines with Keypoint 
R-CNN - "COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml": "137261548/model_final_04e291.pkl", - "COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml": "137849621/model_final_a6e10b.pkl", - "COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml": "138363331/model_final_997cc7.pkl", - "COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml": "139686956/model_final_5ad38f.pkl", - # COCO Panoptic Segmentation Baselines with Panoptic FPN - "COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml": "139514544/model_final_dbfeb4.pkl", - "COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml": "139514569/model_final_c10459.pkl", - "COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml": "139514519/model_final_cafdb1.pkl", - # LVIS Instance Segmentation Baselines with Mask R-CNN - "LVIS-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml": "144219072/model_final_571f7c.pkl", - "LVIS-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml": "144219035/model_final_824ab5.pkl", - "LVIS-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml": "144219108/model_final_5e3439.pkl", # noqa - # Cityscapes & Pascal VOC Baselines - "Cityscapes/mask_rcnn_R_50_FPN.yaml": "142423278/model_final_af9cf5.pkl", - "PascalVOC-Detection/faster_rcnn_R_50_C4.yaml": "142202221/model_final_b1acc2.pkl", - # Other Settings - "Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml": "138602867/model_final_65c703.pkl", - "Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml": "144998336/model_final_821d0b.pkl", - "Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml": "138602847/model_final_e9d89b.pkl", - "Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml": "144998488/model_final_480dd8.pkl", - "Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml": "169527823/model_final_3b3c51.pkl", - "Misc/mask_rcnn_R_50_FPN_3x_gn.yaml": "138602888/model_final_dc5d9e.pkl", - "Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml": "138602908/model_final_01ca85.pkl", - "Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml": "139797668/model_final_be35db.pkl", - "Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml": "18131413/model_0039999_e76410.pkl", # noqa - # D1 Comparisons - "Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml": "137781054/model_final_7ab50c.pkl", # noqa - "Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml": "137781281/model_final_62ca52.pkl", # noqa - "Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml": "137781195/model_final_cce136.pkl", - } - - -def get_checkpoint_url(config_path): - """ - Returns the URL to the model trained using the given config - - Args: - config_path (str): config file name relative to detectron2's "configs/" - directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" - - Returns: - str: a URL to the model - """ - name = config_path.replace(".yaml", "") - if config_path in _ModelZooUrls.CONFIG_PATH_TO_URL_SUFFIX: - suffix = _ModelZooUrls.CONFIG_PATH_TO_URL_SUFFIX[config_path] - return _ModelZooUrls.S3_PREFIX + name + "/" + suffix - raise RuntimeError("{} not available in Model Zoo!".format(name)) - - -def get_config_file(config_path): - """ - Returns path to a builtin config file. - - Args: - config_path (str): config file name relative to detectron2's "configs/" - directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" - - Returns: - str: the real path to the config file. 
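get_checkpoint_url above builds the download URL as S3_PREFIX + the config name without its ".yaml" suffix + "/" + the suffix stored in CONFIG_PATH_TO_URL_SUFFIX. A worked example using the first table entry:

# Worked example of the URL construction in get_checkpoint_url above.
config_path = "COCO-Detection/faster_rcnn_R_50_C4_1x.yaml"
suffix = "137257644/model_final_721ade.pkl"          # from CONFIG_PATH_TO_URL_SUFFIX

name = config_path.replace(".yaml", "")
url = "https://dl.fbaipublicfiles.com/detectron2/" + name + "/" + suffix
# -> https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_C4_1x/137257644/model_final_721ade.pkl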
- """ - cfg_file = pkg_resources.resource_filename( - "detectron2.model_zoo", os.path.join("configs", config_path) - ) - if not os.path.exists(cfg_file): - raise RuntimeError("{} not available in Model Zoo!".format(config_path)) - return cfg_file - - -def get(config_path, trained: bool = False): - """ - Get a model specified by relative path under Detectron2's official ``configs/`` directory. - - Args: - config_path (str): config file name relative to detectron2's "configs/" - directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" - trained (bool): If True, will initialize the model with the trained model zoo weights. - If False, the checkpoint specified in the config file's ``MODEL.WEIGHTS`` is used - instead; this will typically (though not always) initialize a subset of weights using - an ImageNet pre-trained model, while randomly initializing the other weights. - - Example: - - .. code-block:: python - - from detectron2 import model_zoo - model = model_zoo.get("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml", trained=True) - """ - cfg_file = get_config_file(config_path) - - cfg = get_cfg() - cfg.merge_from_file(cfg_file) - if trained: - cfg.MODEL.WEIGHTS = get_checkpoint_url(config_path) - if not torch.cuda.is_available(): - cfg.MODEL.DEVICE = "cpu" - - model = build_model(cfg) - DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS) - return model diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/__init__.py deleted file mode 100644 index 9e23fe4..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/__init__.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import torch - -from detectron2.layers import ShapeSpec - -from .anchor_generator import build_anchor_generator, ANCHOR_GENERATOR_REGISTRY -from .backbone import ( - BACKBONE_REGISTRY, - FPN, - Backbone, - ResNet, - ResNetBlockBase, - build_backbone, - build_resnet_backbone, - make_stage, -) -from .meta_arch import ( - META_ARCH_REGISTRY, - SEM_SEG_HEADS_REGISTRY, - GeneralizedRCNN, - PanopticFPN, - ProposalNetwork, - RetinaNet, - SemanticSegmentor, - build_model, - build_sem_seg_head, -) -from .postprocessing import detector_postprocess -from .proposal_generator import ( - PROPOSAL_GENERATOR_REGISTRY, - build_proposal_generator, - RPN_HEAD_REGISTRY, - build_rpn_head, -) -from .roi_heads import ( - ROI_BOX_HEAD_REGISTRY, - ROI_HEADS_REGISTRY, - ROI_KEYPOINT_HEAD_REGISTRY, - ROI_MASK_HEAD_REGISTRY, - ROIHeads, - StandardROIHeads, - BaseMaskRCNNHead, - BaseKeypointRCNNHead, - build_box_head, - build_keypoint_head, - build_mask_head, - build_roi_heads, -) -from .test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA - -_EXCLUDE = {"torch", "ShapeSpec"} -__all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] - -assert ( - torch.Tensor([1]) == torch.Tensor([2]) -).dtype == torch.bool, "Your Pytorch is too old. 
Please update to contain https://github.com/pytorch/pytorch/pull/21113" diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/anchor_generator.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/anchor_generator.py deleted file mode 100644 index 93927bc..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/anchor_generator.py +++ /dev/null @@ -1,382 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import math -from typing import List -import torch -from torch import nn - -from detectron2.config import configurable -from detectron2.layers import ShapeSpec -from detectron2.structures import Boxes, RotatedBoxes -from detectron2.utils.registry import Registry - -ANCHOR_GENERATOR_REGISTRY = Registry("ANCHOR_GENERATOR") -ANCHOR_GENERATOR_REGISTRY.__doc__ = """ -Registry for modules that creates object detection anchors for feature maps. - -The registered object will be called with `obj(cfg, input_shape)`. -""" - - -class BufferList(nn.Module): - """ - Similar to nn.ParameterList, but for buffers - """ - - def __init__(self, buffers=None): - super(BufferList, self).__init__() - if buffers is not None: - self.extend(buffers) - - def extend(self, buffers): - offset = len(self) - for i, buffer in enumerate(buffers): - self.register_buffer(str(offset + i), buffer) - return self - - def __len__(self): - return len(self._buffers) - - def __iter__(self): - return iter(self._buffers.values()) - - -def _create_grid_offsets(size: List[int], stride: int, offset: float, device: torch.device): - grid_height, grid_width = size - shifts_x = torch.arange( - offset * stride, grid_width * stride, step=stride, dtype=torch.float32, device=device - ) - shifts_y = torch.arange( - offset * stride, grid_height * stride, step=stride, dtype=torch.float32, device=device - ) - - shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) - shift_x = shift_x.reshape(-1) - shift_y = shift_y.reshape(-1) - return shift_x, shift_y - - -def _broadcast_params(params, num_features, name): - """ - If one size (or aspect ratio) is specified and there are multiple feature - maps, we "broadcast" anchors of that single size (or aspect ratio) - over all feature maps. - - If params is list[float], or list[list[float]] with len(params) == 1, repeat - it num_features time. - - Returns: - list[list[float]]: param for each feature - """ - assert isinstance( - params, (list, tuple) - ), f"{name} in anchor generator has to be a list! Got {params}." - assert len(params), f"{name} in anchor generator cannot be empty!" - if not isinstance(params[0], (list, tuple)): # list[float] - return [params] * num_features - if len(params) == 1: - return list(params) * num_features - assert len(params) == num_features, ( - f"Got {name} of length {len(params)} in anchor generator, " - f"but the number of input features is {num_features}!" - ) - return params - - -@ANCHOR_GENERATOR_REGISTRY.register() -class DefaultAnchorGenerator(nn.Module): - """ - Compute anchors in the standard ways described in - "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks". - """ - - box_dim: int = 4 - """ - the dimension of each anchor box. - """ - - @configurable - def __init__(self, *, sizes, aspect_ratios, strides, offset=0.5): - """ - This interface is experimental. - - Args: - sizes (list[list[float]] or list[float]): - If sizes is list[list[float]], sizes[i] is the list of anchor sizes - (i.e. 
sqrt of anchor area) to use for the i-th feature map. - If sizes is list[float], the sizes are used for all feature maps. - Anchor sizes are given in absolute lengths in units of - the input image; they do not dynamically scale if the input image size changes. - aspect_ratios (list[list[float]] or list[float]): list of aspect ratios - (i.e. height / width) to use for anchors. Same "broadcast" rule for `sizes` applies. - strides (list[int]): stride of each input feature. - offset (float): Relative offset between the center of the first anchor and the top-left - corner of the image. Value has to be in [0, 1). - Recommend to use 0.5, which means half stride. - """ - super().__init__() - - self.strides = strides - self.num_features = len(self.strides) - sizes = _broadcast_params(sizes, self.num_features, "sizes") - aspect_ratios = _broadcast_params(aspect_ratios, self.num_features, "aspect_ratios") - self.cell_anchors = self._calculate_anchors(sizes, aspect_ratios) - - self.offset = offset - assert 0.0 <= self.offset < 1.0, self.offset - - @classmethod - def from_config(cls, cfg, input_shape: List[ShapeSpec]): - return { - "sizes": cfg.MODEL.ANCHOR_GENERATOR.SIZES, - "aspect_ratios": cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS, - "strides": [x.stride for x in input_shape], - "offset": cfg.MODEL.ANCHOR_GENERATOR.OFFSET, - } - - def _calculate_anchors(self, sizes, aspect_ratios): - cell_anchors = [ - self.generate_cell_anchors(s, a).float() for s, a in zip(sizes, aspect_ratios) - ] - return BufferList(cell_anchors) - - @property - def num_cell_anchors(self): - """ - Alias of `num_anchors`. - """ - return self.num_anchors - - @property - def num_anchors(self): - """ - Returns: - list[int]: Each int is the number of anchors at every pixel - location, on that feature map. - For example, if at every pixel we use anchors of 3 aspect - ratios and 5 sizes, the number of anchors is 15. - (See also ANCHOR_GENERATOR.SIZES and ANCHOR_GENERATOR.ASPECT_RATIOS in config) - - In standard RPN models, `num_anchors` on every feature map is the same. - """ - return [len(cell_anchors) for cell_anchors in self.cell_anchors] - - def _grid_anchors(self, grid_sizes: List[List[int]]): - """ - Returns: - list[Tensor]: #featuremap tensors, each is (#locations x #cell_anchors) x 4 - """ - anchors = [] - for size, stride, base_anchors in zip(grid_sizes, self.strides, self.cell_anchors): - shift_x, shift_y = _create_grid_offsets(size, stride, self.offset, base_anchors.device) - shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=1) - - anchors.append((shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)).reshape(-1, 4)) - - return anchors - - def generate_cell_anchors(self, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2)): - """ - Generate a tensor storing canonical anchor boxes, which are all anchor - boxes of different sizes and aspect_ratios centered at (0, 0). - We can later build the set of anchors for a full feature map by - shifting and tiling these tensors (see `meth:_grid_anchors`). - - Args: - sizes (tuple[float]): - aspect_ratios (tuple[float]]): - - Returns: - Tensor of shape (len(sizes) * len(aspect_ratios), 4) storing anchor boxes - in XYXY format. - """ - - # This is different from the anchor generator defined in the original Faster R-CNN - # code or Detectron. They yield the same AP, however the old version defines cell - # anchors in a less natural way with a shift relative to the feature grid and - # quantization that results in slightly different sizes for different aspect ratios. 
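_grid_anchors above tiles each set of cell anchors over the feature map by adding (shift_x, shift_y, shift_x, shift_y) offsets produced by _create_grid_offsets. A tiny numeric illustration with a 2x2 feature map, stride 16, offset 0.5, and one 32x32 cell anchor:

# Illustration of the shifting done in _grid_anchors above.
import torch

stride, offset = 16, 0.5
shifts_x = torch.arange(offset * stride, 2 * stride, step=stride)   # tensor([ 8., 24.])
shifts_y = torch.arange(offset * stride, 2 * stride, step=stride)
shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x)
shifts = torch.stack((shift_x.reshape(-1), shift_y.reshape(-1),
                      shift_x.reshape(-1), shift_y.reshape(-1)), dim=1)   # (4, 4)

base = torch.tensor([[-16., -16., 16., 16.]])        # one 32x32 cell anchor at (0, 0)
anchors = (shifts.view(-1, 1, 4) + base.view(1, -1, 4)).reshape(-1, 4)
# anchors[0] == tensor([-8., -8., 24., 24.])  -- the 32x32 box centered at (8, 8)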
- # See also https://github.com/facebookresearch/Detectron/issues/227 - - anchors = [] - for size in sizes: - area = size ** 2.0 - for aspect_ratio in aspect_ratios: - # s * s = w * h - # a = h / w - # ... some algebra ... - # w = sqrt(s * s / a) - # h = a * w - w = math.sqrt(area / aspect_ratio) - h = aspect_ratio * w - x0, y0, x1, y1 = -w / 2.0, -h / 2.0, w / 2.0, h / 2.0 - anchors.append([x0, y0, x1, y1]) - return torch.tensor(anchors) - - def forward(self, features): - """ - Args: - features (list[Tensor]): list of backbone feature maps on which to generate anchors. - - Returns: - list[Boxes]: a list of Boxes containing all the anchors for each feature map - (i.e. the cell anchors repeated over all locations in the feature map). - The number of anchors of each feature map is Hi x Wi x num_cell_anchors, - where Hi, Wi are resolution of the feature map divided by anchor stride. - """ - grid_sizes = [feature_map.shape[-2:] for feature_map in features] - anchors_over_all_feature_maps = self._grid_anchors(grid_sizes) - return [Boxes(x) for x in anchors_over_all_feature_maps] - - -@ANCHOR_GENERATOR_REGISTRY.register() -class RotatedAnchorGenerator(nn.Module): - """ - Compute rotated anchors used by Rotated RPN (RRPN), described in - "Arbitrary-Oriented Scene Text Detection via Rotation Proposals". - """ - - box_dim: int = 5 - """ - the dimension of each anchor box. - """ - - @configurable - def __init__(self, *, sizes, aspect_ratios, strides, angles, offset=0.5): - """ - This interface is experimental. - - Args: - sizes (list[list[float]] or list[float]): - If sizes is list[list[float]], sizes[i] is the list of anchor sizes - (i.e. sqrt of anchor area) to use for the i-th feature map. - If sizes is list[float], the sizes are used for all feature maps. - Anchor sizes are given in absolute lengths in units of - the input image; they do not dynamically scale if the input image size changes. - aspect_ratios (list[list[float]] or list[float]): list of aspect ratios - (i.e. height / width) to use for anchors. Same "broadcast" rule for `sizes` applies. - strides (list[int]): stride of each input feature. - angles (list[list[float]] or list[float]): list of angles (in degrees CCW) - to use for anchors. Same "broadcast" rule for `sizes` applies. - offset (float): Relative offset between the center of the first anchor and the top-left - corner of the image. Value has to be in [0, 1). - Recommend to use 0.5, which means half stride. 
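# Editor's note: a small numeric sketch, not part of the original file, showing
# the effect of the offset=0.5 recommended above. With stride 16, anchor centers
# along one axis land at the half-stride positions 8, 24, 40, 56 for a 4-cell
# grid, exactly as _create_grid_offsets computes them.
import torch

stride, offset, grid_width = 16, 0.5, 4
shifts_x = torch.arange(offset * stride, grid_width * stride, step=stride,
                        dtype=torch.float32)
assert shifts_x.tolist() == [8.0, 24.0, 40.0, 56.0]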
- """ - super().__init__() - - self.strides = strides - self.num_features = len(self.strides) - sizes = _broadcast_params(sizes, self.num_features, "sizes") - aspect_ratios = _broadcast_params(aspect_ratios, self.num_features, "aspect_ratios") - angles = _broadcast_params(angles, self.num_features, "angles") - self.cell_anchors = self._calculate_anchors(sizes, aspect_ratios, angles) - - self.offset = offset - assert 0.0 <= self.offset < 1.0, self.offset - - @classmethod - def from_config(cls, cfg, input_shape: List[ShapeSpec]): - return { - "sizes": cfg.MODEL.ANCHOR_GENERATOR.SIZES, - "aspect_ratios": cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS, - "strides": [x.stride for x in input_shape], - "offset": cfg.MODEL.ANCHOR_GENERATOR.OFFSET, - "angles": cfg.MODEL.ANCHOR_GENERATOR.ANGLES, - } - - def _calculate_anchors(self, sizes, aspect_ratios, angles): - cell_anchors = [ - self.generate_cell_anchors(size, aspect_ratio, angle).float() - for size, aspect_ratio, angle in zip(sizes, aspect_ratios, angles) - ] - return BufferList(cell_anchors) - - @property - def num_cell_anchors(self): - """ - Alias of `num_anchors`. - """ - return self.num_anchors - - @property - def num_anchors(self): - """ - Returns: - list[int]: Each int is the number of anchors at every pixel - location, on that feature map. - For example, if at every pixel we use anchors of 3 aspect - ratios, 2 sizes and 5 angles, the number of anchors is 30. - (See also ANCHOR_GENERATOR.SIZES, ANCHOR_GENERATOR.ASPECT_RATIOS - and ANCHOR_GENERATOR.ANGLES in config) - - In standard RRPN models, `num_anchors` on every feature map is the same. - """ - return [len(cell_anchors) for cell_anchors in self.cell_anchors] - - def _grid_anchors(self, grid_sizes): - anchors = [] - for size, stride, base_anchors in zip(grid_sizes, self.strides, self.cell_anchors): - shift_x, shift_y = _create_grid_offsets(size, stride, self.offset, base_anchors.device) - zeros = torch.zeros_like(shift_x) - shifts = torch.stack((shift_x, shift_y, zeros, zeros, zeros), dim=1) - - anchors.append((shifts.view(-1, 1, 5) + base_anchors.view(1, -1, 5)).reshape(-1, 5)) - - return anchors - - def generate_cell_anchors( - self, - sizes=(32, 64, 128, 256, 512), - aspect_ratios=(0.5, 1, 2), - angles=(-90, -60, -30, 0, 30, 60, 90), - ): - """ - Generate a tensor storing canonical anchor boxes, which are all anchor - boxes of different sizes, aspect_ratios, angles centered at (0, 0). - We can later build the set of anchors for a full feature map by - shifting and tiling these tensors (see `meth:_grid_anchors`). - - Args: - sizes (tuple[float]): - aspect_ratios (tuple[float]]): - angles (tuple[float]]): - - Returns: - Tensor of shape (len(sizes) * len(aspect_ratios) * len(angles), 5) - storing anchor boxes in (x_ctr, y_ctr, w, h, angle) format. - """ - anchors = [] - for size in sizes: - area = size ** 2.0 - for aspect_ratio in aspect_ratios: - # s * s = w * h - # a = h / w - # ... some algebra ... - # w = sqrt(s * s / a) - # h = a * w - w = math.sqrt(area / aspect_ratio) - h = aspect_ratio * w - anchors.extend([0, 0, w, h, a] for a in angles) - - return torch.tensor(anchors) - - def forward(self, features): - """ - Args: - features (list[Tensor]): list of backbone feature maps on which to generate anchors. - - Returns: - list[RotatedBoxes]: a list of Boxes containing all the anchors for each feature map - (i.e. the cell anchors repeated over all locations in the feature map). 
- The number of anchors of each feature map is Hi x Wi x num_cell_anchors, - where Hi, Wi are resolution of the feature map divided by anchor stride. - """ - grid_sizes = [feature_map.shape[-2:] for feature_map in features] - anchors_over_all_feature_maps = self._grid_anchors(grid_sizes) - return [RotatedBoxes(x) for x in anchors_over_all_feature_maps] - - -def build_anchor_generator(cfg, input_shape): - """ - Built an anchor generator from `cfg.MODEL.ANCHOR_GENERATOR.NAME`. - """ - anchor_generator = cfg.MODEL.ANCHOR_GENERATOR.NAME - return ANCHOR_GENERATOR_REGISTRY.get(anchor_generator)(cfg, input_shape) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/__init__.py deleted file mode 100644 index d477fb1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .build import build_backbone, BACKBONE_REGISTRY # noqa F401 isort:skip - -from .backbone import Backbone -from .fpn import FPN -from .resnet import ResNet, ResNetBlockBase, build_resnet_backbone, make_stage - -__all__ = [k for k in globals().keys() if not k.startswith("_")] -# TODO can expose more resnet blocks after careful consideration diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/backbone.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/backbone.py deleted file mode 100644 index 66dee4a..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/backbone.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from abc import ABCMeta, abstractmethod -import torch.nn as nn - -from detectron2.layers import ShapeSpec - -__all__ = ["Backbone"] - - -class Backbone(nn.Module, metaclass=ABCMeta): - """ - Abstract base class for network backbones. - """ - - def __init__(self): - """ - The `__init__` method of any subclass can specify its own set of arguments. - """ - super().__init__() - - @abstractmethod - def forward(self): - """ - Subclasses must override this method, but adhere to the same return type. - - Returns: - dict[str->Tensor]: mapping from feature name (e.g., "res2") to tensor - """ - pass - - @property - def size_divisibility(self): - """ - Some backbones require the input height and width to be divisible by a - specific integer. This is typically true for encoder / decoder type networks - with lateral connection (e.g., FPN) for which feature maps need to match - dimension in the "bottom up" and "top down" paths. Set to 0 if no specific - input size divisibility is required. - """ - return 0 - - def output_shape(self): - """ - Returns: - dict[str->ShapeSpec] - """ - # this is a backward-compatible default - return { - name: ShapeSpec( - channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] - ) - for name in self._out_features - } diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/build.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/build.py deleted file mode 100644 index 3d2ecae..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/build.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -from detectron2.layers import ShapeSpec -from detectron2.utils.registry import Registry - -from .backbone import Backbone - -BACKBONE_REGISTRY = Registry("BACKBONE") -BACKBONE_REGISTRY.__doc__ = """ -Registry for backbones, which extract feature maps from images - -The registered object must be a callable that accepts two arguments: - -1. A :class:`detectron2.config.CfgNode` -2. A :class:`detectron2.layers.ShapeSpec`, which contains the input shape specification. - -It must returns an instance of :class:`Backbone`. -""" - - -def build_backbone(cfg, input_shape=None): - """ - Build a backbone from `cfg.MODEL.BACKBONE.NAME`. - - Returns: - an instance of :class:`Backbone` - """ - if input_shape is None: - input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) - - backbone_name = cfg.MODEL.BACKBONE.NAME - backbone = BACKBONE_REGISTRY.get(backbone_name)(cfg, input_shape) - assert isinstance(backbone, Backbone) - return backbone diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/fpn.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/fpn.py deleted file mode 100644 index 338b5f5..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/fpn.py +++ /dev/null @@ -1,245 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import math -import fvcore.nn.weight_init as weight_init -import torch.nn.functional as F -from torch import nn - -from detectron2.layers import Conv2d, ShapeSpec, get_norm - -from .backbone import Backbone -from .build import BACKBONE_REGISTRY -from .resnet import build_resnet_backbone - -__all__ = ["build_resnet_fpn_backbone", "build_retinanet_resnet_fpn_backbone", "FPN"] - - -class FPN(Backbone): - """ - This module implements :paper:`FPN`. - It creates pyramid features built on top of some input feature maps. - """ - - def __init__( - self, bottom_up, in_features, out_channels, norm="", top_block=None, fuse_type="sum" - ): - """ - Args: - bottom_up (Backbone): module representing the bottom up subnetwork. - Must be a subclass of :class:`Backbone`. The multi-scale feature - maps generated by the bottom up network, and listed in `in_features`, - are used to generate FPN levels. - in_features (list[str]): names of the input feature maps coming - from the backbone to which FPN is attached. For example, if the - backbone produces ["res2", "res3", "res4"], any *contiguous* sublist - of these may be used; order must be from high to low resolution. - out_channels (int): number of channels in the output feature maps. - norm (str): the normalization to use. - top_block (nn.Module or None): if provided, an extra operation will - be performed on the output of the last (smallest resolution) - FPN output, and the result will extend the result list. The top_block - further downsamples the feature map. It must have an attribute - "num_levels", meaning the number of extra FPN levels added by - this block, and "in_feature", which is a string representing - its input feature (e.g., p5). - fuse_type (str): types for fusing the top down features and the lateral - ones. It can be "sum" (default), which sums up element-wise; or "avg", - which takes the element-wise mean of the two. - """ - super(FPN, self).__init__() - assert isinstance(bottom_up, Backbone) - - # Feature map strides and channels from the bottom up network (e.g. 
ResNet) - input_shapes = bottom_up.output_shape() - in_strides = [input_shapes[f].stride for f in in_features] - in_channels = [input_shapes[f].channels for f in in_features] - - _assert_strides_are_log2_contiguous(in_strides) - lateral_convs = [] - output_convs = [] - - use_bias = norm == "" - for idx, in_channels in enumerate(in_channels): - lateral_norm = get_norm(norm, out_channels) - output_norm = get_norm(norm, out_channels) - - lateral_conv = Conv2d( - in_channels, out_channels, kernel_size=1, bias=use_bias, norm=lateral_norm - ) - output_conv = Conv2d( - out_channels, - out_channels, - kernel_size=3, - stride=1, - padding=1, - bias=use_bias, - norm=output_norm, - ) - weight_init.c2_xavier_fill(lateral_conv) - weight_init.c2_xavier_fill(output_conv) - stage = int(math.log2(in_strides[idx])) - self.add_module("fpn_lateral{}".format(stage), lateral_conv) - self.add_module("fpn_output{}".format(stage), output_conv) - - lateral_convs.append(lateral_conv) - output_convs.append(output_conv) - # Place convs into top-down order (from low to high resolution) - # to make the top-down computation in forward clearer. - self.lateral_convs = lateral_convs[::-1] - self.output_convs = output_convs[::-1] - self.top_block = top_block - self.in_features = in_features - self.bottom_up = bottom_up - # Return feature names are "p", like ["p2", "p3", ..., "p6"] - self._out_feature_strides = {"p{}".format(int(math.log2(s))): s for s in in_strides} - # top block output feature maps. - if self.top_block is not None: - for s in range(stage, stage + self.top_block.num_levels): - self._out_feature_strides["p{}".format(s + 1)] = 2 ** (s + 1) - - self._out_features = list(self._out_feature_strides.keys()) - self._out_feature_channels = {k: out_channels for k in self._out_features} - self._size_divisibility = in_strides[-1] - assert fuse_type in {"avg", "sum"} - self._fuse_type = fuse_type - - @property - def size_divisibility(self): - return self._size_divisibility - - def forward(self, x): - """ - Args: - input (dict[str->Tensor]): mapping feature map name (e.g., "res5") to - feature map tensor for each feature level in high to low resolution order. - - Returns: - dict[str->Tensor]: - mapping from feature map name to FPN feature map tensor - in high to low resolution order. Returned feature names follow the FPN - paper convention: "p", where stage has stride = 2 ** stage e.g., - ["p2", "p3", ..., "p6"]. 
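# Editor's note: a tiny sketch, not part of the original file, of the "p<k>"
# naming convention documented above: output "p<k>" has stride 2**k, so ResNet
# strides (4, 8, 16, 32) become ("p2", "p3", "p4", "p5"), and a LastLevelMaxPool
# top block appends "p6" at stride 64.
import math

in_strides = [4, 8, 16, 32]
names = ["p{}".format(int(math.log2(s))) for s in in_strides]
assert names == ["p2", "p3", "p4", "p5"]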
- """ - # Reverse feature maps into top-down order (from low to high resolution) - bottom_up_features = self.bottom_up(x) - x = [bottom_up_features[f] for f in self.in_features[::-1]] - results = [] - prev_features = self.lateral_convs[0](x[0]) - results.append(self.output_convs[0](prev_features)) - for features, lateral_conv, output_conv in zip( - x[1:], self.lateral_convs[1:], self.output_convs[1:] - ): - top_down_features = F.interpolate(prev_features, scale_factor=2, mode="nearest") - lateral_features = lateral_conv(features) - prev_features = lateral_features + top_down_features - if self._fuse_type == "avg": - prev_features /= 2 - results.insert(0, output_conv(prev_features)) - - if self.top_block is not None: - top_block_in_feature = bottom_up_features.get(self.top_block.in_feature, None) - if top_block_in_feature is None: - top_block_in_feature = results[self._out_features.index(self.top_block.in_feature)] - results.extend(self.top_block(top_block_in_feature)) - assert len(self._out_features) == len(results) - return dict(zip(self._out_features, results)) - - def output_shape(self): - return { - name: ShapeSpec( - channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] - ) - for name in self._out_features - } - - -def _assert_strides_are_log2_contiguous(strides): - """ - Assert that each stride is 2x times its preceding stride, i.e. "contiguous in log2". - """ - for i, stride in enumerate(strides[1:], 1): - assert stride == 2 * strides[i - 1], "Strides {} {} are not log2 contiguous".format( - stride, strides[i - 1] - ) - - -class LastLevelMaxPool(nn.Module): - """ - This module is used in the original FPN to generate a downsampled - P6 feature from P5. - """ - - def __init__(self): - super().__init__() - self.num_levels = 1 - self.in_feature = "p5" - - def forward(self, x): - return [F.max_pool2d(x, kernel_size=1, stride=2, padding=0)] - - -class LastLevelP6P7(nn.Module): - """ - This module is used in RetinaNet to generate extra layers, P6 and P7 from - C5 feature. - """ - - def __init__(self, in_channels, out_channels, in_feature="res5"): - super().__init__() - self.num_levels = 2 - self.in_feature = in_feature - self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) - self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) - for module in [self.p6, self.p7]: - weight_init.c2_xavier_fill(module) - - def forward(self, c5): - p6 = self.p6(c5) - p7 = self.p7(F.relu(p6)) - return [p6, p7] - - -@BACKBONE_REGISTRY.register() -def build_resnet_fpn_backbone(cfg, input_shape: ShapeSpec): - """ - Args: - cfg: a detectron2 CfgNode - - Returns: - backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. - """ - bottom_up = build_resnet_backbone(cfg, input_shape) - in_features = cfg.MODEL.FPN.IN_FEATURES - out_channels = cfg.MODEL.FPN.OUT_CHANNELS - backbone = FPN( - bottom_up=bottom_up, - in_features=in_features, - out_channels=out_channels, - norm=cfg.MODEL.FPN.NORM, - top_block=LastLevelMaxPool(), - fuse_type=cfg.MODEL.FPN.FUSE_TYPE, - ) - return backbone - - -@BACKBONE_REGISTRY.register() -def build_retinanet_resnet_fpn_backbone(cfg, input_shape: ShapeSpec): - """ - Args: - cfg: a detectron2 CfgNode - - Returns: - backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. 
- """ - bottom_up = build_resnet_backbone(cfg, input_shape) - in_features = cfg.MODEL.FPN.IN_FEATURES - out_channels = cfg.MODEL.FPN.OUT_CHANNELS - in_channels_p6p7 = bottom_up.output_shape()["res5"].channels - backbone = FPN( - bottom_up=bottom_up, - in_features=in_features, - out_channels=out_channels, - norm=cfg.MODEL.FPN.NORM, - top_block=LastLevelP6P7(in_channels_p6p7, out_channels), - fuse_type=cfg.MODEL.FPN.FUSE_TYPE, - ) - return backbone diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/resnet.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/resnet.py deleted file mode 100644 index f1faae0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/resnet.py +++ /dev/null @@ -1,591 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import numpy as np -import fvcore.nn.weight_init as weight_init -import torch -import torch.nn.functional as F -from torch import nn - -from detectron2.layers import ( - CNNBlockBase, - Conv2d, - DeformConv, - ModulatedDeformConv, - ShapeSpec, - get_norm, -) - -from .backbone import Backbone -from .build import BACKBONE_REGISTRY - -__all__ = [ - "ResNetBlockBase", - "BasicBlock", - "BottleneckBlock", - "DeformBottleneckBlock", - "BasicStem", - "ResNet", - "make_stage", - "build_resnet_backbone", -] - - -ResNetBlockBase = CNNBlockBase -""" -Alias for backward compatibiltiy. -""" - - -class BasicBlock(CNNBlockBase): - """ - The basic residual block for ResNet-18 and ResNet-34 defined in :paper:`ResNet`, - with two 3x3 conv layers and a projection shortcut if needed. - """ - - def __init__(self, in_channels, out_channels, *, stride=1, norm="BN"): - """ - Args: - in_channels (int): Number of input channels. - out_channels (int): Number of output channels. - stride (int): Stride for the first conv. - norm (str or callable): normalization for all conv layers. - See :func:`layers.get_norm` for supported format. - """ - super().__init__(in_channels, out_channels, stride) - - if in_channels != out_channels: - self.shortcut = Conv2d( - in_channels, - out_channels, - kernel_size=1, - stride=stride, - bias=False, - norm=get_norm(norm, out_channels), - ) - else: - self.shortcut = None - - self.conv1 = Conv2d( - in_channels, - out_channels, - kernel_size=3, - stride=stride, - padding=1, - bias=False, - norm=get_norm(norm, out_channels), - ) - - self.conv2 = Conv2d( - out_channels, - out_channels, - kernel_size=3, - stride=1, - padding=1, - bias=False, - norm=get_norm(norm, out_channels), - ) - - for layer in [self.conv1, self.conv2, self.shortcut]: - if layer is not None: # shortcut can be None - weight_init.c2_msra_fill(layer) - - def forward(self, x): - out = self.conv1(x) - out = F.relu_(out) - out = self.conv2(out) - - if self.shortcut is not None: - shortcut = self.shortcut(x) - else: - shortcut = x - - out += shortcut - out = F.relu_(out) - return out - - -class BottleneckBlock(CNNBlockBase): - """ - The standard bottleneck residual block used by ResNet-50, 101 and 152 - defined in :paper:`ResNet`. It contains 3 conv layers with kernels - 1x1, 3x3, 1x1, and a projection shortcut if needed. - """ - - def __init__( - self, - in_channels, - out_channels, - *, - bottleneck_channels, - stride=1, - num_groups=1, - norm="BN", - stride_in_1x1=False, - dilation=1, - ): - """ - Args: - bottleneck_channels (int): number of output channels for the 3x3 - "bottleneck" conv layers. 
- num_groups (int): number of groups for the 3x3 conv layer. - norm (str or callable): normalization for all conv layers. - See :func:`layers.get_norm` for supported format. - stride_in_1x1 (bool): when stride>1, whether to put stride in the - first 1x1 convolution or the bottleneck 3x3 convolution. - dilation (int): the dilation rate of the 3x3 conv layer. - """ - super().__init__(in_channels, out_channels, stride) - - if in_channels != out_channels: - self.shortcut = Conv2d( - in_channels, - out_channels, - kernel_size=1, - stride=stride, - bias=False, - norm=get_norm(norm, out_channels), - ) - else: - self.shortcut = None - - # The original MSRA ResNet models have stride in the first 1x1 conv - # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have - # stride in the 3x3 conv - stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride) - - self.conv1 = Conv2d( - in_channels, - bottleneck_channels, - kernel_size=1, - stride=stride_1x1, - bias=False, - norm=get_norm(norm, bottleneck_channels), - ) - - self.conv2 = Conv2d( - bottleneck_channels, - bottleneck_channels, - kernel_size=3, - stride=stride_3x3, - padding=1 * dilation, - bias=False, - groups=num_groups, - dilation=dilation, - norm=get_norm(norm, bottleneck_channels), - ) - - self.conv3 = Conv2d( - bottleneck_channels, - out_channels, - kernel_size=1, - bias=False, - norm=get_norm(norm, out_channels), - ) - - for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]: - if layer is not None: # shortcut can be None - weight_init.c2_msra_fill(layer) - - # Zero-initialize the last normalization in each residual branch, - # so that at the beginning, the residual branch starts with zeros, - # and each residual block behaves like an identity. - # See Sec 5.1 in "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour": - # "For BN layers, the learnable scaling coefficient γ is initialized - # to be 1, except for each residual block's last BN - # where γ is initialized to be 0." - - # nn.init.constant_(self.conv3.norm.weight, 0) - # TODO this somehow hurts performance when training GN models from scratch. - # Add it as an option when we need to use this code to train a backbone. - - def forward(self, x): - out = self.conv1(x) - out = F.relu_(out) - - out = self.conv2(out) - out = F.relu_(out) - - out = self.conv3(out) - - if self.shortcut is not None: - shortcut = self.shortcut(x) - else: - shortcut = x - - out += shortcut - out = F.relu_(out) - return out - - -class DeformBottleneckBlock(ResNetBlockBase): - """ - Similar to :class:`BottleneckBlock`, but with :paper:`deformable conv ` - in the 3x3 convolution. 
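# Editor's note: illustrative numbers only, not part of the original file. For
# the first "res2" bottleneck of ResNet-50 the channel flow is
# 64 -> 64 (1x1) -> 64 (3x3) -> 256 (1x1), with a 1x1 projection shortcut since
# in_channels != out_channels. stride_in_1x1 picks where a stride-2 block
# strides, matching the MSRA-vs-Caffe2 comment in the constructor above.
stride, stride_in_1x1 = 2, True
stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)
assert (stride_1x1, stride_3x3) == (2, 1)   # original MSRA style: stride in the first 1x1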
- """ - - def __init__( - self, - in_channels, - out_channels, - *, - bottleneck_channels, - stride=1, - num_groups=1, - norm="BN", - stride_in_1x1=False, - dilation=1, - deform_modulated=False, - deform_num_groups=1, - ): - super().__init__(in_channels, out_channels, stride) - self.deform_modulated = deform_modulated - - if in_channels != out_channels: - self.shortcut = Conv2d( - in_channels, - out_channels, - kernel_size=1, - stride=stride, - bias=False, - norm=get_norm(norm, out_channels), - ) - else: - self.shortcut = None - - stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride) - - self.conv1 = Conv2d( - in_channels, - bottleneck_channels, - kernel_size=1, - stride=stride_1x1, - bias=False, - norm=get_norm(norm, bottleneck_channels), - ) - - if deform_modulated: - deform_conv_op = ModulatedDeformConv - # offset channels are 2 or 3 (if with modulated) * kernel_size * kernel_size - offset_channels = 27 - else: - deform_conv_op = DeformConv - offset_channels = 18 - - self.conv2_offset = Conv2d( - bottleneck_channels, - offset_channels * deform_num_groups, - kernel_size=3, - stride=stride_3x3, - padding=1 * dilation, - dilation=dilation, - ) - self.conv2 = deform_conv_op( - bottleneck_channels, - bottleneck_channels, - kernel_size=3, - stride=stride_3x3, - padding=1 * dilation, - bias=False, - groups=num_groups, - dilation=dilation, - deformable_groups=deform_num_groups, - norm=get_norm(norm, bottleneck_channels), - ) - - self.conv3 = Conv2d( - bottleneck_channels, - out_channels, - kernel_size=1, - bias=False, - norm=get_norm(norm, out_channels), - ) - - for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]: - if layer is not None: # shortcut can be None - weight_init.c2_msra_fill(layer) - - nn.init.constant_(self.conv2_offset.weight, 0) - nn.init.constant_(self.conv2_offset.bias, 0) - - def forward(self, x): - out = self.conv1(x) - out = F.relu_(out) - - if self.deform_modulated: - offset_mask = self.conv2_offset(out) - offset_x, offset_y, mask = torch.chunk(offset_mask, 3, dim=1) - offset = torch.cat((offset_x, offset_y), dim=1) - mask = mask.sigmoid() - out = self.conv2(out, offset, mask) - else: - offset = self.conv2_offset(out) - out = self.conv2(out, offset) - out = F.relu_(out) - - out = self.conv3(out) - - if self.shortcut is not None: - shortcut = self.shortcut(x) - else: - shortcut = x - - out += shortcut - out = F.relu_(out) - return out - - -def make_stage(block_class, num_blocks, first_stride, *, in_channels, out_channels, **kwargs): - """ - Create a list of blocks just like those in a ResNet stage. - - Args: - block_class (type): a subclass of ResNetBlockBase - num_blocks (int): - first_stride (int): the stride of the first block. The other blocks will have stride=1. - in_channels (int): input channels of the entire stage. - out_channels (int): output channels of **every block** in the stage. - kwargs: other arguments passed to the constructor of every block. - - Returns: - list[nn.Module]: a list of block module. - """ - assert "stride" not in kwargs, "Stride of blocks in make_stage cannot be changed." - blocks = [] - for i in range(num_blocks): - blocks.append( - block_class( - in_channels=in_channels, - out_channels=out_channels, - stride=first_stride if i == 0 else 1, - **kwargs, - ) - ) - in_channels = out_channels - return blocks - - -class BasicStem(CNNBlockBase): - """ - The standard ResNet stem (layers before the first residual block). 
- """ - - def __init__(self, in_channels=3, out_channels=64, norm="BN"): - """ - Args: - norm (str or callable): norm after the first conv layer. - See :func:`layers.get_norm` for supported format. - """ - super().__init__(in_channels, out_channels, 4) - self.in_channels = in_channels - self.conv1 = Conv2d( - in_channels, - out_channels, - kernel_size=7, - stride=2, - padding=3, - bias=False, - norm=get_norm(norm, out_channels), - ) - weight_init.c2_msra_fill(self.conv1) - - def forward(self, x): - x = self.conv1(x) - x = F.relu_(x) - x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1) - return x - - -class ResNet(Backbone): - """ - Implement :paper:`ResNet`. - """ - - def __init__(self, stem, stages, num_classes=None, out_features=None): - """ - Args: - stem (nn.Module): a stem module - stages (list[list[CNNBlockBase]]): several (typically 4) stages, - each contains multiple :class:`CNNBlockBase`. - num_classes (None or int): if None, will not perform classification. - Otherwise, will create a linear layer. - out_features (list[str]): name of the layers whose outputs should - be returned in forward. Can be anything in "stem", "linear", or "res2" ... - If None, will return the output of the last layer. - """ - super(ResNet, self).__init__() - self.stem = stem - self.num_classes = num_classes - - current_stride = self.stem.stride - self._out_feature_strides = {"stem": current_stride} - self._out_feature_channels = {"stem": self.stem.out_channels} - - self.stages_and_names = [] - for i, blocks in enumerate(stages): - assert len(blocks) > 0, len(blocks) - for block in blocks: - assert isinstance(block, CNNBlockBase), block - - name = "res" + str(i + 2) - stage = nn.Sequential(*blocks) - - self.add_module(name, stage) - self.stages_and_names.append((stage, name)) - - self._out_feature_strides[name] = current_stride = int( - current_stride * np.prod([k.stride for k in blocks]) - ) - self._out_feature_channels[name] = curr_channels = blocks[-1].out_channels - - if num_classes is not None: - self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) - self.linear = nn.Linear(curr_channels, num_classes) - - # Sec 5.1 in "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour": - # "The 1000-way fully-connected layer is initialized by - # drawing weights from a zero-mean Gaussian with standard deviation of 0.01." - nn.init.normal_(self.linear.weight, std=0.01) - name = "linear" - - if out_features is None: - out_features = [name] - self._out_features = out_features - assert len(self._out_features) - children = [x[0] for x in self.named_children()] - for out_feature in self._out_features: - assert out_feature in children, "Available children: {}".format(", ".join(children)) - - def forward(self, x): - outputs = {} - x = self.stem(x) - if "stem" in self._out_features: - outputs["stem"] = x - for stage, name in self.stages_and_names: - x = stage(x) - if name in self._out_features: - outputs[name] = x - if self.num_classes is not None: - x = self.avgpool(x) - x = torch.flatten(x, 1) - x = self.linear(x) - if "linear" in self._out_features: - outputs["linear"] = x - return outputs - - def output_shape(self): - return { - name: ShapeSpec( - channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] - ) - for name in self._out_features - } - - def freeze(self, freeze_at=0): - """ - Freeze the first several stages of the ResNet. Commonly used in - fine-tuning. - - Layers that produce the same feature map spatial size are defined as one - "stage" by :paper:`FPN`. 
- - Args: - freeze_at (int): number of stages to freeze. - `1` means freezing the stem. `2` means freezing the stem and - one residual stage, etc. - - Returns: - nn.Module: this ResNet itself - """ - if freeze_at >= 1: - self.stem.freeze() - for idx, (stage, _) in enumerate(self.stages_and_names, start=2): - if freeze_at >= idx: - for block in stage.children(): - block.freeze() - return self - - -@BACKBONE_REGISTRY.register() -def build_resnet_backbone(cfg, input_shape): - """ - Create a ResNet instance from config. - - Returns: - ResNet: a :class:`ResNet` instance. - """ - # need registration of new blocks/stems? - norm = cfg.MODEL.RESNETS.NORM - stem = BasicStem( - in_channels=input_shape.channels, - out_channels=cfg.MODEL.RESNETS.STEM_OUT_CHANNELS, - norm=norm, - ) - - # fmt: off - freeze_at = cfg.MODEL.BACKBONE.FREEZE_AT - out_features = cfg.MODEL.RESNETS.OUT_FEATURES - depth = cfg.MODEL.RESNETS.DEPTH - num_groups = cfg.MODEL.RESNETS.NUM_GROUPS - width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP - bottleneck_channels = num_groups * width_per_group - in_channels = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS - out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS - stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1 - res5_dilation = cfg.MODEL.RESNETS.RES5_DILATION - deform_on_per_stage = cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE - deform_modulated = cfg.MODEL.RESNETS.DEFORM_MODULATED - deform_num_groups = cfg.MODEL.RESNETS.DEFORM_NUM_GROUPS - # fmt: on - assert res5_dilation in {1, 2}, "res5_dilation cannot be {}.".format(res5_dilation) - - num_blocks_per_stage = { - 18: [2, 2, 2, 2], - 34: [3, 4, 6, 3], - 50: [3, 4, 6, 3], - 101: [3, 4, 23, 3], - 152: [3, 8, 36, 3], - }[depth] - - if depth in [18, 34]: - assert out_channels == 64, "Must set MODEL.RESNETS.RES2_OUT_CHANNELS = 64 for R18/R34" - assert not any( - deform_on_per_stage - ), "MODEL.RESNETS.DEFORM_ON_PER_STAGE unsupported for R18/R34" - assert res5_dilation == 1, "Must set MODEL.RESNETS.RES5_DILATION = 1 for R18/R34" - assert num_groups == 1, "Must set MODEL.RESNETS.NUM_GROUPS = 1 for R18/R34" - - stages = [] - - # Avoid creating variables without gradients - # It consumes extra memory and may cause allreduce to fail - out_stage_idx = [{"res2": 2, "res3": 3, "res4": 4, "res5": 5}[f] for f in out_features] - max_stage_idx = max(out_stage_idx) - for idx, stage_idx in enumerate(range(2, max_stage_idx + 1)): - dilation = res5_dilation if stage_idx == 5 else 1 - first_stride = 1 if idx == 0 or (stage_idx == 5 and dilation == 2) else 2 - stage_kargs = { - "num_blocks": num_blocks_per_stage[idx], - "first_stride": first_stride, - "in_channels": in_channels, - "out_channels": out_channels, - "norm": norm, - } - # Use BasicBlock for R18 and R34. 
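# Editor's note (not part of the original file): the num_blocks_per_stage table
# above matches the standard variants; e.g. depth 50 uses 3 + 4 + 6 + 3 = 16
# bottleneck blocks (48 convolutions), which together with the stem conv and
# the final linear layer accounts for the "50" in ResNet-50.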
- if depth in [18, 34]: - stage_kargs["block_class"] = BasicBlock - else: - stage_kargs["bottleneck_channels"] = bottleneck_channels - stage_kargs["stride_in_1x1"] = stride_in_1x1 - stage_kargs["dilation"] = dilation - stage_kargs["num_groups"] = num_groups - if deform_on_per_stage[idx]: - stage_kargs["block_class"] = DeformBottleneckBlock - stage_kargs["deform_modulated"] = deform_modulated - stage_kargs["deform_num_groups"] = deform_num_groups - else: - stage_kargs["block_class"] = BottleneckBlock - blocks = make_stage(**stage_kargs) - in_channels = out_channels - out_channels *= 2 - bottleneck_channels *= 2 - stages.append(blocks) - return ResNet(stem, stages, out_features=out_features).freeze(freeze_at) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/box_regression.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/box_regression.py deleted file mode 100644 index 88426fd..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/box_regression.py +++ /dev/null @@ -1,247 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import math -from typing import Tuple -import torch - -# Value for clamping large dw and dh predictions. The heuristic is that we clamp -# such that dw and dh are no larger than what would transform a 16px box into a -# 1000px box (based on a small anchor, 16px, and a typical image size, 1000px). -_DEFAULT_SCALE_CLAMP = math.log(1000.0 / 16) - - -__all__ = ["Box2BoxTransform", "Box2BoxTransformRotated"] - - -def apply_deltas_broadcast(box2box_transform, deltas, boxes): - """ - Apply transform deltas to boxes. Similar to `box2box_transform.apply_deltas`, - but allow broadcasting boxes when the second dimension of deltas is a multiple - of box dimension. - - Args: - box2box_transform (Box2BoxTransform or Box2BoxTransformRotated): the transform to apply - deltas (Tensor): tensor of shape (N,B) or (N,KxB) - boxes (Tensor): tensor of shape (N,B) - - Returns: - Tensor: same shape as deltas. - """ - assert deltas.dim() == boxes.dim() == 2, f"{deltas.shape}, {boxes.shape}" - N, B = boxes.shape - assert ( - deltas.shape[1] % B == 0 - ), f"Second dim of deltas should be a multiple of {B}. Got {deltas.shape}" - K = deltas.shape[1] // B - ret = box2box_transform.apply_deltas( - deltas.view(N * K, B), boxes.unsqueeze(1).expand(N, K, B).reshape(N * K, B) - ) - return ret.view(N, K * B) - - -@torch.jit.script -class Box2BoxTransform(object): - """ - The box-to-box transform defined in R-CNN. The transformation is parameterized - by 4 deltas: (dx, dy, dw, dh). The transformation scales the box's width and height - by exp(dw), exp(dh) and shifts a box's center by the offset (dx * width, dy * height). - """ - - def __init__( - self, weights: Tuple[float, float, float, float], scale_clamp: float = _DEFAULT_SCALE_CLAMP - ): - """ - Args: - weights (4-element tuple): Scaling factors that are applied to the - (dx, dy, dw, dh) deltas. In Fast R-CNN, these were originally set - such that the deltas have unit variance; now they are treated as - hyperparameters of the system. - scale_clamp (float): When predicting deltas, the predicted box scaling - factors (dw and dh) are clamped such that they are <= scale_clamp. - """ - self.weights = weights - self.scale_clamp = scale_clamp - - def get_deltas(self, src_boxes, target_boxes): - """ - Get box regression transformation deltas (dx, dy, dw, dh) that can be used - to transform the `src_boxes` into the `target_boxes`. 
That is, the relation - ``target_boxes == self.apply_deltas(deltas, src_boxes)`` is true (unless - any delta is too large and is clamped). - - Args: - src_boxes (Tensor): source boxes, e.g., object proposals - target_boxes (Tensor): target of the transformation, e.g., ground-truth - boxes. - """ - assert isinstance(src_boxes, torch.Tensor), type(src_boxes) - assert isinstance(target_boxes, torch.Tensor), type(target_boxes) - - src_widths = src_boxes[:, 2] - src_boxes[:, 0] - src_heights = src_boxes[:, 3] - src_boxes[:, 1] - src_ctr_x = src_boxes[:, 0] + 0.5 * src_widths - src_ctr_y = src_boxes[:, 1] + 0.5 * src_heights - - target_widths = target_boxes[:, 2] - target_boxes[:, 0] - target_heights = target_boxes[:, 3] - target_boxes[:, 1] - target_ctr_x = target_boxes[:, 0] + 0.5 * target_widths - target_ctr_y = target_boxes[:, 1] + 0.5 * target_heights - - wx, wy, ww, wh = self.weights - dx = wx * (target_ctr_x - src_ctr_x) / src_widths - dy = wy * (target_ctr_y - src_ctr_y) / src_heights - dw = ww * torch.log(target_widths / src_widths) - dh = wh * torch.log(target_heights / src_heights) - - deltas = torch.stack((dx, dy, dw, dh), dim=1) - assert (src_widths > 0).all().item(), "Input boxes to Box2BoxTransform are not valid!" - return deltas - - def apply_deltas(self, deltas, boxes): - """ - Apply transformation `deltas` (dx, dy, dw, dh) to `boxes`. - - Args: - deltas (Tensor): transformation deltas of shape (N, k*4), where k >= 1. - deltas[i] represents k potentially different class-specific - box transformations for the single box boxes[i]. - boxes (Tensor): boxes to transform, of shape (N, 4) - """ - boxes = boxes.to(deltas.dtype) - - widths = boxes[:, 2] - boxes[:, 0] - heights = boxes[:, 3] - boxes[:, 1] - ctr_x = boxes[:, 0] + 0.5 * widths - ctr_y = boxes[:, 1] + 0.5 * heights - - wx, wy, ww, wh = self.weights - dx = deltas[:, 0::4] / wx - dy = deltas[:, 1::4] / wy - dw = deltas[:, 2::4] / ww - dh = deltas[:, 3::4] / wh - - # Prevent sending too large values into torch.exp() - dw = torch.clamp(dw, max=self.scale_clamp) - dh = torch.clamp(dh, max=self.scale_clamp) - - pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] - pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] - pred_w = torch.exp(dw) * widths[:, None] - pred_h = torch.exp(dh) * heights[:, None] - - pred_boxes = torch.zeros_like(deltas) - pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w # x1 - pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h # y1 - pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w # x2 - pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h # y2 - return pred_boxes - - -@torch.jit.script -class Box2BoxTransformRotated(object): - """ - The box-to-box transform defined in Rotated R-CNN. The transformation is parameterized - by 5 deltas: (dx, dy, dw, dh, da). The transformation scales the box's width and height - by exp(dw), exp(dh), shifts a box's center by the offset (dx * width, dy * height), - and rotate a box's angle by da (radians). - Note: angles of deltas are in radians while angles of boxes are in degrees. - """ - - def __init__( - self, - weights: Tuple[float, float, float, float, float], - scale_clamp: float = _DEFAULT_SCALE_CLAMP, - ): - """ - Args: - weights (5-element tuple): Scaling factors that are applied to the - (dx, dy, dw, dh, da) deltas. These are treated as - hyperparameters of the system. - scale_clamp (float): When predicting deltas, the predicted box scaling - factors (dw and dh) are clamped such that they are <= scale_clamp. 
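# Editor's note: a numeric sketch, not part of the original file, of the inverse
# relationship documented for Box2BoxTransform above:
# apply_deltas(get_deltas(src, tgt), src) recovers tgt up to floating point.
import torch

transform = Box2BoxTransform(weights=(1.0, 1.0, 1.0, 1.0))   # class defined above
src = torch.tensor([[0.0, 0.0, 10.0, 10.0]])
tgt = torch.tensor([[2.0, 3.0, 14.0, 11.0]])
deltas = transform.get_deltas(src, tgt)
assert torch.allclose(transform.apply_deltas(deltas, src), tgt, atol=1e-4)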
- """ - self.weights = weights - self.scale_clamp = scale_clamp - - def get_deltas(self, src_boxes, target_boxes): - """ - Get box regression transformation deltas (dx, dy, dw, dh, da) that can be used - to transform the `src_boxes` into the `target_boxes`. That is, the relation - ``target_boxes == self.apply_deltas(deltas, src_boxes)`` is true (unless - any delta is too large and is clamped). - - Args: - src_boxes (Tensor): Nx5 source boxes, e.g., object proposals - target_boxes (Tensor): Nx5 target of the transformation, e.g., ground-truth - boxes. - """ - assert isinstance(src_boxes, torch.Tensor), type(src_boxes) - assert isinstance(target_boxes, torch.Tensor), type(target_boxes) - - src_ctr_x, src_ctr_y, src_widths, src_heights, src_angles = torch.unbind(src_boxes, dim=1) - - target_ctr_x, target_ctr_y, target_widths, target_heights, target_angles = torch.unbind( - target_boxes, dim=1 - ) - - wx, wy, ww, wh, wa = self.weights - dx = wx * (target_ctr_x - src_ctr_x) / src_widths - dy = wy * (target_ctr_y - src_ctr_y) / src_heights - dw = ww * torch.log(target_widths / src_widths) - dh = wh * torch.log(target_heights / src_heights) - # Angles of deltas are in radians while angles of boxes are in degrees. - # the conversion to radians serve as a way to normalize the values - da = target_angles - src_angles - da = (da + 180.0) % 360.0 - 180.0 # make it in [-180, 180) - da *= wa * math.pi / 180.0 - - deltas = torch.stack((dx, dy, dw, dh, da), dim=1) - assert ( - (src_widths > 0).all().item() - ), "Input boxes to Box2BoxTransformRotated are not valid!" - return deltas - - def apply_deltas(self, deltas, boxes): - """ - Apply transformation `deltas` (dx, dy, dw, dh, da) to `boxes`. - - Args: - deltas (Tensor): transformation deltas of shape (N, 5). - deltas[i] represents box transformation for the single box boxes[i]. - boxes (Tensor): boxes to transform, of shape (N, 5) - """ - assert deltas.shape[1] == 5 and boxes.shape[1] == 5 - - boxes = boxes.to(deltas.dtype) - - ctr_x = boxes[:, 0] - ctr_y = boxes[:, 1] - widths = boxes[:, 2] - heights = boxes[:, 3] - angles = boxes[:, 4] - - wx, wy, ww, wh, wa = self.weights - - dx = deltas[:, 0] / wx - dy = deltas[:, 1] / wy - dw = deltas[:, 2] / ww - dh = deltas[:, 3] / wh - da = deltas[:, 4] / wa - - # Prevent sending too large values into torch.exp() - dw = torch.clamp(dw, max=self.scale_clamp) - dh = torch.clamp(dh, max=self.scale_clamp) - - pred_boxes = torch.zeros_like(deltas) - pred_boxes[:, 0] = dx * widths + ctr_x # x_ctr - pred_boxes[:, 1] = dy * heights + ctr_y # y_ctr - pred_boxes[:, 2] = torch.exp(dw) * widths # width - pred_boxes[:, 3] = torch.exp(dh) * heights # height - - # Following original RRPN implementation, - # angles of deltas are in radians while angles of boxes are in degrees. - pred_angle = da * 180.0 / math.pi + angles - pred_angle = (pred_angle + 180.0) % 360.0 - 180.0 # make it in [-180, 180) - - pred_boxes[:, 4] = pred_angle - - return pred_boxes diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/matcher.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/matcher.py deleted file mode 100644 index 2911f8c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/matcher.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from typing import List -import torch - - -class Matcher(object): - """ - This class assigns to each predicted "element" (e.g., a box) a ground-truth - element. 
Each predicted element will have exactly zero or one matches; each - ground-truth element may be matched to zero or more predicted elements. - - The matching is determined by the MxN match_quality_matrix, that characterizes - how well each (ground-truth, prediction)-pair match each other. For example, - if the elements are boxes, this matrix may contain box intersection-over-union - overlap values. - - The matcher returns (a) a vector of length N containing the index of the - ground-truth element m in [0, M) that matches to prediction n in [0, N). - (b) a vector of length N containing the labels for each prediction. - """ - - def __init__( - self, thresholds: List[float], labels: List[int], allow_low_quality_matches: bool = False - ): - """ - Args: - thresholds (list): a list of thresholds used to stratify predictions - into levels. - labels (list): a list of values to label predictions belonging at - each level. A label can be one of {-1, 0, 1} signifying - {ignore, negative class, positive class}, respectively. - allow_low_quality_matches (bool): if True, produce additional matches - for predictions with maximum match quality lower than high_threshold. - See set_low_quality_matches_ for more details. - - For example, - thresholds = [0.3, 0.5] - labels = [0, -1, 1] - All predictions with iou < 0.3 will be marked with 0 and - thus will be considered as false positives while training. - All predictions with 0.3 <= iou < 0.5 will be marked with -1 and - thus will be ignored. - All predictions with 0.5 <= iou will be marked with 1 and - thus will be considered as true positives. - """ - # Add -inf and +inf to first and last position in thresholds - thresholds = thresholds[:] - assert thresholds[0] > 0 - thresholds.insert(0, -float("inf")) - thresholds.append(float("inf")) - assert all(low <= high for (low, high) in zip(thresholds[:-1], thresholds[1:])) - assert all(l in [-1, 0, 1] for l in labels) - assert len(labels) == len(thresholds) - 1 - self.thresholds = thresholds - self.labels = labels - self.allow_low_quality_matches = allow_low_quality_matches - - def __call__(self, match_quality_matrix): - """ - Args: - match_quality_matrix (Tensor[float]): an MxN tensor, containing the - pairwise quality between M ground-truth elements and N predicted - elements. All elements must be >= 0 (due to the us of `torch.nonzero` - for selecting indices in :meth:`set_low_quality_matches_`). 
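# Editor's note: a tiny numeric sketch, not part of the original file, of the
# thresholds/labels scheme documented in the constructor above. With
# thresholds=[0.3, 0.5] and labels=[0, -1, 1], an IoU of 0.2 is background (0),
# 0.4 is ignored (-1) and 0.7 is foreground (1).
import torch

matcher = Matcher(thresholds=[0.3, 0.5], labels=[0, -1, 1])   # class defined above
iou = torch.tensor([[0.2, 0.4, 0.7]])        # 1 ground-truth box x 3 predictions
matches, match_labels = matcher(iou)
assert matches.tolist() == [0, 0, 0]
assert match_labels.tolist() == [0, -1, 1]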
- - Returns: - matches (Tensor[int64]): a vector of length N, where matches[i] is a matched - ground-truth index in [0, M) - match_labels (Tensor[int8]): a vector of length N, where pred_labels[i] indicates - whether a prediction is a true or false positive or ignored - """ - assert match_quality_matrix.dim() == 2 - if match_quality_matrix.numel() == 0: - default_matches = match_quality_matrix.new_full( - (match_quality_matrix.size(1),), 0, dtype=torch.int64 - ) - # When no gt boxes exist, we define IOU = 0 and therefore set labels - # to `self.labels[0]`, which usually defaults to background class 0 - # To choose to ignore instead, can make labels=[-1,0,-1,1] + set appropriate thresholds - default_match_labels = match_quality_matrix.new_full( - (match_quality_matrix.size(1),), self.labels[0], dtype=torch.int8 - ) - return default_matches, default_match_labels - - assert torch.all(match_quality_matrix >= 0) - - # match_quality_matrix is M (gt) x N (predicted) - # Max over gt elements (dim 0) to find best gt candidate for each prediction - matched_vals, matches = match_quality_matrix.max(dim=0) - - match_labels = matches.new_full(matches.size(), 1, dtype=torch.int8) - - for (l, low, high) in zip(self.labels, self.thresholds[:-1], self.thresholds[1:]): - low_high = (matched_vals >= low) & (matched_vals < high) - match_labels[low_high] = l - - if self.allow_low_quality_matches: - self.set_low_quality_matches_(match_labels, match_quality_matrix) - - return matches, match_labels - - def set_low_quality_matches_(self, match_labels, match_quality_matrix): - """ - Produce additional matches for predictions that have only low-quality matches. - Specifically, for each ground-truth G find the set of predictions that have - maximum overlap with it (including ties); for each prediction in that set, if - it is unmatched, then match it to the ground-truth G. - - This function implements the RPN assignment case (i) in Sec. 3.1.2 of - :paper:`Faster R-CNN`. - """ - # For each gt, find the prediction with which it has highest quality - highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1) - # Find the highest quality match available, even if it is low, including ties. - # Note that the matches qualities must be positive due to the use of - # `torch.nonzero`. - _, pred_inds_with_highest_quality = torch.nonzero( - match_quality_matrix == highest_quality_foreach_gt[:, None], as_tuple=True - ) - # If an anchor was labeled positive only due to a low-quality match - # with gt_A, but it has larger overlap with gt_B, it's matched index will still be gt_B. - # This follows the implementation in Detectron, and is found to have no significant impact. - match_labels[pred_inds_with_highest_quality] = 1 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/__init__.py deleted file mode 100644 index 96ef9b5..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -from .build import META_ARCH_REGISTRY, build_model # isort:skip - -from .panoptic_fpn import PanopticFPN - -# import all the meta_arch, so they will be registered -from .rcnn import GeneralizedRCNN, ProposalNetwork -from .retinanet import RetinaNet -from .semantic_seg import SEM_SEG_HEADS_REGISTRY, SemanticSegmentor, build_sem_seg_head diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/build.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/build.py deleted file mode 100644 index 630389d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/build.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import torch - -from detectron2.utils.registry import Registry - -META_ARCH_REGISTRY = Registry("META_ARCH") # noqa F401 isort:skip -META_ARCH_REGISTRY.__doc__ = """ -Registry for meta-architectures, i.e. the whole model. - -The registered object will be called with `obj(cfg)` -and expected to return a `nn.Module` object. -""" - - -def build_model(cfg): - """ - Build the whole model architecture, defined by ``cfg.MODEL.META_ARCHITECTURE``. - Note that it does not load any weights from ``cfg``. - """ - meta_arch = cfg.MODEL.META_ARCHITECTURE - model = META_ARCH_REGISTRY.get(meta_arch)(cfg) - model.to(torch.device(cfg.MODEL.DEVICE)) - return model diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/panoptic_fpn.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/panoptic_fpn.py deleted file mode 100644 index c5f92f7..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/panoptic_fpn.py +++ /dev/null @@ -1,218 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import torch -from torch import nn - -from detectron2.structures import ImageList - -from ..backbone import build_backbone -from ..postprocessing import detector_postprocess, sem_seg_postprocess -from ..proposal_generator import build_proposal_generator -from ..roi_heads import build_roi_heads -from .build import META_ARCH_REGISTRY -from .semantic_seg import build_sem_seg_head - -__all__ = ["PanopticFPN"] - - -@META_ARCH_REGISTRY.register() -class PanopticFPN(nn.Module): - """ - Implement the paper :paper:`PanopticFPN`. 
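# Editor's note: a hedged usage sketch, not part of the original file; it
# assumes a full detectron2 installation. Meta-architectures such as the
# PanopticFPN below are instantiated by name through build_model, and weights
# are loaded separately:
#
#   from detectron2.config import get_cfg
#   from detectron2.modeling import build_model
#   from detectron2.checkpoint import DetectionCheckpointer
#
#   cfg = get_cfg()
#   cfg.MODEL.META_ARCHITECTURE = "PanopticFPN"   # any registered name
#   model = build_model(cfg)                      # placed on cfg.MODEL.DEVICE, no weights yet
#   DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)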
- """ - - def __init__(self, cfg): - super().__init__() - - self.instance_loss_weight = cfg.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT - - # options when combining instance & semantic outputs - self.combine_on = cfg.MODEL.PANOPTIC_FPN.COMBINE.ENABLED - self.combine_overlap_threshold = cfg.MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH - self.combine_stuff_area_limit = cfg.MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT - self.combine_instances_confidence_threshold = ( - cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH - ) - - self.backbone = build_backbone(cfg) - self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape()) - self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape()) - self.sem_seg_head = build_sem_seg_head(cfg, self.backbone.output_shape()) - - self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)) - self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)) - - @property - def device(self): - return self.pixel_mean.device - - def forward(self, batched_inputs): - """ - Args: - batched_inputs: a list, batched outputs of :class:`DatasetMapper`. - Each item in the list contains the inputs for one image. - - For now, each item in the list is a dict that contains: - - * "image": Tensor, image in (C, H, W) format. - * "instances": Instances - * "sem_seg": semantic segmentation ground truth. - * Other information that's included in the original dicts, such as: - "height", "width" (int): the output resolution of the model, used in inference. - See :meth:`postprocess` for details. - - Returns: - list[dict]: - each dict is the results for one image. The dict contains the following keys: - - * "instances": see :meth:`GeneralizedRCNN.forward` for its format. - * "sem_seg": see :meth:`SemanticSegmentor.forward` for its format. - * "panoptic_seg": available when `PANOPTIC_FPN.COMBINE.ENABLED`. - See the return value of - :func:`combine_semantic_and_instance_outputs` for its format. 
- """ - images = [x["image"].to(self.device) for x in batched_inputs] - images = [(x - self.pixel_mean) / self.pixel_std for x in images] - images = ImageList.from_tensors(images, self.backbone.size_divisibility) - features = self.backbone(images.tensor) - - if "proposals" in batched_inputs[0]: - proposals = [x["proposals"].to(self.device) for x in batched_inputs] - proposal_losses = {} - - if "sem_seg" in batched_inputs[0]: - gt_sem_seg = [x["sem_seg"].to(self.device) for x in batched_inputs] - gt_sem_seg = ImageList.from_tensors( - gt_sem_seg, self.backbone.size_divisibility, self.sem_seg_head.ignore_value - ).tensor - else: - gt_sem_seg = None - sem_seg_results, sem_seg_losses = self.sem_seg_head(features, gt_sem_seg) - - if "instances" in batched_inputs[0]: - gt_instances = [x["instances"].to(self.device) for x in batched_inputs] - else: - gt_instances = None - if self.proposal_generator: - proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) - detector_results, detector_losses = self.roi_heads( - images, features, proposals, gt_instances - ) - - if self.training: - losses = {} - losses.update(sem_seg_losses) - losses.update({k: v * self.instance_loss_weight for k, v in detector_losses.items()}) - losses.update(proposal_losses) - return losses - - processed_results = [] - for sem_seg_result, detector_result, input_per_image, image_size in zip( - sem_seg_results, detector_results, batched_inputs, images.image_sizes - ): - height = input_per_image.get("height", image_size[0]) - width = input_per_image.get("width", image_size[1]) - sem_seg_r = sem_seg_postprocess(sem_seg_result, image_size, height, width) - detector_r = detector_postprocess(detector_result, height, width) - - processed_results.append({"sem_seg": sem_seg_r, "instances": detector_r}) - - if self.combine_on: - panoptic_r = combine_semantic_and_instance_outputs( - detector_r, - sem_seg_r.argmax(dim=0), - self.combine_overlap_threshold, - self.combine_stuff_area_limit, - self.combine_instances_confidence_threshold, - ) - processed_results[-1]["panoptic_seg"] = panoptic_r - return processed_results - - -def combine_semantic_and_instance_outputs( - instance_results, - semantic_results, - overlap_threshold, - stuff_area_limit, - instances_confidence_threshold, -): - """ - Implement a simple combining logic following - "combine_semantic_and_instance_predictions.py" in panopticapi - to produce panoptic segmentation outputs. - - Args: - instance_results: output of :func:`detector_postprocess`. - semantic_results: an (H, W) tensor, each is the contiguous semantic - category id - - Returns: - panoptic_seg (Tensor): of shape (height, width) where the values are ids for each segment. - segments_info (list[dict]): Describe each segment in `panoptic_seg`. - Each dict contains keys "id", "category_id", "isthing". 
- """ - panoptic_seg = torch.zeros_like(semantic_results, dtype=torch.int32) - - # sort instance outputs by scores - sorted_inds = torch.argsort(-instance_results.scores) - - current_segment_id = 0 - segments_info = [] - - instance_masks = instance_results.pred_masks.to(dtype=torch.bool, device=panoptic_seg.device) - - # Add instances one-by-one, check for overlaps with existing ones - for inst_id in sorted_inds: - score = instance_results.scores[inst_id].item() - if score < instances_confidence_threshold: - break - mask = instance_masks[inst_id] # H,W - mask_area = mask.sum().item() - - if mask_area == 0: - continue - - intersect = (mask > 0) & (panoptic_seg > 0) - intersect_area = intersect.sum().item() - - if intersect_area * 1.0 / mask_area > overlap_threshold: - continue - - if intersect_area > 0: - mask = mask & (panoptic_seg == 0) - - current_segment_id += 1 - panoptic_seg[mask] = current_segment_id - segments_info.append( - { - "id": current_segment_id, - "isthing": True, - "score": score, - "category_id": instance_results.pred_classes[inst_id].item(), - "instance_id": inst_id.item(), - } - ) - - # Add semantic results to remaining empty areas - semantic_labels = torch.unique(semantic_results).cpu().tolist() - for semantic_label in semantic_labels: - if semantic_label == 0: # 0 is a special "thing" class - continue - mask = (semantic_results == semantic_label) & (panoptic_seg == 0) - mask_area = mask.sum().item() - if mask_area < stuff_area_limit: - continue - - current_segment_id += 1 - panoptic_seg[mask] = current_segment_id - segments_info.append( - { - "id": current_segment_id, - "isthing": False, - "category_id": semantic_label, - "area": mask_area, - } - ) - - return panoptic_seg, segments_info diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/rcnn.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/rcnn.py deleted file mode 100644 index b15ea8a..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/rcnn.py +++ /dev/null @@ -1,263 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -import numpy as np -import torch -from torch import nn - -from detectron2.structures import ImageList -from detectron2.utils.events import get_event_storage -from detectron2.utils.logger import log_first_n - -from ..backbone import build_backbone -from ..postprocessing import detector_postprocess -from ..proposal_generator import build_proposal_generator -from ..roi_heads import build_roi_heads -from .build import META_ARCH_REGISTRY - -__all__ = ["GeneralizedRCNN", "ProposalNetwork"] - - -@META_ARCH_REGISTRY.register() -class GeneralizedRCNN(nn.Module): - """ - Generalized R-CNN. Any models that contains the following three components: - 1. Per-image feature extraction (aka backbone) - 2. Region proposal generation - 3. 
Per-region feature extraction and prediction - """ - - def __init__(self, cfg): - super().__init__() - - self.backbone = build_backbone(cfg) - self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape()) - self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape()) - self.vis_period = cfg.VIS_PERIOD - self.input_format = cfg.INPUT.FORMAT - - assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD) - self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)) - self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)) - - @property - def device(self): - return self.pixel_mean.device - - def visualize_training(self, batched_inputs, proposals): - """ - A function used to visualize images and proposals. It shows ground truth - bounding boxes on the original image and up to 20 predicted object - proposals on the original image. Users can implement different - visualization functions for different models. - - Args: - batched_inputs (list): a list that contains input to the model. - proposals (list): a list that contains predicted proposals. Both - batched_inputs and proposals should have the same length. - """ - from detectron2.utils.visualizer import Visualizer - - storage = get_event_storage() - max_vis_prop = 20 - - for input, prop in zip(batched_inputs, proposals): - img = input["image"].cpu().numpy() - assert img.shape[0] == 3, "Images should have 3 channels." - if self.input_format == "BGR": - img = img[::-1, :, :] - img = img.transpose(1, 2, 0) - v_gt = Visualizer(img, None) - v_gt = v_gt.overlay_instances(boxes=input["instances"].gt_boxes) - anno_img = v_gt.get_image() - box_size = min(len(prop.proposal_boxes), max_vis_prop) - v_pred = Visualizer(img, None) - v_pred = v_pred.overlay_instances( - boxes=prop.proposal_boxes[0:box_size].tensor.cpu().numpy() - ) - prop_img = v_pred.get_image() - vis_img = np.concatenate((anno_img, prop_img), axis=1) - vis_img = vis_img.transpose(2, 0, 1) - vis_name = "Left: GT bounding boxes; Right: Predicted proposals" - storage.put_image(vis_name, vis_img) - break # only visualize one image in a batch - - def forward(self, batched_inputs): - """ - Args: - batched_inputs: a list, batched outputs of :class:`DatasetMapper` . - Each item in the list contains the inputs for one image. - For now, each item in the list is a dict that contains: - - * image: Tensor, image in (C, H, W) format. - * instances (optional): groundtruth :class:`Instances` - * proposals (optional): :class:`Instances`, precomputed proposals. - - Other information that's included in the original dicts, such as: - - * "height", "width" (int): the output resolution of the model, used in inference. - See :meth:`postprocess` for details. - - Returns: - list[dict]: - Each dict is the output for one input image. - The dict contains one key "instances" whose value is a :class:`Instances`. 
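A minimal sketch of the per-image input dict this docstring describes, built with plain tensors; the model call is left commented out since it assumes an instantiated GeneralizedRCNN.

import torch

image_chw = torch.randint(0, 256, (3, 480, 640), dtype=torch.uint8).float()  # (C, H, W)
batched_inputs = [{
    "image": image_chw,   # unnormalized; normalization happens in preprocess_image below
    "height": 720,        # desired output resolution at inference time
    "width": 960,
}]
# outputs = model(batched_inputs)   # hypothetical model instance; returns list[dict]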
- The :class:`Instances` object has the following keys: - "pred_boxes", "pred_classes", "scores", "pred_masks", "pred_keypoints" - """ - if not self.training: - return self.inference(batched_inputs) - - images = self.preprocess_image(batched_inputs) - if "instances" in batched_inputs[0]: - gt_instances = [x["instances"].to(self.device) for x in batched_inputs] - elif "targets" in batched_inputs[0]: - log_first_n( - logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10 - ) - gt_instances = [x["targets"].to(self.device) for x in batched_inputs] - else: - gt_instances = None - - features = self.backbone(images.tensor) - - if self.proposal_generator: - proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) - else: - assert "proposals" in batched_inputs[0] - proposals = [x["proposals"].to(self.device) for x in batched_inputs] - proposal_losses = {} - - _, detector_losses = self.roi_heads(images, features, proposals, gt_instances) - if self.vis_period > 0: - storage = get_event_storage() - if storage.iter % self.vis_period == 0: - self.visualize_training(batched_inputs, proposals) - - losses = {} - losses.update(detector_losses) - losses.update(proposal_losses) - return losses - - def inference(self, batched_inputs, detected_instances=None, do_postprocess=True): - """ - Run inference on the given inputs. - - Args: - batched_inputs (list[dict]): same as in :meth:`forward` - detected_instances (None or list[Instances]): if not None, it - contains an `Instances` object per image. The `Instances` - object contains "pred_boxes" and "pred_classes" which are - known boxes in the image. - The inference will then skip the detection of bounding boxes, - and only predict other per-ROI outputs. - do_postprocess (bool): whether to apply post-processing on the outputs. - - Returns: - same as in :meth:`forward`. - """ - assert not self.training - - images = self.preprocess_image(batched_inputs) - features = self.backbone(images.tensor) - - if detected_instances is None: - if self.proposal_generator: - proposals, _ = self.proposal_generator(images, features, None) - else: - assert "proposals" in batched_inputs[0] - proposals = [x["proposals"].to(self.device) for x in batched_inputs] - - results, _ = self.roi_heads(images, features, proposals, None) - else: - detected_instances = [x.to(self.device) for x in detected_instances] - results = self.roi_heads.forward_with_given_boxes(features, detected_instances) - - if do_postprocess: - return GeneralizedRCNN._postprocess(results, batched_inputs, images.image_sizes) - else: - return results - - def preprocess_image(self, batched_inputs): - """ - Normalize, pad and batch the input images. - """ - images = [x["image"].to(self.device) for x in batched_inputs] - images = [(x - self.pixel_mean) / self.pixel_std for x in images] - images = ImageList.from_tensors(images, self.backbone.size_divisibility) - return images - - @staticmethod - def _postprocess(instances, batched_inputs, image_sizes): - """ - Rescale the output instances to the target size. 
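A standalone plain-torch sketch of what preprocess_image above does: normalize each image and pad the batch to a common size that is a multiple of the backbone's size divisibility.

import torch
import torch.nn.functional as F

def normalize_and_pad(images, pixel_mean, pixel_std, size_divisibility=32):
    # images: list of (C, H, W) tensors; mirrors preprocess_image + ImageList padding
    images = [(x - pixel_mean) / pixel_std for x in images]
    max_h = max(x.shape[1] for x in images)
    max_w = max(x.shape[2] for x in images)
    # round the common size up to a multiple of size_divisibility
    max_h = (max_h + size_divisibility - 1) // size_divisibility * size_divisibility
    max_w = (max_w + size_divisibility - 1) // size_divisibility * size_divisibility
    return torch.stack(
        [F.pad(x, (0, max_w - x.shape[2], 0, max_h - x.shape[1])) for x in images]
    )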
- """ - # note: private function; subject to changes - processed_results = [] - for results_per_image, input_per_image, image_size in zip( - instances, batched_inputs, image_sizes - ): - height = input_per_image.get("height", image_size[0]) - width = input_per_image.get("width", image_size[1]) - r = detector_postprocess(results_per_image, height, width) - processed_results.append({"instances": r}) - return processed_results - - -@META_ARCH_REGISTRY.register() -class ProposalNetwork(nn.Module): - """ - A meta architecture that only predicts object proposals. - """ - - def __init__(self, cfg): - super().__init__() - self.backbone = build_backbone(cfg) - self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape()) - - self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)) - self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)) - - @property - def device(self): - return self.pixel_mean.device - - def forward(self, batched_inputs): - """ - Args: - Same as in :class:`GeneralizedRCNN.forward` - - Returns: - list[dict]: - Each dict is the output for one input image. - The dict contains one key "proposals" whose value is a - :class:`Instances` with keys "proposal_boxes" and "objectness_logits". - """ - images = [x["image"].to(self.device) for x in batched_inputs] - images = [(x - self.pixel_mean) / self.pixel_std for x in images] - images = ImageList.from_tensors(images, self.backbone.size_divisibility) - features = self.backbone(images.tensor) - - if "instances" in batched_inputs[0]: - gt_instances = [x["instances"].to(self.device) for x in batched_inputs] - elif "targets" in batched_inputs[0]: - log_first_n( - logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10 - ) - gt_instances = [x["targets"].to(self.device) for x in batched_inputs] - else: - gt_instances = None - proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) - # In training, the proposals are not useful at all but we generate them anyway. - # This makes RPN-only models about 5% slower. - if self.training: - return proposal_losses - - processed_results = [] - for results_per_image, input_per_image, image_size in zip( - proposals, batched_inputs, images.image_sizes - ): - height = input_per_image.get("height", image_size[0]) - width = input_per_image.get("width", image_size[1]) - r = detector_postprocess(results_per_image, height, width) - processed_results.append({"proposals": r}) - return processed_results diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/retinanet.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/retinanet.py deleted file mode 100644 index 35c42cc..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/retinanet.py +++ /dev/null @@ -1,489 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import logging -import math -import numpy as np -from typing import List -import torch -from fvcore.nn import sigmoid_focal_loss_jit, smooth_l1_loss -from torch import nn - -from detectron2.layers import ShapeSpec, batched_nms, cat -from detectron2.structures import Boxes, ImageList, Instances, pairwise_iou -from detectron2.utils.events import get_event_storage -from detectron2.utils.logger import log_first_n - -from ..anchor_generator import build_anchor_generator -from ..backbone import build_backbone -from ..box_regression import Box2BoxTransform -from ..matcher import Matcher -from ..postprocessing import detector_postprocess -from .build import META_ARCH_REGISTRY - -__all__ = ["RetinaNet"] - - -def permute_to_N_HWA_K(tensor, K): - """ - Transpose/reshape a tensor from (N, (A x K), H, W) to (N, (HxWxA), K) - """ - assert tensor.dim() == 4, tensor.shape - N, _, H, W = tensor.shape - tensor = tensor.view(N, -1, K, H, W) - tensor = tensor.permute(0, 3, 4, 1, 2) - tensor = tensor.reshape(N, -1, K) # Size=(N,HWA,K) - return tensor - - -def permute_all_cls_and_box_to_N_HWA_K_and_concat(box_cls, box_delta, num_classes=80): - """ - Rearrange the tensor layout from the network output, i.e.: - list[Tensor]: #lvl tensors of shape (N, A x K, Hi, Wi) - to per-image predictions, i.e.: - Tensor: of shape (N x sum(Hi x Wi x A), K) - """ - # for each feature level, permute the outputs to make them be in the - # same format as the labels. Note that the labels are computed for - # all feature levels concatenated, so we keep the same representation - # for the objectness and the box_delta - box_cls_flattened = [permute_to_N_HWA_K(x, num_classes) for x in box_cls] - box_delta_flattened = [permute_to_N_HWA_K(x, 4) for x in box_delta] - # concatenate on the first dimension (representing the feature levels), to - # take into account the way the labels were generated (with all feature maps - # being concatenated as well) - box_cls = cat(box_cls_flattened, dim=1).view(-1, num_classes) - box_delta = cat(box_delta_flattened, dim=1).view(-1, 4) - return box_cls, box_delta - - -@META_ARCH_REGISTRY.register() -class RetinaNet(nn.Module): - """ - Implement RetinaNet in :paper:`RetinaNet`. 
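A quick shape check, in plain torch, of the (N, A*K, H, W) -> (N, H*W*A, K) permutation implemented by permute_to_N_HWA_K above.

import torch

N, A, K, H, W = 2, 9, 80, 13, 17
x = torch.randn(N, A * K, H, W)
y = x.view(N, A, K, H, W).permute(0, 3, 4, 1, 2).reshape(N, -1, K)
assert y.shape == (N, H * W * A, K)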
- """ - - def __init__(self, cfg): - super().__init__() - - # fmt: off - self.num_classes = cfg.MODEL.RETINANET.NUM_CLASSES - self.in_features = cfg.MODEL.RETINANET.IN_FEATURES - # Loss parameters: - self.focal_loss_alpha = cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA - self.focal_loss_gamma = cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA - self.smooth_l1_loss_beta = cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA - # Inference parameters: - self.score_threshold = cfg.MODEL.RETINANET.SCORE_THRESH_TEST - self.topk_candidates = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST - self.nms_threshold = cfg.MODEL.RETINANET.NMS_THRESH_TEST - self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE - # Vis parameters - self.vis_period = cfg.VIS_PERIOD - self.input_format = cfg.INPUT.FORMAT - # fmt: on - - self.backbone = build_backbone(cfg) - - backbone_shape = self.backbone.output_shape() - feature_shapes = [backbone_shape[f] for f in self.in_features] - self.head = RetinaNetHead(cfg, feature_shapes) - self.anchor_generator = build_anchor_generator(cfg, feature_shapes) - - # Matching and loss - self.box2box_transform = Box2BoxTransform(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS) - self.matcher = Matcher( - cfg.MODEL.RETINANET.IOU_THRESHOLDS, - cfg.MODEL.RETINANET.IOU_LABELS, - allow_low_quality_matches=True, - ) - - self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)) - self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)) - - """ - In Detectron1, loss is normalized by number of foreground samples in the batch. - When batch size is 1 per GPU, #foreground has a large variance and - using it lead to lower performance. Here we maintain an EMA of #foreground to - stabilize the normalizer. - """ - self.loss_normalizer = 100 # initialize with any reasonable #fg that's not too small - self.loss_normalizer_momentum = 0.9 - - @property - def device(self): - return self.pixel_mean.device - - def visualize_training(self, batched_inputs, results): - """ - A function used to visualize ground truth images and final network predictions. - It shows ground truth bounding boxes on the original image and up to 20 - predicted object bounding boxes on the original image. - - Args: - batched_inputs (list): a list that contains input to the model. - results (List[Instances]): a list of #images elements. - """ - from detectron2.utils.visualizer import Visualizer - - assert len(batched_inputs) == len( - results - ), "Cannot visualize inputs and results of different sizes" - storage = get_event_storage() - max_boxes = 20 - - image_index = 0 # only visualize a single image - img = batched_inputs[image_index]["image"].cpu().numpy() - assert img.shape[0] == 3, "Images should have 3 channels." 
- if self.input_format == "BGR": - img = img[::-1, :, :] - img = img.transpose(1, 2, 0) - v_gt = Visualizer(img, None) - v_gt = v_gt.overlay_instances(boxes=batched_inputs[image_index]["instances"].gt_boxes) - anno_img = v_gt.get_image() - processed_results = detector_postprocess(results[image_index], img.shape[0], img.shape[1]) - predicted_boxes = processed_results.pred_boxes.tensor.detach().cpu().numpy() - - v_pred = Visualizer(img, None) - v_pred = v_pred.overlay_instances(boxes=predicted_boxes[0:max_boxes]) - prop_img = v_pred.get_image() - vis_img = np.vstack((anno_img, prop_img)) - vis_img = vis_img.transpose(2, 0, 1) - vis_name = f"Top: GT bounding boxes; Bottom: {max_boxes} Highest Scoring Results" - storage.put_image(vis_name, vis_img) - - def forward(self, batched_inputs): - """ - Args: - batched_inputs: a list, batched outputs of :class:`DatasetMapper` . - Each item in the list contains the inputs for one image. - For now, each item in the list is a dict that contains: - - * image: Tensor, image in (C, H, W) format. - * instances: Instances - - Other information that's included in the original dicts, such as: - - * "height", "width" (int): the output resolution of the model, used in inference. - See :meth:`postprocess` for details. - Returns: - dict[str: Tensor]: - mapping from a named loss to a tensor storing the loss. Used during training only. - """ - images = self.preprocess_image(batched_inputs) - if "instances" in batched_inputs[0]: - gt_instances = [x["instances"].to(self.device) for x in batched_inputs] - elif "targets" in batched_inputs[0]: - log_first_n( - logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10 - ) - gt_instances = [x["targets"].to(self.device) for x in batched_inputs] - else: - gt_instances = None - - features = self.backbone(images.tensor) - features = [features[f] for f in self.in_features] - box_cls, box_delta = self.head(features) - anchors = self.anchor_generator(features) - - if self.training: - gt_classes, gt_anchors_reg_deltas = self.get_ground_truth(anchors, gt_instances) - losses = self.losses(gt_classes, gt_anchors_reg_deltas, box_cls, box_delta) - - if self.vis_period > 0: - storage = get_event_storage() - if storage.iter % self.vis_period == 0: - results = self.inference(box_cls, box_delta, anchors, images.image_sizes) - self.visualize_training(batched_inputs, results) - - return losses - else: - results = self.inference(box_cls, box_delta, anchors, images.image_sizes) - processed_results = [] - for results_per_image, input_per_image, image_size in zip( - results, batched_inputs, images.image_sizes - ): - height = input_per_image.get("height", image_size[0]) - width = input_per_image.get("width", image_size[1]) - r = detector_postprocess(results_per_image, height, width) - processed_results.append({"instances": r}) - return processed_results - - def losses(self, gt_classes, gt_anchors_deltas, pred_class_logits, pred_anchor_deltas): - """ - Args: - For `gt_classes` and `gt_anchors_deltas` parameters, see - :meth:`RetinaNet.get_ground_truth`. - Their shapes are (N, R) and (N, R, 4), respectively, where R is - the total number of anchors across levels, i.e. sum(Hi x Wi x A) - For `pred_class_logits` and `pred_anchor_deltas`, see - :meth:`RetinaNetHead.forward`. - - Returns: - dict[str, Tensor]: - mapping from a named loss to a scalar tensor - storing the loss. Used during training only. 
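For reference, a plain-torch sketch of the sigmoid focal loss that sigmoid_focal_loss_jit computes in the loss below, with alpha and gamma as configured above; a sketch of the standard formulation, not the fvcore implementation itself.

import torch
import torch.nn.functional as F

def sigmoid_focal_loss(logits, targets, alpha=0.25, gamma=2.0):
    p = torch.sigmoid(logits)
    ce = F.binary_cross_entropy_with_logits(logits, targets, reduction="none")
    p_t = p * targets + (1 - p) * (1 - targets)
    loss = ce * (1 - p_t) ** gamma
    if alpha >= 0:
        loss = (alpha * targets + (1 - alpha) * (1 - targets)) * loss
    return loss.sum()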
The dict keys are: - "loss_cls" and "loss_box_reg" - """ - pred_class_logits, pred_anchor_deltas = permute_all_cls_and_box_to_N_HWA_K_and_concat( - pred_class_logits, pred_anchor_deltas, self.num_classes - ) # Shapes: (N x R, K) and (N x R, 4), respectively. - - gt_classes = gt_classes.flatten() - gt_anchors_deltas = gt_anchors_deltas.view(-1, 4) - - valid_idxs = gt_classes >= 0 - foreground_idxs = (gt_classes >= 0) & (gt_classes != self.num_classes) - num_foreground = foreground_idxs.sum().item() - get_event_storage().put_scalar("num_foreground", num_foreground) - self.loss_normalizer = ( - self.loss_normalizer_momentum * self.loss_normalizer - + (1 - self.loss_normalizer_momentum) * num_foreground - ) - - gt_classes_target = torch.zeros_like(pred_class_logits) - gt_classes_target[foreground_idxs, gt_classes[foreground_idxs]] = 1 - - # logits loss - loss_cls = sigmoid_focal_loss_jit( - pred_class_logits[valid_idxs], - gt_classes_target[valid_idxs], - alpha=self.focal_loss_alpha, - gamma=self.focal_loss_gamma, - reduction="sum", - ) / max(1, self.loss_normalizer) - - # regression loss - loss_box_reg = smooth_l1_loss( - pred_anchor_deltas[foreground_idxs], - gt_anchors_deltas[foreground_idxs], - beta=self.smooth_l1_loss_beta, - reduction="sum", - ) / max(1, self.loss_normalizer) - - return {"loss_cls": loss_cls, "loss_box_reg": loss_box_reg} - - @torch.no_grad() - def get_ground_truth(self, anchors, targets): - """ - Args: - anchors (list[Boxes]): A list of #feature level Boxes. - The Boxes contains anchors of this image on the specific feature level. - targets (list[Instances]): a list of N `Instances`s. The i-th - `Instances` contains the ground-truth per-instance annotations - for the i-th input image. Specify `targets` during training only. - - Returns: - gt_classes (Tensor): - An integer tensor of shape (N, R) storing ground-truth labels for each anchor. - R is the total number of anchors, i.e. the sum of Hi x Wi x A for all levels. - Anchors with an IoU with some target higher than the foreground threshold - are assigned their corresponding label in the [0, K-1] range. - Anchors whose IoU are below the background threshold are assigned - the label "K". Anchors whose IoU are between the foreground and background - thresholds are assigned a label "-1", i.e. ignore. - gt_anchors_deltas (Tensor): - Shape (N, R, 4). - The last dimension represents ground-truth box2box transform - targets (dx, dy, dw, dh) that map each anchor to its matched ground-truth box. - The values in the tensor are meaningful only when the corresponding - anchor is labeled as foreground. - """ - gt_classes = [] - gt_anchors_deltas = [] - anchors = Boxes.cat(anchors) # Rx4 - - for targets_per_image in targets: - match_quality_matrix = pairwise_iou(targets_per_image.gt_boxes, anchors) - gt_matched_idxs, anchor_labels = self.matcher(match_quality_matrix) - - has_gt = len(targets_per_image) > 0 - if has_gt: - # ground truth box regression - matched_gt_boxes = targets_per_image.gt_boxes[gt_matched_idxs] - gt_anchors_reg_deltas_i = self.box2box_transform.get_deltas( - anchors.tensor, matched_gt_boxes.tensor - ) - - gt_classes_i = targets_per_image.gt_classes[gt_matched_idxs] - # Anchors with label 0 are treated as background. - gt_classes_i[anchor_labels == 0] = self.num_classes - # Anchors with label -1 are ignored. 
- gt_classes_i[anchor_labels == -1] = -1 - else: - gt_classes_i = torch.zeros_like(gt_matched_idxs) + self.num_classes - gt_anchors_reg_deltas_i = torch.zeros_like(anchors.tensor) - - gt_classes.append(gt_classes_i) - gt_anchors_deltas.append(gt_anchors_reg_deltas_i) - - return torch.stack(gt_classes), torch.stack(gt_anchors_deltas) - - def inference(self, box_cls, box_delta, anchors, image_sizes): - """ - Arguments: - box_cls, box_delta: Same as the output of :meth:`RetinaNetHead.forward` - anchors (list[Boxes]): A list of #feature level Boxes. - The Boxes contain anchors of this image on the specific feature level. - image_sizes (List[torch.Size]): the input image sizes - - Returns: - results (List[Instances]): a list of #images elements. - """ - results = [] - - box_cls = [permute_to_N_HWA_K(x, self.num_classes) for x in box_cls] - box_delta = [permute_to_N_HWA_K(x, 4) for x in box_delta] - # list[Tensor], one per level, each has shape (N, Hi x Wi x A, K or 4) - - for img_idx, image_size in enumerate(image_sizes): - box_cls_per_image = [box_cls_per_level[img_idx] for box_cls_per_level in box_cls] - box_reg_per_image = [box_reg_per_level[img_idx] for box_reg_per_level in box_delta] - results_per_image = self.inference_single_image( - box_cls_per_image, box_reg_per_image, anchors, tuple(image_size) - ) - results.append(results_per_image) - return results - - def inference_single_image(self, box_cls, box_delta, anchors, image_size): - """ - Single-image inference. Return bounding-box detection results by thresholding - on scores and applying non-maximum suppression (NMS). - - Arguments: - box_cls (list[Tensor]): list of #feature levels. Each entry contains - tensor of size (H x W x A, K) - box_delta (list[Tensor]): Same shape as 'box_cls' except that K becomes 4. - anchors (list[Boxes]): list of #feature levels. Each entry contains - a Boxes object, which contains all the anchors for that - image in that feature level. - image_size (tuple(H, W)): a tuple of the image height and width. - - Returns: - Same as `inference`, but for only one image. - """ - boxes_all = [] - scores_all = [] - class_idxs_all = [] - - # Iterate over every feature level - for box_cls_i, box_reg_i, anchors_i in zip(box_cls, box_delta, anchors): - # (HxWxAxK,) - box_cls_i = box_cls_i.flatten().sigmoid_() - - # Keep top k top scoring indices only. 
- num_topk = min(self.topk_candidates, box_reg_i.size(0)) - # torch.sort is actually faster than .topk (at least on GPUs) - predicted_prob, topk_idxs = box_cls_i.sort(descending=True) - predicted_prob = predicted_prob[:num_topk] - topk_idxs = topk_idxs[:num_topk] - - # filter out the proposals with low confidence score - keep_idxs = predicted_prob > self.score_threshold - predicted_prob = predicted_prob[keep_idxs] - topk_idxs = topk_idxs[keep_idxs] - - anchor_idxs = topk_idxs // self.num_classes - classes_idxs = topk_idxs % self.num_classes - - box_reg_i = box_reg_i[anchor_idxs] - anchors_i = anchors_i[anchor_idxs] - # predict boxes - predicted_boxes = self.box2box_transform.apply_deltas(box_reg_i, anchors_i.tensor) - - boxes_all.append(predicted_boxes) - scores_all.append(predicted_prob) - class_idxs_all.append(classes_idxs) - - boxes_all, scores_all, class_idxs_all = [ - cat(x) for x in [boxes_all, scores_all, class_idxs_all] - ] - keep = batched_nms(boxes_all, scores_all, class_idxs_all, self.nms_threshold) - keep = keep[: self.max_detections_per_image] - - result = Instances(image_size) - result.pred_boxes = Boxes(boxes_all[keep]) - result.scores = scores_all[keep] - result.pred_classes = class_idxs_all[keep] - return result - - def preprocess_image(self, batched_inputs): - """ - Normalize, pad and batch the input images. - """ - images = [x["image"].to(self.device) for x in batched_inputs] - images = [(x - self.pixel_mean) / self.pixel_std for x in images] - images = ImageList.from_tensors(images, self.backbone.size_divisibility) - return images - - -class RetinaNetHead(nn.Module): - """ - The head used in RetinaNet for object classification and box regression. - It has two subnets for the two tasks, with a common structure but separate parameters. - """ - - def __init__(self, cfg, input_shape: List[ShapeSpec]): - super().__init__() - # fmt: off - in_channels = input_shape[0].channels - num_classes = cfg.MODEL.RETINANET.NUM_CLASSES - num_convs = cfg.MODEL.RETINANET.NUM_CONVS - prior_prob = cfg.MODEL.RETINANET.PRIOR_PROB - num_anchors = build_anchor_generator(cfg, input_shape).num_cell_anchors - # fmt: on - assert ( - len(set(num_anchors)) == 1 - ), "Using different number of anchors between levels is not currently supported!" - num_anchors = num_anchors[0] - - cls_subnet = [] - bbox_subnet = [] - for _ in range(num_convs): - cls_subnet.append( - nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1) - ) - cls_subnet.append(nn.ReLU()) - bbox_subnet.append( - nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1) - ) - bbox_subnet.append(nn.ReLU()) - - self.cls_subnet = nn.Sequential(*cls_subnet) - self.bbox_subnet = nn.Sequential(*bbox_subnet) - self.cls_score = nn.Conv2d( - in_channels, num_anchors * num_classes, kernel_size=3, stride=1, padding=1 - ) - self.bbox_pred = nn.Conv2d(in_channels, num_anchors * 4, kernel_size=3, stride=1, padding=1) - - # Initialization - for modules in [self.cls_subnet, self.bbox_subnet, self.cls_score, self.bbox_pred]: - for layer in modules.modules(): - if isinstance(layer, nn.Conv2d): - torch.nn.init.normal_(layer.weight, mean=0, std=0.01) - torch.nn.init.constant_(layer.bias, 0) - - # Use prior in model initialization to improve stability - bias_value = -(math.log((1 - prior_prob) / prior_prob)) - torch.nn.init.constant_(self.cls_score.bias, bias_value) - - def forward(self, features): - """ - Arguments: - features (list[Tensor]): FPN feature map tensors in high to low resolution. 
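A quick check of the prior-probability bias initialization above: with prior_prob = 0.01 the classification head starts out predicting roughly 1% foreground probability everywhere.

import math

prior_prob = 0.01
bias_value = -math.log((1 - prior_prob) / prior_prob)
assert abs(1.0 / (1.0 + math.exp(-bias_value)) - prior_prob) < 1e-9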
- Each tensor in the list correspond to different feature levels. - - Returns: - logits (list[Tensor]): #lvl tensors, each has shape (N, AxK, Hi, Wi). - The tensor predicts the classification probability - at each spatial position for each of the A anchors and K object - classes. - bbox_reg (list[Tensor]): #lvl tensors, each has shape (N, Ax4, Hi, Wi). - The tensor predicts 4-vector (dx,dy,dw,dh) box - regression values for every anchor. These values are the - relative offset between the anchor and the ground truth box. - """ - logits = [] - bbox_reg = [] - for feature in features: - logits.append(self.cls_score(self.cls_subnet(feature))) - bbox_reg.append(self.bbox_pred(self.bbox_subnet(feature))) - return logits, bbox_reg diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/semantic_seg.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/semantic_seg.py deleted file mode 100644 index 2c41a72..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/semantic_seg.py +++ /dev/null @@ -1,186 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import numpy as np -from typing import Dict -import fvcore.nn.weight_init as weight_init -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.layers import Conv2d, ShapeSpec -from detectron2.structures import ImageList -from detectron2.utils.registry import Registry - -from ..backbone import build_backbone -from ..postprocessing import sem_seg_postprocess -from .build import META_ARCH_REGISTRY - -__all__ = ["SemanticSegmentor", "SEM_SEG_HEADS_REGISTRY", "SemSegFPNHead", "build_sem_seg_head"] - - -SEM_SEG_HEADS_REGISTRY = Registry("SEM_SEG_HEADS") -SEM_SEG_HEADS_REGISTRY.__doc__ = """ -Registry for semantic segmentation heads, which make semantic segmentation predictions -from feature maps. -""" - - -@META_ARCH_REGISTRY.register() -class SemanticSegmentor(nn.Module): - """ - Main class for semantic segmentation architectures. - """ - - def __init__(self, cfg): - super().__init__() - self.backbone = build_backbone(cfg) - self.sem_seg_head = build_sem_seg_head(cfg, self.backbone.output_shape()) - self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)) - self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)) - - @property - def device(self): - return self.pixel_mean.device - - def forward(self, batched_inputs): - """ - Args: - batched_inputs: a list, batched outputs of :class:`DatasetMapper`. - Each item in the list contains the inputs for one image. - - For now, each item in the list is a dict that contains: - - * "image": Tensor, image in (C, H, W) format. - * "sem_seg": semantic segmentation ground truth - * Other information that's included in the original dicts, such as: - "height", "width" (int): the output resolution of the model, used in inference. - See :meth:`postprocess` for details. - - Returns: - list[dict]: - Each dict is the output for one input image. - The dict contains one key "sem_seg" whose value is a - Tensor that represents the - per-pixel segmentation prediced by the head. - The prediction has shape KxHxW that represents the logits of - each class for each pixel. 
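A usage sketch for the output format just described: the K x H x W logits reduce to a per-pixel label map with an argmax over the class dimension.

import torch

logits = torch.randn(19, 512, 512)    # stand-in for outputs[0]["sem_seg"]
label_map = logits.argmax(dim=0)      # (H, W) tensor of predicted class indices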
- """ - images = [x["image"].to(self.device) for x in batched_inputs] - images = [(x - self.pixel_mean) / self.pixel_std for x in images] - images = ImageList.from_tensors(images, self.backbone.size_divisibility) - - features = self.backbone(images.tensor) - - if "sem_seg" in batched_inputs[0]: - targets = [x["sem_seg"].to(self.device) for x in batched_inputs] - targets = ImageList.from_tensors( - targets, self.backbone.size_divisibility, self.sem_seg_head.ignore_value - ).tensor - else: - targets = None - results, losses = self.sem_seg_head(features, targets) - - if self.training: - return losses - - processed_results = [] - for result, input_per_image, image_size in zip(results, batched_inputs, images.image_sizes): - height = input_per_image.get("height") - width = input_per_image.get("width") - r = sem_seg_postprocess(result, image_size, height, width) - processed_results.append({"sem_seg": r}) - return processed_results - - -def build_sem_seg_head(cfg, input_shape): - """ - Build a semantic segmentation head from `cfg.MODEL.SEM_SEG_HEAD.NAME`. - """ - name = cfg.MODEL.SEM_SEG_HEAD.NAME - return SEM_SEG_HEADS_REGISTRY.get(name)(cfg, input_shape) - - -@SEM_SEG_HEADS_REGISTRY.register() -class SemSegFPNHead(nn.Module): - """ - A semantic segmentation head described in :paper:`PanopticFPN`. - It takes FPN features as input and merges information from all - levels of the FPN into single output. - """ - - def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): - super().__init__() - - # fmt: off - self.in_features = cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES - feature_strides = {k: v.stride for k, v in input_shape.items()} - feature_channels = {k: v.channels for k, v in input_shape.items()} - self.ignore_value = cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE - num_classes = cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES - conv_dims = cfg.MODEL.SEM_SEG_HEAD.CONVS_DIM - self.common_stride = cfg.MODEL.SEM_SEG_HEAD.COMMON_STRIDE - norm = cfg.MODEL.SEM_SEG_HEAD.NORM - self.loss_weight = cfg.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT - # fmt: on - - self.scale_heads = [] - for in_feature in self.in_features: - head_ops = [] - head_length = max( - 1, int(np.log2(feature_strides[in_feature]) - np.log2(self.common_stride)) - ) - for k in range(head_length): - norm_module = nn.GroupNorm(32, conv_dims) if norm == "GN" else None - conv = Conv2d( - feature_channels[in_feature] if k == 0 else conv_dims, - conv_dims, - kernel_size=3, - stride=1, - padding=1, - bias=not norm, - norm=norm_module, - activation=F.relu, - ) - weight_init.c2_msra_fill(conv) - head_ops.append(conv) - if feature_strides[in_feature] != self.common_stride: - head_ops.append( - nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False) - ) - self.scale_heads.append(nn.Sequential(*head_ops)) - self.add_module(in_feature, self.scale_heads[-1]) - self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0) - weight_init.c2_msra_fill(self.predictor) - - def forward(self, features, targets=None): - """ - Returns: - In training, returns (None, dict of losses) - In inference, returns (CxHxW logits, {}) - """ - x = self.layers(features) - if self.training: - return None, self.losses(x, targets) - else: - x = F.interpolate( - x, scale_factor=self.common_stride, mode="bilinear", align_corners=False - ) - return x, {} - - def layers(self, features): - for i, f in enumerate(self.in_features): - if i == 0: - x = self.scale_heads[i](features[f]) - else: - x = x + self.scale_heads[i](features[f]) - x = self.predictor(x) - return x - - def losses(self, 
predictions, targets): - predictions = F.interpolate( - predictions, scale_factor=self.common_stride, mode="bilinear", align_corners=False - ) - loss = F.cross_entropy( - predictions, targets, reduction="mean", ignore_index=self.ignore_value - ) - losses = {"loss_sem_seg": loss * self.loss_weight} - return losses diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/poolers.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/poolers.py deleted file mode 100644 index 678f5af..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/poolers.py +++ /dev/null @@ -1,231 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -import math -import sys -import torch -from torch import nn -from torchvision.ops import RoIPool - -from detectron2.layers import ROIAlign, ROIAlignRotated, cat - -__all__ = ["ROIPooler"] - - -def assign_boxes_to_levels(box_lists, min_level, max_level, canonical_box_size, canonical_level): - """ - Map each box in `box_lists` to a feature map level index and return the assignment - vector. - - Args: - box_lists (list[Boxes] | list[RotatedBoxes]): A list of N Boxes or N RotatedBoxes, - where N is the number of images in the batch. - min_level (int): Smallest feature map level index. The input is considered index 0, - the output of stage 1 is index 1, and so. - max_level (int): Largest feature map level index. - canonical_box_size (int): A canonical box size in pixels (sqrt(box area)). - canonical_level (int): The feature map level index on which a canonically-sized box - should be placed. - - Returns: - A tensor of length M, where M is the total number of boxes aggregated over all - N batch images. The memory layout corresponds to the concatenation of boxes - from all images. Each element is the feature map index, as an offset from - `self.min_level`, for the corresponding box (so value i means the box is at - `self.min_level + i`). - """ - eps = sys.float_info.epsilon - box_sizes = torch.sqrt(cat([boxes.area() for boxes in box_lists])) - # Eqn.(1) in FPN paper - level_assignments = torch.floor( - canonical_level + torch.log2(box_sizes / canonical_box_size + eps) - ) - # clamp level to (min, max), in case the box size is too large or too small - # for the available feature maps - level_assignments = torch.clamp(level_assignments, min=min_level, max=max_level) - return level_assignments.to(torch.int64) - min_level - - -def convert_boxes_to_pooler_format(box_lists): - """ - Convert all boxes in `box_lists` to the low-level format used by ROI pooling ops - (see description under Returns). - - Args: - box_lists (list[Boxes] | list[RotatedBoxes]): - A list of N Boxes or N RotatedBoxes, where N is the number of images in the batch. - - Returns: - When input is list[Boxes]: - A tensor of shape (M, 5), where M is the total number of boxes aggregated over all - N batch images. - The 5 columns are (batch index, x0, y0, x1, y1), where batch index - is the index in [0, N) identifying which batch image the box with corners at - (x0, y0, x1, y1) comes from. - When input is list[RotatedBoxes]: - A tensor of shape (M, 6), where M is the total number of boxes aggregated over all - N batch images. - The 6 columns are (batch index, x_ctr, y_ctr, width, height, angle_degrees), - where batch index is the index in [0, N) identifying which batch image the - rotated box (x_ctr, y_ctr, width, height, angle_degrees) comes from. 
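A plain-torch illustration of the (M, 5) pooler format described above, using two hypothetical images with made-up boxes.

import torch

boxes_per_image = [
    torch.tensor([[10., 10., 50., 60.]]),                     # image 0: one box
    torch.tensor([[0., 0., 20., 20.], [5., 5., 30., 40.]]),   # image 1: two boxes
]
rows = []
for i, boxes in enumerate(boxes_per_image):
    batch_index = torch.full((len(boxes), 1), float(i))
    rows.append(torch.cat([batch_index, boxes], dim=1))
pooler_fmt_boxes = torch.cat(rows, dim=0)   # shape (3, 5): batch index, x0, y0, x1, y1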
- """ - - def fmt_box_list(box_tensor, batch_index): - repeated_index = torch.full( - (len(box_tensor), 1), batch_index, dtype=box_tensor.dtype, device=box_tensor.device - ) - return cat((repeated_index, box_tensor), dim=1) - - pooler_fmt_boxes = cat( - [fmt_box_list(box_list.tensor, i) for i, box_list in enumerate(box_lists)], dim=0 - ) - - return pooler_fmt_boxes - - -class ROIPooler(nn.Module): - """ - Region of interest feature map pooler that supports pooling from one or more - feature maps. - """ - - def __init__( - self, - output_size, - scales, - sampling_ratio, - pooler_type, - canonical_box_size=224, - canonical_level=4, - ): - """ - Args: - output_size (int, tuple[int] or list[int]): output size of the pooled region, - e.g., 14 x 14. If tuple or list is given, the length must be 2. - scales (list[float]): The scale for each low-level pooling op relative to - the input image. For a feature map with stride s relative to the input - image, scale is defined as a 1 / s. The stride must be power of 2. - When there are multiple scales, they must form a pyramid, i.e. they must be - a monotically decreasing geometric sequence with a factor of 1/2. - sampling_ratio (int): The `sampling_ratio` parameter for the ROIAlign op. - pooler_type (string): Name of the type of pooling operation that should be applied. - For instance, "ROIPool" or "ROIAlignV2". - canonical_box_size (int): A canonical box size in pixels (sqrt(box area)). The default - is heuristically defined as 224 pixels in the FPN paper (based on ImageNet - pre-training). - canonical_level (int): The feature map level index from which a canonically-sized box - should be placed. The default is defined as level 4 (stride=16) in the FPN paper, - i.e., a box of size 224x224 will be placed on the feature with stride=16. - The box placement for all boxes will be determined from their sizes w.r.t - canonical_box_size. For example, a box whose area is 4x that of a canonical box - should be used to pool features from feature level ``canonical_level+1``. - - Note that the actual input feature maps given to this module may not have - sufficiently many levels for the input boxes. If the boxes are too large or too - small for the input feature maps, the closest level will be used. - """ - super().__init__() - - if isinstance(output_size, int): - output_size = (output_size, output_size) - assert len(output_size) == 2 - assert isinstance(output_size[0], int) and isinstance(output_size[1], int) - self.output_size = output_size - - if pooler_type == "ROIAlign": - self.level_poolers = nn.ModuleList( - ROIAlign( - output_size, spatial_scale=scale, sampling_ratio=sampling_ratio, aligned=False - ) - for scale in scales - ) - elif pooler_type == "ROIAlignV2": - self.level_poolers = nn.ModuleList( - ROIAlign( - output_size, spatial_scale=scale, sampling_ratio=sampling_ratio, aligned=True - ) - for scale in scales - ) - elif pooler_type == "ROIPool": - self.level_poolers = nn.ModuleList( - RoIPool(output_size, spatial_scale=scale) for scale in scales - ) - elif pooler_type == "ROIAlignRotated": - self.level_poolers = nn.ModuleList( - ROIAlignRotated(output_size, spatial_scale=scale, sampling_ratio=sampling_ratio) - for scale in scales - ) - else: - raise ValueError("Unknown pooler type: {}".format(pooler_type)) - - # Map scale (defined as 1 / stride) to its feature map level under the - # assumption that stride is a power of 2. 
- min_level = -(math.log2(scales[0])) - max_level = -(math.log2(scales[-1])) - assert math.isclose(min_level, int(min_level)) and math.isclose( - max_level, int(max_level) - ), "Featuremap stride is not power of 2!" - self.min_level = int(min_level) - self.max_level = int(max_level) - assert ( - len(scales) == self.max_level - self.min_level + 1 - ), "[ROIPooler] Sizes of input featuremaps do not form a pyramid!" - assert 0 < self.min_level and self.min_level <= self.max_level - self.canonical_level = canonical_level - assert canonical_box_size > 0 - self.canonical_box_size = canonical_box_size - - def forward(self, x, box_lists): - """ - Args: - x (list[Tensor]): A list of feature maps of NCHW shape, with scales matching those - used to construct this module. - box_lists (list[Boxes] | list[RotatedBoxes]): - A list of N Boxes or N RotatedBoxes, where N is the number of images in the batch. - The box coordinates are defined on the original image and - will be scaled by the `scales` argument of :class:`ROIPooler`. - - Returns: - Tensor: - A tensor of shape (M, C, output_size, output_size) where M is the total number of - boxes aggregated over all N batch images and C is the number of channels in `x`. - """ - num_level_assignments = len(self.level_poolers) - - assert isinstance(x, list) and isinstance( - box_lists, list - ), "Arguments to pooler must be lists" - assert ( - len(x) == num_level_assignments - ), "unequal value, num_level_assignments={}, but x is list of {} Tensors".format( - num_level_assignments, len(x) - ) - - assert len(box_lists) == x[0].size( - 0 - ), "unequal value, x[0] batch dim 0 is {}, but box_list has length {}".format( - x[0].size(0), len(box_lists) - ) - - pooler_fmt_boxes = convert_boxes_to_pooler_format(box_lists) - - if num_level_assignments == 1: - return self.level_poolers[0](x[0], pooler_fmt_boxes) - - level_assignments = assign_boxes_to_levels( - box_lists, self.min_level, self.max_level, self.canonical_box_size, self.canonical_level - ) - - num_boxes = len(pooler_fmt_boxes) - num_channels = x[0].shape[1] - output_size = self.output_size[0] - - dtype, device = x[0].dtype, x[0].device - output = torch.zeros( - (num_boxes, num_channels, output_size, output_size), dtype=dtype, device=device - ) - - for level, (x_level, pooler) in enumerate(zip(x, self.level_poolers)): - inds = torch.nonzero(level_assignments == level, as_tuple=True)[0] - pooler_fmt_boxes_level = pooler_fmt_boxes[inds] - output[inds] = pooler(x_level, pooler_fmt_boxes_level) - - return output diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/postprocessing.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/postprocessing.py deleted file mode 100644 index e85541f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/postprocessing.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from torch.nn import functional as F - -from detectron2.layers import paste_masks_in_image -from detectron2.structures import Instances -from detectron2.utils.memory import retry_if_cuda_oom - - -def detector_postprocess(results, output_height, output_width, mask_threshold=0.5): - """ - Resize the output instances. - The input images are often resized when entering an object detector. - As a result, we often need the outputs of the detector in a different - resolution from its inputs. 
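A plain-torch sketch of the box rescaling this function performs: predictions made at the resized input resolution are mapped back to the requested output resolution.

import torch

input_h, input_w = 800, 1202      # resolution the detector saw
output_h, output_w = 480, 720     # requested output resolution
boxes = torch.tensor([[100., 150., 400., 500.]])   # x0, y0, x1, y1 at input scale
scale_x, scale_y = output_w / input_w, output_h / input_h
boxes[:, 0::2] *= scale_x
boxes[:, 1::2] *= scale_y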
- - This function will resize the raw outputs of an R-CNN detector - to produce outputs according to the desired output resolution. - - Args: - results (Instances): the raw outputs from the detector. - `results.image_size` contains the input image resolution the detector sees. - This object might be modified in-place. - output_height, output_width: the desired output resolution. - - Returns: - Instances: the resized output from the model, based on the output resolution - """ - scale_x, scale_y = (output_width / results.image_size[1], output_height / results.image_size[0]) - results = Instances((output_height, output_width), **results.get_fields()) - - if results.has("pred_boxes"): - output_boxes = results.pred_boxes - elif results.has("proposal_boxes"): - output_boxes = results.proposal_boxes - - output_boxes.scale(scale_x, scale_y) - output_boxes.clip(results.image_size) - - results = results[output_boxes.nonempty()] - - if results.has("pred_masks"): - results.pred_masks = retry_if_cuda_oom(paste_masks_in_image)( - results.pred_masks[:, 0, :, :], # N, 1, M, M - results.pred_boxes, - results.image_size, - threshold=mask_threshold, - ) - - if results.has("pred_keypoints"): - results.pred_keypoints[:, :, 0] *= scale_x - results.pred_keypoints[:, :, 1] *= scale_y - - return results - - -def sem_seg_postprocess(result, img_size, output_height, output_width): - """ - Return semantic segmentation predictions in the original resolution. - - The input images are often resized when entering semantic segmentor. Moreover, in same - cases, they also padded inside segmentor to be divisible by maximum network stride. - As a result, we often need the predictions of the segmentor in a different - resolution from its inputs. - - Args: - result (Tensor): semantic segmentation prediction logits. A tensor of shape (C, H, W), - where C is the number of classes, and H, W are the height and width of the prediction. - img_size (tuple): image size that segmentor is taking as input. - output_height, output_width: the desired output resolution. - - Returns: - semantic segmentation prediction (Tensor): A tensor of the shape - (C, output_height, output_width) that contains per-pixel soft predictions. - """ - result = result[:, : img_size[0], : img_size[1]].expand(1, -1, -1, -1) - result = F.interpolate( - result, size=(output_height, output_width), mode="bilinear", align_corners=False - )[0] - return result diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/__init__.py deleted file mode 100644 index 64fb6d4..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .build import PROPOSAL_GENERATOR_REGISTRY, build_proposal_generator -from .rpn import RPN_HEAD_REGISTRY, build_rpn_head, RPN diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/build.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/build.py deleted file mode 100644 index 7f252bc..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/build.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -from detectron2.utils.registry import Registry - -PROPOSAL_GENERATOR_REGISTRY = Registry("PROPOSAL_GENERATOR") -PROPOSAL_GENERATOR_REGISTRY.__doc__ = """ -Registry for proposal generator, which produces object proposals from feature maps. - -The registered object will be called with `obj(cfg, input_shape)`. -The call should return a `nn.Module` object. -""" - -from . import rpn, rrpn # noqa F401 isort:skip - - -def build_proposal_generator(cfg, input_shape): - """ - Build a proposal generator from `cfg.MODEL.PROPOSAL_GENERATOR.NAME`. - The name can be "PrecomputedProposals" to use no proposal generator. - """ - name = cfg.MODEL.PROPOSAL_GENERATOR.NAME - if name == "PrecomputedProposals": - return None - - return PROPOSAL_GENERATOR_REGISTRY.get(name)(cfg, input_shape) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/proposal_utils.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/proposal_utils.py deleted file mode 100644 index d4af905..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/proposal_utils.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import math -import torch - -from detectron2.structures import Instances - - -def add_ground_truth_to_proposals(gt_boxes, proposals): - """ - Call `add_ground_truth_to_proposals_single_image` for all images. - - Args: - gt_boxes(list[Boxes]): list of N elements. Element i is a Boxes - representing the gound-truth for image i. - proposals (list[Instances]): list of N elements. Element i is a Instances - representing the proposals for image i. - - Returns: - list[Instances]: list of N Instances. Each is the proposals for the image, - with field "proposal_boxes" and "objectness_logits". - """ - assert gt_boxes is not None - - assert len(proposals) == len(gt_boxes) - if len(proposals) == 0: - return proposals - - return [ - add_ground_truth_to_proposals_single_image(gt_boxes_i, proposals_i) - for gt_boxes_i, proposals_i in zip(gt_boxes, proposals) - ] - - -def add_ground_truth_to_proposals_single_image(gt_boxes, proposals): - """ - Augment `proposals` with ground-truth boxes from `gt_boxes`. - - Args: - Same as `add_ground_truth_to_proposals`, but with gt_boxes and proposals - per image. - - Returns: - Same as `add_ground_truth_to_proposals`, but for only one image. - """ - device = proposals.objectness_logits.device - # Concatenating gt_boxes with proposals requires them to have the same fields - # Assign all ground-truth boxes an objectness logit corresponding to P(object) \approx 1. - gt_logit_value = math.log((1.0 - 1e-10) / (1 - (1.0 - 1e-10))) - - gt_logits = gt_logit_value * torch.ones(len(gt_boxes), device=device) - gt_proposal = Instances(proposals.image_size) - - gt_proposal.proposal_boxes = gt_boxes - gt_proposal.objectness_logits = gt_logits - new_proposals = Instances.cat([proposals, gt_proposal]) - - return new_proposals diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/rpn.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/rpn.py deleted file mode 100644 index 8eb93b8..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/rpn.py +++ /dev/null @@ -1,285 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -from typing import Dict, List -import torch -import torch.nn.functional as F -from torch import nn - -from detectron2.config import configurable -from detectron2.layers import ShapeSpec -from detectron2.structures import Boxes, Instances, pairwise_iou -from detectron2.utils.memory import retry_if_cuda_oom -from detectron2.utils.registry import Registry - -from ..anchor_generator import build_anchor_generator -from ..box_regression import Box2BoxTransform -from ..matcher import Matcher -from ..sampling import subsample_labels -from .build import PROPOSAL_GENERATOR_REGISTRY -from .rpn_outputs import RPNOutputs, find_top_rpn_proposals - -RPN_HEAD_REGISTRY = Registry("RPN_HEAD") -RPN_HEAD_REGISTRY.__doc__ = """ -Registry for RPN heads, which take feature maps and perform -objectness classification and bounding box regression for anchors. - -The registered object will be called with `obj(cfg, input_shape)`. -The call should return a `nn.Module` object. -""" - - -def build_rpn_head(cfg, input_shape): - """ - Build an RPN head defined by `cfg.MODEL.RPN.HEAD_NAME`. - """ - name = cfg.MODEL.RPN.HEAD_NAME - return RPN_HEAD_REGISTRY.get(name)(cfg, input_shape) - - -@RPN_HEAD_REGISTRY.register() -class StandardRPNHead(nn.Module): - """ - Standard RPN classification and regression heads described in :paper:`Faster R-CNN`. - Uses a 3x3 conv to produce a shared hidden state from which one 1x1 conv predicts - objectness logits for each anchor and a second 1x1 conv predicts bounding-box deltas - specifying how to deform each anchor into an object proposal. - """ - - @configurable - def __init__(self, *, in_channels: int, num_anchors: int, box_dim: int = 4): - """ - NOTE: this interface is experimental. - - Args: - in_channels (int): number of input feature channels. When using multiple - input features, they must have the same number of channels. - num_anchors (int): number of anchors to predict for *each spatial position* - on the feature map. The total number of anchors for each - feature map will be `num_anchors * H * W`. - box_dim (int): dimension of a box, which is also the number of box regression - predictions to make for each anchor. An axis aligned box has - box_dim=4, while a rotated box has box_dim=5. - """ - super().__init__() - # 3x3 conv for the hidden representation - self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1) - # 1x1 conv for predicting objectness logits - self.objectness_logits = nn.Conv2d(in_channels, num_anchors, kernel_size=1, stride=1) - # 1x1 conv for predicting box2box transform deltas - self.anchor_deltas = nn.Conv2d(in_channels, num_anchors * box_dim, kernel_size=1, stride=1) - - for l in [self.conv, self.objectness_logits, self.anchor_deltas]: - nn.init.normal_(l.weight, std=0.01) - nn.init.constant_(l.bias, 0) - - @classmethod - def from_config(cls, cfg, input_shape): - # Standard RPN is shared across levels: - in_channels = [s.channels for s in input_shape] - assert len(set(in_channels)) == 1, "Each level must have the same channel!" - in_channels = in_channels[0] - - # RPNHead should take the same input as anchor generator - # NOTE: it assumes that creating an anchor generator does not have unwanted side effect. 
- anchor_generator = build_anchor_generator(cfg, input_shape) - num_anchors = anchor_generator.num_anchors - box_dim = anchor_generator.box_dim - assert ( - len(set(num_anchors)) == 1 - ), "Each level must have the same number of anchors per spatial position" - return {"in_channels": in_channels, "num_anchors": num_anchors[0], "box_dim": box_dim} - - def forward(self, features): - """ - Args: - features (list[Tensor]): list of feature maps - - Returns: - list[Tensor]: A list of L elements. - Element i is a tensor of shape (N, A, Hi, Wi) representing - the predicted objectness logits for all anchors. A is the number of cell anchors. - list[Tensor]: A list of L elements. Element i is a tensor of shape - (N, A*box_dim, Hi, Wi) representing the predicted "deltas" used to transform anchors - to proposals. - """ - pred_objectness_logits = [] - pred_anchor_deltas = [] - for x in features: - t = F.relu(self.conv(x)) - pred_objectness_logits.append(self.objectness_logits(t)) - pred_anchor_deltas.append(self.anchor_deltas(t)) - return pred_objectness_logits, pred_anchor_deltas - - -@PROPOSAL_GENERATOR_REGISTRY.register() -class RPN(nn.Module): - """ - Region Proposal Network, introduced by :paper:`Faster R-CNN`. - """ - - def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): - super().__init__() - - # fmt: off - self.min_box_side_len = cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE - self.in_features = cfg.MODEL.RPN.IN_FEATURES - self.nms_thresh = cfg.MODEL.RPN.NMS_THRESH - self.batch_size_per_image = cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE - self.positive_fraction = cfg.MODEL.RPN.POSITIVE_FRACTION - self.smooth_l1_beta = cfg.MODEL.RPN.SMOOTH_L1_BETA - self.loss_weight = cfg.MODEL.RPN.LOSS_WEIGHT - # fmt: on - - # Map from self.training state to train/test settings - self.pre_nms_topk = { - True: cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN, - False: cfg.MODEL.RPN.PRE_NMS_TOPK_TEST, - } - self.post_nms_topk = { - True: cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN, - False: cfg.MODEL.RPN.POST_NMS_TOPK_TEST, - } - self.boundary_threshold = cfg.MODEL.RPN.BOUNDARY_THRESH - - self.anchor_generator = build_anchor_generator( - cfg, [input_shape[f] for f in self.in_features] - ) - self.box2box_transform = Box2BoxTransform(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS) - self.anchor_matcher = Matcher( - cfg.MODEL.RPN.IOU_THRESHOLDS, cfg.MODEL.RPN.IOU_LABELS, allow_low_quality_matches=True - ) - self.rpn_head = build_rpn_head(cfg, [input_shape[f] for f in self.in_features]) - - def _subsample_labels(self, label): - """ - Randomly sample a subset of positive and negative examples, and overwrite - the label vector to the ignore value (-1) for all elements that are not - included in the sample. - - Args: - labels (Tensor): a vector of -1, 0, 1. Will be modified in-place and returned. - """ - pos_idx, neg_idx = subsample_labels( - label, self.batch_size_per_image, self.positive_fraction, 0 - ) - # Fill with the ignore label (-1), then set positive and negative labels - label.fill_(-1) - label.scatter_(0, pos_idx, 1) - label.scatter_(0, neg_idx, 0) - return label - - @torch.no_grad() - def label_and_sample_anchors(self, anchors: List[Boxes], gt_instances: List[Instances]): - """ - Args: - anchors (list[Boxes]): anchors for each feature map. - gt_instances: the ground-truth instances for each image. - - Returns: - list[Tensor]: - List of #demo tensors. i-th element is a vector of labels whose length is - the total number of anchors across feature maps. Label values are in {-1, 0, 1}, - with meanings: -1 = ignore; 0 = negative class; 1 = positive class. 
- list[Tensor]: - i-th element is a Nx4 tensor, where N is the total number of anchors across - feature maps. The values are the matched gt boxes for each anchor. - Values are undefined for those anchors not labeled as 1. - """ - anchors = Boxes.cat(anchors) - - gt_boxes = [x.gt_boxes for x in gt_instances] - image_sizes = [x.image_size for x in gt_instances] - del gt_instances - - gt_labels = [] - matched_gt_boxes = [] - for image_size_i, gt_boxes_i in zip(image_sizes, gt_boxes): - """ - image_size_i: (h, w) for the i-th image - gt_boxes_i: ground-truth boxes for i-th image - """ - - match_quality_matrix = retry_if_cuda_oom(pairwise_iou)(gt_boxes_i, anchors) - matched_idxs, gt_labels_i = retry_if_cuda_oom(self.anchor_matcher)(match_quality_matrix) - # Matching is memory-expensive and may result in CPU tensors. But the result is small - gt_labels_i = gt_labels_i.to(device=gt_boxes_i.device) - del match_quality_matrix - - if self.boundary_threshold >= 0: - # Discard anchors that go out of the boundaries of the image - # NOTE: This is legacy functionality that is turned off by default in Detectron2 - anchors_inside_image = anchors.inside_box(image_size_i, self.boundary_threshold) - gt_labels_i[~anchors_inside_image] = -1 - - # A vector of labels (-1, 0, 1) for each anchor - gt_labels_i = self._subsample_labels(gt_labels_i) - - if len(gt_boxes_i) == 0: - # These values won't be used anyway since the anchor is labeled as background - matched_gt_boxes_i = torch.zeros_like(anchors.tensor) - else: - # TODO wasted indexing computation for ignored boxes - matched_gt_boxes_i = gt_boxes_i[matched_idxs].tensor - - gt_labels.append(gt_labels_i) # N,AHW - matched_gt_boxes.append(matched_gt_boxes_i) - return gt_labels, matched_gt_boxes - - def forward(self, images, features, gt_instances=None): - """ - Args: - images (ImageList): input images of length `N` - features (dict[str: Tensor]): input data as a mapping from feature - map name to tensor. Axis 0 represents the number of images `N` in - the input data; axes 1-3 are channels, height, and width, which may - vary between feature maps (e.g., if a feature pyramid is used). - gt_instances (list[Instances], optional): a length `N` list of `Instances`s. - Each `Instances` stores ground-truth instances for the corresponding image. - - Returns: - proposals: list[Instances]: contains fields "proposal_boxes", "objectness_logits" - loss: dict[Tensor] or None - """ - features = [features[f] for f in self.in_features] - pred_objectness_logits, pred_anchor_deltas = self.rpn_head(features) - anchors = self.anchor_generator(features) - - if self.training: - gt_labels, gt_boxes = self.label_and_sample_anchors(anchors, gt_instances) - else: - gt_labels, gt_boxes = None, None - - outputs = RPNOutputs( - self.box2box_transform, - self.batch_size_per_image, - images, - pred_objectness_logits, - pred_anchor_deltas, - anchors, - gt_labels, - gt_boxes, - self.smooth_l1_beta, - ) - - if self.training: - losses = {k: v * self.loss_weight for k, v in outputs.losses().items()} - else: - losses = {} - - with torch.no_grad(): - # Find the top proposals by applying NMS and removing boxes that - # are too small. The proposals are treated as fixed for approximate - # joint training with roi heads. This approach ignores the derivative - # w.r.t. the proposal boxes’ coordinates that are also network - # responses, so is approximate. 
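            # `self.training` below indexes the {True: train, False: test} top-k
            # dicts set up in `__init__`, so different pre/post-NMS budgets apply
            # at train and test time. The returned proposals are sorted by
            # objectness score in descending order.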
- proposals = find_top_rpn_proposals( - outputs.predict_proposals(), - outputs.predict_objectness_logits(), - images, - self.nms_thresh, - self.pre_nms_topk[self.training], - self.post_nms_topk[self.training], - self.min_box_side_len, - self.training, - ) - - return proposals, losses diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/rpn_outputs.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/rpn_outputs.py deleted file mode 100644 index 44f846f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/rpn_outputs.py +++ /dev/null @@ -1,323 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import itertools -import logging -import torch -import torch.nn.functional as F -from fvcore.nn import smooth_l1_loss - -from detectron2.layers import batched_nms, cat -from detectron2.structures import Boxes, Instances -from detectron2.utils.events import get_event_storage - -logger = logging.getLogger(__name__) - -# TODO: comments for future refactoring of this module -# -# From @rbg: -# This code involves a significant amount of tensor reshaping and permuting. Look for -# ways to simplify this. - -""" -Shape shorthand in this module: - - N: number of images in the minibatch - L: number of feature maps per image on which RPN is run - A: number of cell anchors (must be the same for all feature maps) - Hi, Wi: height and width of the i-th feature map - 4: size of the box parameterization - -Naming convention: - - objectness: refers to the binary classification of an anchor as object vs. not - object. - - deltas: refers to the 4-d (dx, dy, dw, dh) deltas that parameterize the box2box - transform (see :class:`box_regression.Box2BoxTransform`). - - pred_objectness_logits: predicted objectness scores in [-inf, +inf]; use - sigmoid(pred_objectness_logits) to estimate P(object). - - gt_labels: ground-truth binary classification labels for objectness - - pred_anchor_deltas: predicted box2box transform deltas - - gt_anchor_deltas: ground-truth box2box transform deltas -""" - - -def find_top_rpn_proposals( - proposals, - pred_objectness_logits, - images, - nms_thresh, - pre_nms_topk, - post_nms_topk, - min_box_side_len, - training, -): - """ - For each feature map, select the `pre_nms_topk` highest scoring proposals, - apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk` - highest scoring proposals among all the feature maps if `training` is True, - otherwise, returns the highest `post_nms_topk` scoring proposals for each - feature map. - - Args: - proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 4). - All proposal predictions on the feature maps. - pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A). - images (ImageList): Input images as an :class:`ImageList`. - nms_thresh (float): IoU threshold to use for NMS - pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS. - When RPN is run on multiple feature maps (as in FPN) this number is per - feature map. - post_nms_topk (int): number of top k scoring proposals to keep after applying NMS. - When RPN is run on multiple feature maps (as in FPN) this number is total, - over all feature maps. - min_box_side_len (float): minimum proposal box side length in pixels (absolute units - wrt input images). - training (bool): True if proposals are to be used in training, otherwise False. 
- This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..." - comment. - - Returns: - proposals (list[Instances]): list of N Instances. The i-th Instances - stores post_nms_topk object proposals for image i, sorted by their - objectness score in descending order. - """ - image_sizes = images.image_sizes # in (h, w) order - num_images = len(image_sizes) - device = proposals[0].device - - # 1. Select top-k anchor for every level and every image - topk_scores = [] # #lvl Tensor, each of shape N x topk - topk_proposals = [] - level_ids = [] # #lvl Tensor, each of shape (topk,) - batch_idx = torch.arange(num_images, device=device) - for level_id, proposals_i, logits_i in zip( - itertools.count(), proposals, pred_objectness_logits - ): - Hi_Wi_A = logits_i.shape[1] - num_proposals_i = min(pre_nms_topk, Hi_Wi_A) - - # sort is faster than topk (https://github.com/pytorch/pytorch/issues/22812) - # topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1) - logits_i, idx = logits_i.sort(descending=True, dim=1) - topk_scores_i = logits_i[batch_idx, :num_proposals_i] - topk_idx = idx[batch_idx, :num_proposals_i] - - # each is N x topk - topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx] # N x topk x 4 - - topk_proposals.append(topk_proposals_i) - topk_scores.append(topk_scores_i) - level_ids.append(torch.full((num_proposals_i,), level_id, dtype=torch.int64, device=device)) - - # 2. Concat all levels together - topk_scores = cat(topk_scores, dim=1) - topk_proposals = cat(topk_proposals, dim=1) - level_ids = cat(level_ids, dim=0) - - # 3. For each image, run a per-level NMS, and choose topk results. - results = [] - for n, image_size in enumerate(image_sizes): - boxes = Boxes(topk_proposals[n]) - scores_per_img = topk_scores[n] - lvl = level_ids - - valid_mask = torch.isfinite(boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img) - if not valid_mask.all(): - if training: - raise FloatingPointError( - "Predicted boxes or scores contain Inf/NaN. Training has diverged." - ) - boxes = boxes[valid_mask] - scores_per_img = scores_per_img[valid_mask] - lvl = lvl[valid_mask] - boxes.clip(image_size) - - # filter empty boxes - keep = boxes.nonempty(threshold=min_box_side_len) - if keep.sum().item() != len(boxes): - boxes, scores_per_img, lvl = boxes[keep], scores_per_img[keep], lvl[keep] - - keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh) - # In Detectron1, there was different behavior during training vs. testing. - # (https://github.com/facebookresearch/Detectron/issues/459) - # During training, topk is over the proposals from *all* images in the training batch. - # During testing, it is over the proposals for each image separately. - # As a result, the training behavior becomes batch-dependent, - # and the configuration "POST_NMS_TOPK_TRAIN" end up relying on the batch size. - # This bug is addressed in Detectron2 to make the behavior independent of batch size. - keep = keep[:post_nms_topk] # keep is already sorted - - res = Instances(image_size) - res.proposal_boxes = boxes[keep] - res.objectness_logits = scores_per_img[keep] - results.append(res) - return results - - -def rpn_losses( - gt_labels, gt_anchor_deltas, pred_objectness_logits, pred_anchor_deltas, smooth_l1_beta -): - """ - Args: - gt_labels (Tensor): shape (N,), each element in {-1, 0, 1} representing - ground-truth objectness labels with: -1 = ignore; 0 = not object; 1 = object. 
- gt_anchor_deltas (Tensor): shape (N, box_dim), row i represents ground-truth - box2box transform targets (dx, dy, dw, dh) or (dx, dy, dw, dh, da) that map anchor i to - its matched ground-truth box. - pred_objectness_logits (Tensor): shape (N,), each element is a predicted objectness - logit. - pred_anchor_deltas (Tensor): shape (N, box_dim), each row is a predicted box2box - transform (dx, dy, dw, dh) or (dx, dy, dw, dh, da) - smooth_l1_beta (float): The transition point between L1 and L2 loss in - the smooth L1 loss function. When set to 0, the loss becomes L1. When - set to +inf, the loss becomes constant 0. - - Returns: - objectness_loss, localization_loss, both unnormalized (summed over samples). - """ - pos_masks = gt_labels == 1 - localization_loss = smooth_l1_loss( - pred_anchor_deltas[pos_masks], gt_anchor_deltas[pos_masks], smooth_l1_beta, reduction="sum" - ) - - valid_masks = gt_labels >= 0 - objectness_loss = F.binary_cross_entropy_with_logits( - pred_objectness_logits[valid_masks], - gt_labels[valid_masks].to(torch.float32), - reduction="sum", - ) - return objectness_loss, localization_loss - - -class RPNOutputs(object): - def __init__( - self, - box2box_transform, - batch_size_per_image, - images, - pred_objectness_logits, - pred_anchor_deltas, - anchors, - gt_labels=None, - gt_boxes=None, - smooth_l1_beta=0.0, - ): - """ - Args: - box2box_transform (Box2BoxTransform): :class:`Box2BoxTransform` instance for - anchor-proposal transformations. - images (ImageList): :class:`ImageList` instance representing N input images - batch_size_per_image (int): number of proposals to sample when training - pred_objectness_logits (list[Tensor]): A list of L elements. - Element i is a tensor of shape (N, A, Hi, Wi) representing - the predicted objectness logits for anchors. - pred_anchor_deltas (list[Tensor]): A list of L elements. Element i is a tensor of shape - (N, A*4 or 5, Hi, Wi) representing the predicted "deltas" used to transform anchors - to proposals. - anchors (list[Boxes or RotatedBoxes]): A list of Boxes/RotatedBoxes storing the all - the anchors for each feature map. See :meth:`AnchorGenerator.forward`. - gt_labels (list[Tensor]): Available on in training. - See :meth:`RPN.label_and_sample_anchors`. - gt_boxes (list[Boxes or RotatedBoxes]): Available on in training. - See :meth:`RPN.label_and_sample_anchors`. - smooth_l1_beta (float): The transition point between L1 and L2 loss in - the smooth L1 loss function. When set to 0, the loss becomes L1. When - set to +inf, the loss becomes constant 0. - """ - self.box2box_transform = box2box_transform - self.batch_size_per_image = batch_size_per_image - - B = anchors[0].tensor.size(1) # box dimension (4 or 5) - self.pred_objectness_logits = [ - # Reshape: (N, A, Hi, Wi) -> (N, Hi, Wi, A) -> (N, Hi*Wi*A) - score.permute(0, 2, 3, 1).flatten(1) - for score in pred_objectness_logits - ] - - self.pred_anchor_deltas = [ - # Reshape: (N, A*B, Hi, Wi) -> (N, A, B, Hi, Wi) -> (N, Hi, Wi, A, B) - # -> (N, Hi*Wi*A, B) - x.view(x.shape[0], -1, B, x.shape[-2], x.shape[-1]) - .permute(0, 3, 4, 1, 2) - .flatten(1, -2) - for x in pred_anchor_deltas - ] - - self.anchors = anchors - - self.gt_boxes = gt_boxes - self.gt_labels = gt_labels - - self.num_images = len(images) - self.smooth_l1_beta = smooth_l1_beta - - def losses(self): - """ - Return the losses from a set of RPN predictions and their associated ground-truth. - - Returns: - dict[loss name -> loss value]: A dict mapping from loss name to loss value. 
- Loss names are: `loss_rpn_cls` for objectness classification and - `loss_rpn_loc` for proposal localization. - """ - gt_labels = torch.stack(self.gt_labels) - anchors = self.anchors[0].cat(self.anchors).tensor # Ax(4 or 5) - gt_anchor_deltas = [self.box2box_transform.get_deltas(anchors, k) for k in self.gt_boxes] - gt_anchor_deltas = torch.stack(gt_anchor_deltas) - - # Log the number of positive/negative anchors per-image that's used in training - num_pos_anchors = (gt_labels == 1).sum().item() - num_neg_anchors = (gt_labels == 0).sum().item() - storage = get_event_storage() - storage.put_scalar("rpn/num_pos_anchors", num_pos_anchors / self.num_images) - storage.put_scalar("rpn/num_neg_anchors", num_neg_anchors / self.num_images) - - objectness_loss, localization_loss = rpn_losses( - gt_labels, - gt_anchor_deltas, - # concat on the Hi*Wi*A dimension - cat(self.pred_objectness_logits, dim=1), - cat(self.pred_anchor_deltas, dim=1), - self.smooth_l1_beta, - ) - normalizer = self.batch_size_per_image * self.num_images - return { - "loss_rpn_cls": objectness_loss / normalizer, - "loss_rpn_loc": localization_loss / normalizer, - } - - def predict_proposals(self): - """ - Transform anchors into proposals by applying the predicted anchor deltas. - - Returns: - proposals (list[Tensor]): A list of L tensors. Tensor i has shape - (N, Hi*Wi*A, B), where B is box dimension (4 or 5). - """ - proposals = [] - # For each feature map - for anchors_i, pred_anchor_deltas_i in zip(self.anchors, self.pred_anchor_deltas): - B = anchors_i.tensor.size(1) - N = self.num_images - pred_anchor_deltas_i = pred_anchor_deltas_i.reshape(-1, B) - # Expand anchors to shape (N*Hi*Wi*A, B) - anchors_i = anchors_i.tensor.unsqueeze(0).expand(N, -1, -1).reshape(-1, B) - proposals_i = self.box2box_transform.apply_deltas(pred_anchor_deltas_i, anchors_i) - # Append feature map proposals with shape (N, Hi*Wi*A, B) - proposals.append(proposals_i.view(N, -1, B)) - return proposals - - def predict_objectness_logits(self): - """ - Return objectness logits in the same format as the proposals returned by - :meth:`predict_proposals`. - - Returns: - pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape - (N, Hi*Wi*A). - """ - return self.pred_objectness_logits diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/rrpn.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/rrpn.py deleted file mode 100644 index 8c2ac36..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/rrpn.py +++ /dev/null @@ -1,233 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import itertools -import logging -from typing import Dict, List -import torch - -from detectron2.layers import ShapeSpec, batched_nms_rotated, cat -from detectron2.structures import Instances, RotatedBoxes, pairwise_iou_rotated -from detectron2.utils.memory import retry_if_cuda_oom - -from ..box_regression import Box2BoxTransformRotated -from .build import PROPOSAL_GENERATOR_REGISTRY -from .rpn import RPN -from .rpn_outputs import RPNOutputs - -logger = logging.getLogger(__name__) - - -def find_top_rrpn_proposals( - proposals, - pred_objectness_logits, - images, - nms_thresh, - pre_nms_topk, - post_nms_topk, - min_box_side_len, - training, -): - """ - For each feature map, select the `pre_nms_topk` highest scoring proposals, - apply NMS, clip proposals, and remove small boxes. 
Return the `post_nms_topk` - highest scoring proposals among all the feature maps if `training` is True, - otherwise, returns the highest `post_nms_topk` scoring proposals for each - feature map. - - Args: - proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 5). - All proposal predictions on the feature maps. - pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A). - images (ImageList): Input images as an :class:`ImageList`. - nms_thresh (float): IoU threshold to use for NMS - pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS. - When RRPN is run on multiple feature maps (as in FPN) this number is per - feature map. - post_nms_topk (int): number of top k scoring proposals to keep after applying NMS. - When RRPN is run on multiple feature maps (as in FPN) this number is total, - over all feature maps. - min_box_side_len (float): minimum proposal box side length in pixels (absolute units - wrt input images). - training (bool): True if proposals are to be used in training, otherwise False. - This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..." - comment. - - Returns: - proposals (list[Instances]): list of N Instances. The i-th Instances - stores post_nms_topk object proposals for image i. - """ - image_sizes = images.image_sizes # in (h, w) order - num_images = len(image_sizes) - device = proposals[0].device - - # 1. Select top-k anchor for every level and every image - topk_scores = [] # #lvl Tensor, each of shape N x topk - topk_proposals = [] - level_ids = [] # #lvl Tensor, each of shape (topk,) - batch_idx = torch.arange(num_images, device=device) - for level_id, proposals_i, logits_i in zip( - itertools.count(), proposals, pred_objectness_logits - ): - Hi_Wi_A = logits_i.shape[1] - num_proposals_i = min(pre_nms_topk, Hi_Wi_A) - - # sort is faster than topk (https://github.com/pytorch/pytorch/issues/22812) - # topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1) - logits_i, idx = logits_i.sort(descending=True, dim=1) - topk_scores_i = logits_i[batch_idx, :num_proposals_i] - topk_idx = idx[batch_idx, :num_proposals_i] - - # each is N x topk - topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx] # N x topk x 5 - - topk_proposals.append(topk_proposals_i) - topk_scores.append(topk_scores_i) - level_ids.append(torch.full((num_proposals_i,), level_id, dtype=torch.int64, device=device)) - - # 2. Concat all levels together - topk_scores = cat(topk_scores, dim=1) - topk_proposals = cat(topk_proposals, dim=1) - level_ids = cat(level_ids, dim=0) - - # 3. For each image, run a per-level NMS, and choose topk results. - results = [] - for n, image_size in enumerate(image_sizes): - boxes = RotatedBoxes(topk_proposals[n]) - scores_per_img = topk_scores[n] - valid_mask = torch.isfinite(boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img) - if not valid_mask.all(): - boxes = boxes[valid_mask] - scores_per_img = scores_per_img[valid_mask] - boxes.clip(image_size) - - # filter empty boxes - keep = boxes.nonempty(threshold=min_box_side_len) - lvl = level_ids - if keep.sum().item() != len(boxes): - boxes, scores_per_img, lvl = (boxes[keep], scores_per_img[keep], level_ids[keep]) - - keep = batched_nms_rotated(boxes.tensor, scores_per_img, lvl, nms_thresh) - # In Detectron1, there was different behavior during training vs. testing. 
- # (https://github.com/facebookresearch/Detectron/issues/459) - # During training, topk is over the proposals from *all* images in the training batch. - # During testing, it is over the proposals for each image separately. - # As a result, the training behavior becomes batch-dependent, - # and the configuration "POST_NMS_TOPK_TRAIN" end up relying on the batch size. - # This bug is addressed in Detectron2 to make the behavior independent of batch size. - keep = keep[:post_nms_topk] - - res = Instances(image_size) - res.proposal_boxes = boxes[keep] - res.objectness_logits = scores_per_img[keep] - results.append(res) - return results - - -@PROPOSAL_GENERATOR_REGISTRY.register() -class RRPN(RPN): - """ - Rotated Region Proposal Network described in :paper:`RRPN`. - """ - - def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): - super().__init__(cfg, input_shape) - self.box2box_transform = Box2BoxTransformRotated(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS) - if self.boundary_threshold >= 0: - raise NotImplementedError( - "boundary_threshold is a legacy option not implemented for RRPN." - ) - - @torch.no_grad() - def label_and_sample_anchors(self, anchors: List[RotatedBoxes], gt_instances: List[Instances]): - """ - Args: - anchors (list[RotatedBoxes]): anchors for each feature map. - gt_instances: the ground-truth instances for each image. - - Returns: - list[Tensor]: - List of #demo tensors. i-th element is a vector of labels whose length is - the total number of anchors across feature maps. Label values are in {-1, 0, 1}, - with meanings: -1 = ignore; 0 = negative class; 1 = positive class. - list[Tensor]: - i-th element is a Nx5 tensor, where N is the total number of anchors across - feature maps. The values are the matched gt boxes for each anchor. - Values are undefined for those anchors not labeled as 1. - """ - anchors = RotatedBoxes.cat(anchors) - - gt_boxes = [x.gt_boxes for x in gt_instances] - del gt_instances - - gt_labels = [] - matched_gt_boxes = [] - for gt_boxes_i in gt_boxes: - """ - gt_boxes_i: ground-truth boxes for i-th image - """ - match_quality_matrix = retry_if_cuda_oom(pairwise_iou_rotated)(gt_boxes_i, anchors) - matched_idxs, gt_labels_i = retry_if_cuda_oom(self.anchor_matcher)(match_quality_matrix) - # Matching is memory-expensive and may result in CPU tensors. 
But the result is small - gt_labels_i = gt_labels_i.to(device=gt_boxes_i.device) - - # A vector of labels (-1, 0, 1) for each anchor - gt_labels_i = self._subsample_labels(gt_labels_i) - - if len(gt_boxes_i) == 0: - # These values won't be used anyway since the anchor is labeled as background - matched_gt_boxes_i = torch.zeros_like(anchors.tensor) - else: - # TODO wasted indexing computation for ignored boxes - matched_gt_boxes_i = gt_boxes_i[matched_idxs].tensor - - gt_labels.append(gt_labels_i) # N,AHW - matched_gt_boxes.append(matched_gt_boxes_i) - return gt_labels, matched_gt_boxes - - def forward(self, images, features, gt_instances=None): - # same signature as RPN.forward - features = [features[f] for f in self.in_features] - pred_objectness_logits, pred_anchor_deltas = self.rpn_head(features) - anchors = self.anchor_generator(features) - - if self.training: - gt_labels, gt_boxes = self.label_and_sample_anchors(anchors, gt_instances) - else: - gt_labels, gt_boxes = None, None - - outputs = RPNOutputs( - self.box2box_transform, - self.batch_size_per_image, - images, - pred_objectness_logits, - pred_anchor_deltas, - anchors, - gt_labels, - gt_boxes, - self.smooth_l1_beta, - ) - - if self.training: - losses = {k: v * self.loss_weight for k, v in outputs.losses().items()} - else: - losses = {} - - with torch.no_grad(): - # Find the top proposals by applying NMS and removing boxes that - # are too small. The proposals are treated as fixed for approximate - # joint training with roi heads. This approach ignores the derivative - # w.r.t. the proposal boxes’ coordinates that are also network - # responses, so is approximate. - - # Note: this line is the only difference v.s. RPN.forward - proposals = find_top_rrpn_proposals( - outputs.predict_proposals(), - outputs.predict_objectness_logits(), - images, - self.nms_thresh, - self.pre_nms_topk[self.training], - self.post_nms_topk[self.training], - self.min_box_side_len, - self.training, - ) - - return proposals, losses diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/__init__.py deleted file mode 100644 index a49099a..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .box_head import ROI_BOX_HEAD_REGISTRY, build_box_head -from .keypoint_head import ROI_KEYPOINT_HEAD_REGISTRY, build_keypoint_head, BaseKeypointRCNNHead -from .mask_head import ROI_MASK_HEAD_REGISTRY, build_mask_head, BaseMaskRCNNHead -from .roi_heads import ( - ROI_HEADS_REGISTRY, - ROIHeads, - Res5ROIHeads, - StandardROIHeads, - build_roi_heads, - select_foreground_proposals, -) -from .rotated_fast_rcnn import RROIHeads -from .fast_rcnn import FastRCNNOutputLayers - -from . import cascade_rcnn # isort:skip diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/box_head.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/box_head.py deleted file mode 100644 index de62d47..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/box_head.py +++ /dev/null @@ -1,115 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import numpy as np -from typing import List -import fvcore.nn.weight_init as weight_init -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.config import configurable -from detectron2.layers import Conv2d, Linear, ShapeSpec, get_norm -from detectron2.utils.registry import Registry - -ROI_BOX_HEAD_REGISTRY = Registry("ROI_BOX_HEAD") -ROI_BOX_HEAD_REGISTRY.__doc__ = """ -Registry for box heads, which make box predictions from per-region features. - -The registered object will be called with `obj(cfg, input_shape)`. -""" - - -@ROI_BOX_HEAD_REGISTRY.register() -class FastRCNNConvFCHead(nn.Module): - """ - A head with several 3x3 conv layers (each followed by norm & relu) and then - several fc layers (each followed by relu). - """ - - @configurable - def __init__( - self, input_shape: ShapeSpec, *, conv_dims: List[int], fc_dims: List[int], conv_norm="" - ): - """ - NOTE: this interface is experimental. - - Args: - input_shape (ShapeSpec): shape of the input feature. - conv_dims (list[int]): the output dimensions of the conv layers - fc_dims (list[int]): the output dimensions of the fc layers - conv_norm (str or callable): normalization for the conv layers. - See :func:`detectron2.layers.get_norm` for supported types. - """ - super().__init__() - assert len(conv_dims) + len(fc_dims) > 0 - - self._output_size = (input_shape.channels, input_shape.height, input_shape.width) - - self.conv_norm_relus = [] - for k, conv_dim in enumerate(conv_dims): - conv = Conv2d( - self._output_size[0], - conv_dim, - kernel_size=3, - padding=1, - bias=not conv_norm, - norm=get_norm(conv_norm, conv_dim), - activation=F.relu, - ) - self.add_module("conv{}".format(k + 1), conv) - self.conv_norm_relus.append(conv) - self._output_size = (conv_dim, self._output_size[1], self._output_size[2]) - - self.fcs = [] - for k, fc_dim in enumerate(fc_dims): - fc = Linear(np.prod(self._output_size), fc_dim) - self.add_module("fc{}".format(k + 1), fc) - self.fcs.append(fc) - self._output_size = fc_dim - - for layer in self.conv_norm_relus: - weight_init.c2_msra_fill(layer) - for layer in self.fcs: - weight_init.c2_xavier_fill(layer) - - @classmethod - def from_config(cls, cfg, input_shape): - num_conv = cfg.MODEL.ROI_BOX_HEAD.NUM_CONV - conv_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_DIM - num_fc = cfg.MODEL.ROI_BOX_HEAD.NUM_FC - fc_dim = cfg.MODEL.ROI_BOX_HEAD.FC_DIM - return { - "input_shape": input_shape, - "conv_dims": [conv_dim] * num_conv, - "fc_dims": [fc_dim] * num_fc, - "conv_norm": cfg.MODEL.ROI_BOX_HEAD.NORM, - } - - def forward(self, x): - for layer in self.conv_norm_relus: - x = layer(x) - if len(self.fcs): - if x.dim() > 2: - x = torch.flatten(x, start_dim=1) - for layer in self.fcs: - x = F.relu(layer(x)) - return x - - @property - def output_shape(self): - """ - Returns: - ShapeSpec: the output feature shape - """ - o = self._output_size - if isinstance(o, int): - return ShapeSpec(channels=o) - else: - return ShapeSpec(channels=o[0], height=o[1], width=o[2]) - - -def build_box_head(cfg, input_shape): - """ - Build a box head defined by `cfg.MODEL.ROI_BOX_HEAD.NAME`. 
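    The returned module is expected to expose an `output_shape` property (as
    `FastRCNNConvFCHead` above does) describing its output feature shape, which
    box predictors such as `FastRCNNOutputLayers` take as their input shape.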
- """ - name = cfg.MODEL.ROI_BOX_HEAD.NAME - return ROI_BOX_HEAD_REGISTRY.get(name)(cfg, input_shape) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/cascade_rcnn.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/cascade_rcnn.py deleted file mode 100644 index b3efdcf..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/cascade_rcnn.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from typing import List -import torch -from torch import nn -from torch.autograd.function import Function - -from detectron2.config import configurable -from detectron2.layers import ShapeSpec -from detectron2.structures import Boxes, Instances, pairwise_iou -from detectron2.utils.events import get_event_storage - -from ..box_regression import Box2BoxTransform -from ..matcher import Matcher -from ..poolers import ROIPooler -from .box_head import build_box_head -from .fast_rcnn import FastRCNNOutputLayers, fast_rcnn_inference -from .roi_heads import ROI_HEADS_REGISTRY, StandardROIHeads - - -class _ScaleGradient(Function): - @staticmethod - def forward(ctx, input, scale): - ctx.scale = scale - return input - - @staticmethod - def backward(ctx, grad_output): - return grad_output * ctx.scale, None - - -@ROI_HEADS_REGISTRY.register() -class CascadeROIHeads(StandardROIHeads): - """ - Implement :paper:`Cascade R-CNN`. - """ - - @configurable - def __init__( - self, - *, - box_in_features: List[str], - box_pooler: ROIPooler, - box_heads: List[nn.Module], - box_predictors: List[nn.Module], - proposal_matchers: List[Matcher], - **kwargs, - ): - """ - NOTE: this interface is experimental. - - Args: - box_pooler (ROIPooler): pooler that extracts region features from given boxes - box_heads (list[nn.Module]): box head for each cascade stage - box_predictors (list[nn.Module]): box predictor for each cascade stage - proposal_matchers (list[Matcher]): matcher with different IoU thresholds to - match boxes with ground truth for each stage. The first matcher matches - RPN proposals with ground truth, the other matchers use boxes predicted - by the previous stage as proposals and match them with ground truth. - """ - assert "proposal_matcher" not in kwargs, ( - "CascadeROIHeads takes 'proposal_matchers=' for each stage instead " - "of one 'proposal_matcher='." - ) - # The first matcher matches RPN proposals with ground truth, done in the base class - kwargs["proposal_matcher"] = proposal_matchers[0] - num_stages = self.num_cascade_stages = len(box_heads) - box_heads = nn.ModuleList(box_heads) - box_predictors = nn.ModuleList(box_predictors) - assert len(box_predictors) == num_stages, f"{len(box_predictors)} != {num_stages}!" - assert len(proposal_matchers) == num_stages, f"{len(proposal_matchers)} != {num_stages}!" 
- super().__init__( - box_in_features=box_in_features, - box_pooler=box_pooler, - box_head=box_heads, - box_predictor=box_predictors, - **kwargs, - ) - self.proposal_matchers = proposal_matchers - - @classmethod - def from_config(cls, cfg, input_shape): - ret = super().from_config(cfg, input_shape) - ret.pop("proposal_matcher") - return ret - - @classmethod - def _init_box_head(cls, cfg, input_shape): - # fmt: off - in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES - pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION - pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) - sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO - pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE - cascade_bbox_reg_weights = cfg.MODEL.ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS - cascade_ious = cfg.MODEL.ROI_BOX_CASCADE_HEAD.IOUS - assert len(cascade_bbox_reg_weights) == len(cascade_ious) - assert cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG, \ - "CascadeROIHeads only support class-agnostic regression now!" - assert cascade_ious[0] == cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS[0] - # fmt: on - - in_channels = [input_shape[f].channels for f in in_features] - # Check all channel counts are equal - assert len(set(in_channels)) == 1, in_channels - in_channels = in_channels[0] - - box_pooler = ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type=pooler_type, - ) - pooled_shape = ShapeSpec( - channels=in_channels, width=pooler_resolution, height=pooler_resolution - ) - - box_heads, box_predictors, proposal_matchers = [], [], [] - for match_iou, bbox_reg_weights in zip(cascade_ious, cascade_bbox_reg_weights): - box_head = build_box_head(cfg, pooled_shape) - box_heads.append(box_head) - box_predictors.append( - FastRCNNOutputLayers( - cfg, - box_head.output_shape, - box2box_transform=Box2BoxTransform(weights=bbox_reg_weights), - ) - ) - proposal_matchers.append(Matcher([match_iou], [0, 1], allow_low_quality_matches=False)) - return { - "box_in_features": in_features, - "box_pooler": box_pooler, - "box_heads": box_heads, - "box_predictors": box_predictors, - "proposal_matchers": proposal_matchers, - } - - def forward(self, images, features, proposals, targets=None): - del images - if self.training: - proposals = self.label_and_sample_proposals(proposals, targets) - - if self.training: - # Need targets to box head - losses = self._forward_box(features, proposals, targets) - losses.update(self._forward_mask(features, proposals)) - losses.update(self._forward_keypoint(features, proposals)) - return proposals, losses - else: - pred_instances = self._forward_box(features, proposals) - pred_instances = self.forward_with_given_boxes(features, pred_instances) - return pred_instances, {} - - def _forward_box(self, features, proposals, targets=None): - """ - Args: - features, targets: the same as in - Same as in :meth:`ROIHeads.forward`. - proposals (list[Instances]): the per-image object proposals with - their matching ground truth. - Each has fields "proposal_boxes", and "objectness_logits", - "gt_classes", "gt_boxes". - """ - features = [features[f] for f in self.box_in_features] - head_outputs = [] # (predictor, predictions, proposals) - prev_pred_boxes = None - image_sizes = [x.image_size for x in proposals] - for k in range(self.num_cascade_stages): - if k > 0: - # The output boxes of the previous stage are used to create the input - # proposals of the next stage. 
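                # During training, each stage then re-matches these refined boxes
                # against ground truth with its own matcher from `proposal_matchers`,
                # typically at an increasing IoU threshold per stage (e.g. 0.5, 0.6,
                # 0.7 in the common 3-stage setup), before computing that stage's
                # losses.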
- proposals = self._create_proposals_from_boxes(prev_pred_boxes, image_sizes) - if self.training: - proposals = self._match_and_label_boxes(proposals, k, targets) - predictions = self._run_stage(features, proposals, k) - prev_pred_boxes = self.box_predictor[k].predict_boxes(predictions, proposals) - head_outputs.append((self.box_predictor[k], predictions, proposals)) - - if self.training: - losses = {} - storage = get_event_storage() - for stage, (predictor, predictions, proposals) in enumerate(head_outputs): - with storage.name_scope("stage{}".format(stage)): - stage_losses = predictor.losses(predictions, proposals) - losses.update({k + "_stage{}".format(stage): v for k, v in stage_losses.items()}) - return losses - else: - # Each is a list[Tensor] of length #image. Each tensor is Ri x (K+1) - scores_per_stage = [h[0].predict_probs(h[1], h[2]) for h in head_outputs] - - # Average the scores across heads - scores = [ - sum(list(scores_per_image)) * (1.0 / self.num_cascade_stages) - for scores_per_image in zip(*scores_per_stage) - ] - # Use the boxes of the last head - predictor, predictions, proposals = head_outputs[-1] - boxes = predictor.predict_boxes(predictions, proposals) - pred_instances, _ = fast_rcnn_inference( - boxes, - scores, - image_sizes, - predictor.test_score_thresh, - predictor.test_nms_thresh, - predictor.test_topk_per_image, - ) - return pred_instances - - @torch.no_grad() - def _match_and_label_boxes(self, proposals, stage, targets): - """ - Match proposals with groundtruth using the matcher at the given stage. - Label the proposals as foreground or background based on the match. - - Args: - proposals (list[Instances]): One Instances for each image, with - the field "proposal_boxes". - stage (int): the current stage - targets (list[Instances]): the ground truth instances - - Returns: - list[Instances]: the same proposals, but with fields "gt_classes" and "gt_boxes" - """ - num_fg_samples, num_bg_samples = [], [] - for proposals_per_image, targets_per_image in zip(proposals, targets): - match_quality_matrix = pairwise_iou( - targets_per_image.gt_boxes, proposals_per_image.proposal_boxes - ) - # proposal_labels are 0 or 1 - matched_idxs, proposal_labels = self.proposal_matchers[stage](match_quality_matrix) - if len(targets_per_image) > 0: - gt_classes = targets_per_image.gt_classes[matched_idxs] - # Label unmatched proposals (0 label from matcher) as background (label=num_classes) - gt_classes[proposal_labels == 0] = self.num_classes - gt_boxes = targets_per_image.gt_boxes[matched_idxs] - else: - gt_classes = torch.zeros_like(matched_idxs) + self.num_classes - gt_boxes = Boxes( - targets_per_image.gt_boxes.tensor.new_zeros((len(proposals_per_image), 4)) - ) - proposals_per_image.gt_classes = gt_classes - proposals_per_image.gt_boxes = gt_boxes - - num_fg_samples.append((proposal_labels == 1).sum().item()) - num_bg_samples.append(proposal_labels.numel() - num_fg_samples[-1]) - - # Log the number of fg/bg samples in each stage - storage = get_event_storage() - storage.put_scalar( - "stage{}/roi_head/num_fg_samples".format(stage), - sum(num_fg_samples) / len(num_fg_samples), - ) - storage.put_scalar( - "stage{}/roi_head/num_bg_samples".format(stage), - sum(num_bg_samples) / len(num_bg_samples), - ) - return proposals - - def _run_stage(self, features, proposals, stage): - """ - Args: - features (list[Tensor]): #lvl input features to ROIHeads - proposals (list[Instances]): #image Instances, with the field "proposal_boxes" - stage (int): the current stage - - Returns: - Same 
output as `FastRCNNOutputLayers.forward()`. - """ - box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals]) - # The original implementation averages the losses among heads, - # but scale up the parameter gradients of the heads. - # This is equivalent to adding the losses among heads, - # but scale down the gradients on features. - box_features = _ScaleGradient.apply(box_features, 1.0 / self.num_cascade_stages) - box_features = self.box_head[stage](box_features) - return self.box_predictor[stage](box_features) - - def _create_proposals_from_boxes(self, boxes, image_sizes): - """ - Args: - boxes (list[Tensor]): per-image predicted boxes, each of shape Ri x 4 - image_sizes (list[tuple]): list of image shapes in (h, w) - - Returns: - list[Instances]: per-image proposals with the given boxes. - """ - # Just like RPN, the proposals should not have gradients - boxes = [Boxes(b.detach()) for b in boxes] - proposals = [] - for boxes_per_image, image_size in zip(boxes, image_sizes): - boxes_per_image.clip(image_size) - if self.training: - # do not filter empty boxes at inference time, - # because the scores from each stage need to be aligned and added later - boxes_per_image = boxes_per_image[boxes_per_image.nonempty()] - prop = Instances(image_size) - prop.proposal_boxes = boxes_per_image - proposals.append(prop) - return proposals diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/fast_rcnn.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/fast_rcnn.py deleted file mode 100644 index ca796ac..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/fast_rcnn.py +++ /dev/null @@ -1,510 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -import torch -from fvcore.nn import smooth_l1_loss -from torch import nn -from torch.nn import functional as F - -from detectron2.config import configurable -from detectron2.layers import Linear, ShapeSpec, batched_nms, cat -from detectron2.modeling.box_regression import Box2BoxTransform, apply_deltas_broadcast -from detectron2.structures import Boxes, Instances -from detectron2.utils.events import get_event_storage - -__all__ = ["fast_rcnn_inference", "FastRCNNOutputLayers"] - - -logger = logging.getLogger(__name__) - -""" -Shape shorthand in this module: - - N: number of images in the minibatch - R: number of ROIs, combined over all images, in the minibatch - Ri: number of ROIs in image i - K: number of foreground classes. E.g.,there are 80 foreground classes in COCO. - -Naming convention: - - deltas: refers to the 4-d (dx, dy, dw, dh) deltas that parameterize the box2box - transform (see :class:`box_regression.Box2BoxTransform`). - - pred_class_logits: predicted class scores in [-inf, +inf]; use - softmax(pred_class_logits) to estimate P(class). - - gt_classes: ground-truth classification labels in [0, K], where [0, K) represent - foreground object classes and K represents the background class. - - pred_proposal_deltas: predicted box2box transform deltas for transforming proposals - to detection box predictions. - - gt_proposal_deltas: ground-truth box2box transform deltas -""" - - -def fast_rcnn_inference(boxes, scores, image_shapes, score_thresh, nms_thresh, topk_per_image): - """ - Call `fast_rcnn_inference_single_image` for all images. - - Args: - boxes (list[Tensor]): A list of Tensors of predicted class-specific or class-agnostic - boxes for each image. 
Element i has shape (Ri, K * 4) if doing - class-specific regression, or (Ri, 4) if doing class-agnostic - regression, where Ri is the number of predicted objects for image i. - This is compatible with the output of :meth:`FastRCNNOutputLayers.predict_boxes`. - scores (list[Tensor]): A list of Tensors of predicted class scores for each image. - Element i has shape (Ri, K + 1), where Ri is the number of predicted objects - for image i. Compatible with the output of :meth:`FastRCNNOutputLayers.predict_probs`. - image_shapes (list[tuple]): A list of (width, height) tuples for each image in the batch. - score_thresh (float): Only return detections with a confidence score exceeding this - threshold. - nms_thresh (float): The threshold to use for box non-maximum suppression. Value in [0, 1]. - topk_per_image (int): The number of top scoring detections to return. Set < 0 to return - all detections. - - Returns: - instances: (list[Instances]): A list of N instances, one for each image in the batch, - that stores the topk most confidence detections. - kept_indices: (list[Tensor]): A list of 1D tensor of length of N, each element indicates - the corresponding boxes/scores index in [0, Ri) from the input, for image i. - """ - result_per_image = [ - fast_rcnn_inference_single_image( - boxes_per_image, scores_per_image, image_shape, score_thresh, nms_thresh, topk_per_image - ) - for scores_per_image, boxes_per_image, image_shape in zip(scores, boxes, image_shapes) - ] - return [x[0] for x in result_per_image], [x[1] for x in result_per_image] - - -def fast_rcnn_inference_single_image( - boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image -): - """ - Single-image inference. Return bounding-box detection results by thresholding - on scores and applying non-maximum suppression (NMS). - - Args: - Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes - per image. - - Returns: - Same as `fast_rcnn_inference`, but for only one image. - """ - valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1) - if not valid_mask.all(): - boxes = boxes[valid_mask] - scores = scores[valid_mask] - - scores = scores[:, :-1] - num_bbox_reg_classes = boxes.shape[1] // 4 - # Convert to Boxes to use the `clip` function ... - boxes = Boxes(boxes.reshape(-1, 4)) - boxes.clip(image_shape) - boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4) # R x C x 4 - - # Filter results based on detection scores - filter_mask = scores > score_thresh # R x K - # R' x 2. First column contains indices of the R predictions; - # Second column contains indices of classes. - filter_inds = filter_mask.nonzero() - if num_bbox_reg_classes == 1: - boxes = boxes[filter_inds[:, 0], 0] - else: - boxes = boxes[filter_mask] - scores = scores[filter_mask] - - # Apply per-class NMS - keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh) - if topk_per_image >= 0: - keep = keep[:topk_per_image] - boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep] - - result = Instances(image_shape) - result.pred_boxes = Boxes(boxes) - result.scores = scores - result.pred_classes = filter_inds[:, 1] - return result, filter_inds[:, 0] - - -class FastRCNNOutputs(object): - """ - A class that stores information about outputs of a Fast R-CNN head. - It provides methods that are used to decode the outputs of a Fast R-CNN head. 
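    In particular, `losses()` computes the classification and box-regression
    losses, while `predict_boxes()`, `predict_probs()` and `inference()` are kept
    only for backward compatibility and are marked deprecated below.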
- """ - - def __init__( - self, - box2box_transform, - pred_class_logits, - pred_proposal_deltas, - proposals, - smooth_l1_beta=0, - ): - """ - Args: - box2box_transform (Box2BoxTransform/Box2BoxTransformRotated): - box2box transform instance for proposal-to-detection transformations. - pred_class_logits (Tensor): A tensor of shape (R, K + 1) storing the predicted class - logits for all R predicted object instances. - Each row corresponds to a predicted object instance. - pred_proposal_deltas (Tensor): A tensor of shape (R, K * B) or (R, B) for - class-specific or class-agnostic regression. It stores the predicted deltas that - transform proposals into final box detections. - B is the box dimension (4 or 5). - When B is 4, each row is [dx, dy, dw, dh (, ....)]. - When B is 5, each row is [dx, dy, dw, dh, da (, ....)]. - proposals (list[Instances]): A list of N Instances, where Instances i stores the - proposals for image i, in the field "proposal_boxes". - When training, each Instances must have ground-truth labels - stored in the field "gt_classes" and "gt_boxes". - The total number of all instances must be equal to R. - smooth_l1_beta (float): The transition point between L1 and L2 loss in - the smooth L1 loss function. When set to 0, the loss becomes L1. When - set to +inf, the loss becomes constant 0. - """ - self.box2box_transform = box2box_transform - self.num_preds_per_image = [len(p) for p in proposals] - self.pred_class_logits = pred_class_logits - self.pred_proposal_deltas = pred_proposal_deltas - self.smooth_l1_beta = smooth_l1_beta - self.image_shapes = [x.image_size for x in proposals] - - if len(proposals): - box_type = type(proposals[0].proposal_boxes) - # cat(..., dim=0) concatenates over all images in the batch - self.proposals = box_type.cat([p.proposal_boxes for p in proposals]) - assert ( - not self.proposals.tensor.requires_grad - ), "Proposals should not require gradients!" - - # The following fields should exist only when training. - if proposals[0].has("gt_boxes"): - self.gt_boxes = box_type.cat([p.gt_boxes for p in proposals]) - assert proposals[0].has("gt_classes") - self.gt_classes = cat([p.gt_classes for p in proposals], dim=0) - else: - self.proposals = Boxes(torch.zeros(0, 4, device=self.pred_proposal_deltas.device)) - self._no_instances = len(proposals) == 0 # no instances found - - def _log_accuracy(self): - """ - Log the accuracy metrics to EventStorage. - """ - num_instances = self.gt_classes.numel() - pred_classes = self.pred_class_logits.argmax(dim=1) - bg_class_ind = self.pred_class_logits.shape[1] - 1 - - fg_inds = (self.gt_classes >= 0) & (self.gt_classes < bg_class_ind) - num_fg = fg_inds.nonzero().numel() - fg_gt_classes = self.gt_classes[fg_inds] - fg_pred_classes = pred_classes[fg_inds] - - num_false_negative = (fg_pred_classes == bg_class_ind).nonzero().numel() - num_accurate = (pred_classes == self.gt_classes).nonzero().numel() - fg_num_accurate = (fg_pred_classes == fg_gt_classes).nonzero().numel() - - storage = get_event_storage() - if num_instances > 0: - storage.put_scalar("fast_rcnn/cls_accuracy", num_accurate / num_instances) - if num_fg > 0: - storage.put_scalar("fast_rcnn/fg_cls_accuracy", fg_num_accurate / num_fg) - storage.put_scalar("fast_rcnn/false_negative", num_false_negative / num_fg) - - def softmax_cross_entropy_loss(self): - """ - Compute the softmax cross entropy loss for box classification. 
- - Returns: - scalar Tensor - """ - if self._no_instances: - return 0.0 * self.pred_class_logits.sum() - else: - self._log_accuracy() - return F.cross_entropy(self.pred_class_logits, self.gt_classes, reduction="mean") - - def smooth_l1_loss(self): - """ - Compute the smooth L1 loss for box regression. - - Returns: - scalar Tensor - """ - if self._no_instances: - return 0.0 * self.pred_proposal_deltas.sum() - gt_proposal_deltas = self.box2box_transform.get_deltas( - self.proposals.tensor, self.gt_boxes.tensor - ) - box_dim = gt_proposal_deltas.size(1) # 4 or 5 - cls_agnostic_bbox_reg = self.pred_proposal_deltas.size(1) == box_dim - device = self.pred_proposal_deltas.device - - bg_class_ind = self.pred_class_logits.shape[1] - 1 - - # Box delta loss is only computed between the prediction for the gt class k - # (if 0 <= k < bg_class_ind) and the target; there is no loss defined on predictions - # for non-gt classes and background. - # Empty fg_inds produces a valid loss of zero as long as the size_average - # arg to smooth_l1_loss is False (otherwise it uses torch.mean internally - # and would produce a nan loss). - fg_inds = torch.nonzero( - (self.gt_classes >= 0) & (self.gt_classes < bg_class_ind), as_tuple=True - )[0] - if cls_agnostic_bbox_reg: - # pred_proposal_deltas only corresponds to foreground class for agnostic - gt_class_cols = torch.arange(box_dim, device=device) - else: - fg_gt_classes = self.gt_classes[fg_inds] - # pred_proposal_deltas for class k are located in columns [b * k : b * k + b], - # where b is the dimension of box representation (4 or 5) - # Note that compared to Detectron1, - # we do not perform bounding box regression for background classes. - gt_class_cols = box_dim * fg_gt_classes[:, None] + torch.arange(box_dim, device=device) - - loss_box_reg = smooth_l1_loss( - self.pred_proposal_deltas[fg_inds[:, None], gt_class_cols], - gt_proposal_deltas[fg_inds], - self.smooth_l1_beta, - reduction="sum", - ) - # The loss is normalized using the total number of regions (R), not the number - # of foreground regions even though the box regression loss is only defined on - # foreground regions. Why? Because doing so gives equal training influence to - # each foreground example. To see how, consider two different minibatches: - # (1) Contains a single foreground region - # (2) Contains 100 foreground regions - # If we normalize by the number of foreground regions, the single example in - # minibatch (1) will be given 100 times as much influence as each foreground - # example in minibatch (2). Normalizing by the total number of regions, R, - # means that the single example in minibatch (1) and each of the 100 examples - # in minibatch (2) are given equal influence. - loss_box_reg = loss_box_reg / self.gt_classes.numel() - return loss_box_reg - - def _predict_boxes(self): - """ - Returns: - Tensor: A Tensors of predicted class-specific or class-agnostic boxes - for all images in a batch. Element i has shape (Ri, K * B) or (Ri, B), where Ri is - the number of predicted objects for image i and B is the box dimension (4 or 5) - """ - return apply_deltas_broadcast( - self.box2box_transform, self.pred_proposal_deltas, self.proposals.tensor - ) - - """ - A subclass is expected to have the following methods because - they are used to query information about the head predictions. - """ - - def losses(self): - """ - Compute the default losses for box head in Fast(er) R-CNN, - with softmax cross entropy loss and smooth L1 loss. 
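        Both losses are normalized by the total number of sampled regions R
        (cross-entropy via `reduction="mean"`, box regression via the explicit
        division in `smooth_l1_loss`).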
- - Returns: - A dict of losses (scalar tensors) containing keys "loss_cls" and "loss_box_reg". - """ - return { - "loss_cls": self.softmax_cross_entropy_loss(), - "loss_box_reg": self.smooth_l1_loss(), - } - - def predict_boxes(self): - """ - Deprecated - """ - return self._predict_boxes().split(self.num_preds_per_image, dim=0) - - def predict_probs(self): - """ - Deprecated - """ - probs = F.softmax(self.pred_class_logits, dim=-1) - return probs.split(self.num_preds_per_image, dim=0) - - def inference(self, score_thresh, nms_thresh, topk_per_image): - """ - Deprecated - """ - boxes = self.predict_boxes() - scores = self.predict_probs() - image_shapes = self.image_shapes - return fast_rcnn_inference( - boxes, scores, image_shapes, score_thresh, nms_thresh, topk_per_image - ) - - -class FastRCNNOutputLayers(nn.Module): - """ - Two linear layers for predicting Fast R-CNN outputs: - (1) proposal-to-detection box regression deltas - (2) classification scores - """ - - @configurable - def __init__( - self, - input_shape, - *, - box2box_transform, - num_classes, - cls_agnostic_bbox_reg=False, - smooth_l1_beta=0.0, - test_score_thresh=0.0, - test_nms_thresh=0.5, - test_topk_per_image=100, - ): - """ - NOTE: this interface is experimental. - - Args: - input_shape (ShapeSpec): shape of the input feature to this module - box2box_transform (Box2BoxTransform or Box2BoxTransformRotated): - num_classes (int): number of foreground classes - cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression - smooth_l1_beta (float): transition point from L1 to L2 loss. - test_score_thresh (float): threshold to filter predictions results. - test_nms_thresh (float): NMS threshold for prediction results. - test_topk_per_image (int): number of top predictions to produce per image. - """ - super().__init__() - if isinstance(input_shape, int): # some backward compatibility - input_shape = ShapeSpec(channels=input_shape) - input_size = input_shape.channels * (input_shape.width or 1) * (input_shape.height or 1) - # The prediction layer for num_classes foreground classes and one background class - # (hence + 1) - self.cls_score = Linear(input_size, num_classes + 1) - num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes - box_dim = len(box2box_transform.weights) - self.bbox_pred = Linear(input_size, num_bbox_reg_classes * box_dim) - - nn.init.normal_(self.cls_score.weight, std=0.01) - nn.init.normal_(self.bbox_pred.weight, std=0.001) - for l in [self.cls_score, self.bbox_pred]: - nn.init.constant_(l.bias, 0) - - self.box2box_transform = box2box_transform - self.smooth_l1_beta = smooth_l1_beta - self.test_score_thresh = test_score_thresh - self.test_nms_thresh = test_nms_thresh - self.test_topk_per_image = test_topk_per_image - - @classmethod - def from_config(cls, cfg, input_shape): - return { - "input_shape": input_shape, - "box2box_transform": Box2BoxTransform(weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS), - # fmt: off - "num_classes" : cfg.MODEL.ROI_HEADS.NUM_CLASSES, - "cls_agnostic_bbox_reg" : cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG, - "smooth_l1_beta" : cfg.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA, - "test_score_thresh" : cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST, - "test_nms_thresh" : cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST, - "test_topk_per_image" : cfg.TEST.DETECTIONS_PER_IMAGE - # fmt: on - } - - def forward(self, x): - """ - Returns: - Tensor: Nx(K+1) scores for each box - Tensor: Nx4 or Nx(Kx4) bounding box regression deltas. 
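
# Toy-sized sketch of the two parallel linear heads built in __init__ above and
# used by forward(): one produces K+1 classification scores, the other K*B
# regression deltas. The sizes below are invented for illustration.
import torch
from torch import nn

input_size, num_classes, box_dim = 1024, 80, 4
cls_score = nn.Linear(input_size, num_classes + 1)
bbox_pred = nn.Linear(input_size, num_classes * box_dim)   # class-specific regression

x = torch.randn(2, input_size)              # two pooled, flattened region features
scores, proposal_deltas = cls_score(x), bbox_pred(x)
print(scores.shape, proposal_deltas.shape)  # (2, 81) and (2, 320)
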
- """ - if x.dim() > 2: - x = torch.flatten(x, start_dim=1) - scores = self.cls_score(x) - proposal_deltas = self.bbox_pred(x) - return scores, proposal_deltas - - # TODO: move the implementation to this class. - def losses(self, predictions, proposals): - """ - Args: - predictions: return values of :meth:`forward()`. - proposals (list[Instances]): proposals that match the features - that were used to compute predictions. - """ - scores, proposal_deltas = predictions - return FastRCNNOutputs( - self.box2box_transform, scores, proposal_deltas, proposals, self.smooth_l1_beta - ).losses() - - def inference(self, predictions, proposals): - """ - Returns: - list[Instances]: same as `fast_rcnn_inference`. - list[Tensor]: same as `fast_rcnn_inference`. - """ - boxes = self.predict_boxes(predictions, proposals) - scores = self.predict_probs(predictions, proposals) - image_shapes = [x.image_size for x in proposals] - return fast_rcnn_inference( - boxes, - scores, - image_shapes, - self.test_score_thresh, - self.test_nms_thresh, - self.test_topk_per_image, - ) - - def predict_boxes_for_gt_classes(self, predictions, proposals): - """ - Returns: - list[Tensor]: A list of Tensors of predicted boxes for GT classes in case of - class-specific box head. Element i of the list has shape (Ri, B), where Ri is - the number of predicted objects for image i and B is the box dimension (4 or 5) - """ - if not len(proposals): - return [] - scores, proposal_deltas = predictions - proposal_boxes = [p.proposal_boxes for p in proposals] - proposal_boxes = proposal_boxes[0].cat(proposal_boxes).tensor - N, B = proposal_boxes.shape - predict_boxes = apply_deltas_broadcast( - self.box2box_transform, proposal_deltas, proposal_boxes - ) # Nx(KxB) - - K = predict_boxes.shape[1] // B - if K > 1: - gt_classes = torch.cat([p.gt_classes for p in proposals], dim=0) - # Some proposals are ignored or have a background class. Their gt_classes - # cannot be used as index. - gt_classes = gt_classes.clamp_(0, K - 1) - - predict_boxes = predict_boxes.view(N, K, B)[ - torch.arange(N, dtype=torch.long, device=predict_boxes.device), gt_classes - ] - num_prop_per_image = [len(p) for p in proposals] - return predict_boxes.split(num_prop_per_image) - - def predict_boxes(self, predictions, proposals): - """ - Returns: - list[Tensor]: A list of Tensors of predicted class-specific or class-agnostic boxes - for each image. Element i has shape (Ri, K * B) or (Ri, B), where Ri is - the number of predicted objects for image i and B is the box dimension (4 or 5) - """ - if not len(proposals): - return [] - _, proposal_deltas = predictions - num_prop_per_image = [len(p) for p in proposals] - proposal_boxes = [p.proposal_boxes for p in proposals] - proposal_boxes = proposal_boxes[0].cat(proposal_boxes).tensor - predict_boxes = apply_deltas_broadcast( - self.box2box_transform, proposal_deltas, proposal_boxes - ) # Nx(KxB) - return predict_boxes.split(num_prop_per_image) - - def predict_probs(self, predictions, proposals): - """ - Returns: - list[Tensor]: A list of Tensors of predicted class probabilities for each image. - Element i has shape (Ri, K + 1), where Ri is the number of predicted objects - for image i. 
- """ - scores, _ = predictions - num_inst_per_image = [len(p) for p in proposals] - probs = F.softmax(scores, dim=-1) - return probs.split(num_inst_per_image, dim=0) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/keypoint_head.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/keypoint_head.py deleted file mode 100644 index c7990c8..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/keypoint_head.py +++ /dev/null @@ -1,253 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from typing import List -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.config import configurable -from detectron2.layers import Conv2d, ConvTranspose2d, cat, interpolate -from detectron2.structures import Instances, heatmaps_to_keypoints -from detectron2.utils.events import get_event_storage -from detectron2.utils.registry import Registry - -_TOTAL_SKIPPED = 0 - -ROI_KEYPOINT_HEAD_REGISTRY = Registry("ROI_KEYPOINT_HEAD") -ROI_KEYPOINT_HEAD_REGISTRY.__doc__ = """ -Registry for keypoint heads, which make keypoint predictions from per-region features. - -The registered object will be called with `obj(cfg, input_shape)`. -""" - - -def build_keypoint_head(cfg, input_shape): - """ - Build a keypoint head from `cfg.MODEL.ROI_KEYPOINT_HEAD.NAME`. - """ - name = cfg.MODEL.ROI_KEYPOINT_HEAD.NAME - return ROI_KEYPOINT_HEAD_REGISTRY.get(name)(cfg, input_shape) - - -def keypoint_rcnn_loss(pred_keypoint_logits, instances, normalizer): - """ - Arguments: - pred_keypoint_logits (Tensor): A tensor of shape (N, K, S, S) where N is the total number - of instances in the batch, K is the number of keypoints, and S is the side length - of the keypoint heatmap. The values are spatial logits. - instances (list[Instances]): A list of M Instances, where M is the batch size. - These instances are predictions from the model - that are in 1:1 correspondence with pred_keypoint_logits. - Each Instances should contain a `gt_keypoints` field containing a `structures.Keypoint` - instance. - normalizer (float): Normalize the loss by this amount. - If not specified, we normalize by the number of visible keypoints in the minibatch. - - Returns a scalar tensor containing the loss. 
- """ - heatmaps = [] - valid = [] - - keypoint_side_len = pred_keypoint_logits.shape[2] - for instances_per_image in instances: - if len(instances_per_image) == 0: - continue - keypoints = instances_per_image.gt_keypoints - heatmaps_per_image, valid_per_image = keypoints.to_heatmap( - instances_per_image.proposal_boxes.tensor, keypoint_side_len - ) - heatmaps.append(heatmaps_per_image.view(-1)) - valid.append(valid_per_image.view(-1)) - - if len(heatmaps): - keypoint_targets = cat(heatmaps, dim=0) - valid = cat(valid, dim=0).to(dtype=torch.uint8) - valid = torch.nonzero(valid).squeeze(1) - - # torch.mean (in binary_cross_entropy_with_logits) doesn't - # accept empty tensors, so handle it separately - if len(heatmaps) == 0 or valid.numel() == 0: - global _TOTAL_SKIPPED - _TOTAL_SKIPPED += 1 - storage = get_event_storage() - storage.put_scalar("kpts_num_skipped_batches", _TOTAL_SKIPPED, smoothing_hint=False) - return pred_keypoint_logits.sum() * 0 - - N, K, H, W = pred_keypoint_logits.shape - pred_keypoint_logits = pred_keypoint_logits.view(N * K, H * W) - - keypoint_loss = F.cross_entropy( - pred_keypoint_logits[valid], keypoint_targets[valid], reduction="sum" - ) - - # If a normalizer isn't specified, normalize by the number of visible keypoints in the minibatch - if normalizer is None: - normalizer = valid.numel() - keypoint_loss /= normalizer - - return keypoint_loss - - -def keypoint_rcnn_inference(pred_keypoint_logits, pred_instances): - """ - Post process each predicted keypoint heatmap in `pred_keypoint_logits` into (x, y, score) - and add it to the `pred_instances` as a `pred_keypoints` field. - - Args: - pred_keypoint_logits (Tensor): A tensor of shape (R, K, S, S) where R is the total number - of instances in the batch, K is the number of keypoints, and S is the side length of - the keypoint heatmap. The values are spatial logits. - pred_instances (list[Instances]): A list of N Instances, where N is the number of images. - - Returns: - None. Each element in pred_instances will contain an extra "pred_keypoints" field. - The field is a tensor of shape (#instance, K, 3) where the last - dimension corresponds to (x, y, score). - The scores are larger than 0. - """ - # flatten all bboxes from all images together (list[Boxes] -> Rx4 tensor) - bboxes_flat = cat([b.pred_boxes.tensor for b in pred_instances], dim=0) - - keypoint_results = heatmaps_to_keypoints(pred_keypoint_logits.detach(), bboxes_flat.detach()) - num_instances_per_image = [len(i) for i in pred_instances] - keypoint_results = keypoint_results[:, :, [0, 1, 3]].split(num_instances_per_image, dim=0) - - for keypoint_results_per_image, instances_per_image in zip(keypoint_results, pred_instances): - # keypoint_results_per_image is (num instances)x(num keypoints)x(x, y, score) - instances_per_image.pred_keypoints = keypoint_results_per_image - - -class BaseKeypointRCNNHead(nn.Module): - """ - Implement the basic Keypoint R-CNN losses and inference logic described in :paper:`Mask R-CNN`. - """ - - @configurable - def __init__(self, *, num_keypoints, loss_weight=1.0, loss_normalizer=1.0): - """ - NOTE: this interface is experimental. - - Args: - num_keypoints (int): number of keypoints to predict - loss_weight (float): weight to multiple on the keypoint loss - loss_normalizer (float or str): - If float, divide the loss by `loss_normalizer * #images`. - If 'visible', the loss is normalized by the total number of - visible keypoints across images. 
- """ - super().__init__() - self.num_keypoints = num_keypoints - self.loss_weight = loss_weight - assert loss_normalizer == "visible" or isinstance(loss_normalizer, float), loss_normalizer - self.loss_normalizer = loss_normalizer - - @classmethod - def from_config(cls, cfg, input_shape): - ret = { - "loss_weight": cfg.MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT, - "num_keypoints": cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS, - } - normalize_by_visible = ( - cfg.MODEL.ROI_KEYPOINT_HEAD.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS - ) # noqa - if not normalize_by_visible: - batch_size_per_image = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE - positive_sample_fraction = cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION - ret["loss_normalizer"] = ( - ret["num_keypoints"] * batch_size_per_image * positive_sample_fraction - ) - else: - ret["loss_normalizer"] = "visible" - return ret - - def forward(self, x, instances: List[Instances]): - """ - Args: - x: input region feature(s) provided by :class:`ROIHeads`. - instances (list[Instances]): contains the boxes & labels corresponding - to the input features. - Exact format is up to its caller to decide. - Typically, this is the foreground instances in training, with - "proposal_boxes" field and other gt annotations. - In inference, it contains boxes that are already predicted. - - Returns: - A dict of losses if in training. The predicted "instances" if in inference. - """ - x = self.layers(x) - if self.training: - num_images = len(instances) - normalizer = ( - None if self.loss_normalizer == "visible" else num_images * self.loss_normalizer - ) - return { - "loss_keypoint": keypoint_rcnn_loss(x, instances, normalizer=normalizer) - * self.loss_weight - } - else: - keypoint_rcnn_inference(x, instances) - return instances - - def layers(self, x): - """ - Neural network layers that makes predictions from regional input features. - """ - raise NotImplementedError - - -@ROI_KEYPOINT_HEAD_REGISTRY.register() -class KRCNNConvDeconvUpsampleHead(BaseKeypointRCNNHead): - """ - A standard keypoint head containing a series of 3x3 convs, followed by - a transpose convolution and bilinear interpolation for upsampling. - """ - - @configurable - def __init__(self, input_shape, *, num_keypoints, conv_dims, **kwargs): - """ - NOTE: this interface is experimental. - - Args: - input_shape (ShapeSpec): shape of the input feature - conv_dims: an iterable of output channel counts for each conv in the head - e.g. (512, 512, 512) for three convs outputting 512 channels. 
- """ - super().__init__(num_keypoints=num_keypoints, **kwargs) - - # default up_scale to 2 (this can be made an option) - up_scale = 2 - in_channels = input_shape.channels - - self.blocks = [] - for idx, layer_channels in enumerate(conv_dims, 1): - module = Conv2d(in_channels, layer_channels, 3, stride=1, padding=1) - self.add_module("conv_fcn{}".format(idx), module) - self.blocks.append(module) - in_channels = layer_channels - - deconv_kernel = 4 - self.score_lowres = ConvTranspose2d( - in_channels, num_keypoints, deconv_kernel, stride=2, padding=deconv_kernel // 2 - 1 - ) - self.up_scale = up_scale - - for name, param in self.named_parameters(): - if "bias" in name: - nn.init.constant_(param, 0) - elif "weight" in name: - # Caffe2 implementation uses MSRAFill, which in fact - # corresponds to kaiming_normal_ in PyTorch - nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") - - @classmethod - def from_config(cls, cfg, input_shape): - ret = super().from_config(cfg, input_shape) - ret["input_shape"] = input_shape - ret["conv_dims"] = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS - return ret - - def layers(self, x): - for layer in self.blocks: - x = F.relu(layer(x)) - x = self.score_lowres(x) - x = interpolate(x, scale_factor=self.up_scale, mode="bilinear", align_corners=False) - return x diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/mask_head.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/mask_head.py deleted file mode 100644 index 5209722..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/mask_head.py +++ /dev/null @@ -1,277 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from typing import List -import fvcore.nn.weight_init as weight_init -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.config import configurable -from detectron2.layers import Conv2d, ConvTranspose2d, ShapeSpec, cat, get_norm -from detectron2.structures import Instances -from detectron2.utils.events import get_event_storage -from detectron2.utils.registry import Registry - -ROI_MASK_HEAD_REGISTRY = Registry("ROI_MASK_HEAD") -ROI_MASK_HEAD_REGISTRY.__doc__ = """ -Registry for mask heads, which predicts instance masks given -per-region features. - -The registered object will be called with `obj(cfg, input_shape)`. -""" - - -def mask_rcnn_loss(pred_mask_logits, instances, vis_period=0): - """ - Compute the mask prediction loss defined in the Mask R-CNN paper. - - Args: - pred_mask_logits (Tensor): A tensor of shape (B, C, Hmask, Wmask) or (B, 1, Hmask, Wmask) - for class-specific or class-agnostic, where B is the total number of predicted masks - in all images, C is the number of foreground classes, and Hmask, Wmask are the height - and width of the mask predictions. The values are logits. - instances (list[Instances]): A list of N Instances, where N is the number of images - in the batch. These instances are in 1:1 - correspondence with the pred_mask_logits. The ground-truth labels (class, box, mask, - ...) associated with each instance are stored in fields. - vis_period (int): the period (in steps) to dump visualization. - - Returns: - mask_loss (Tensor): A scalar tensor containing the loss. 
- """ - cls_agnostic_mask = pred_mask_logits.size(1) == 1 - total_num_masks = pred_mask_logits.size(0) - mask_side_len = pred_mask_logits.size(2) - assert pred_mask_logits.size(2) == pred_mask_logits.size(3), "Mask prediction must be square!" - - gt_classes = [] - gt_masks = [] - for instances_per_image in instances: - if len(instances_per_image) == 0: - continue - if not cls_agnostic_mask: - gt_classes_per_image = instances_per_image.gt_classes.to(dtype=torch.int64) - gt_classes.append(gt_classes_per_image) - - gt_masks_per_image = instances_per_image.gt_masks.crop_and_resize( - instances_per_image.proposal_boxes.tensor, mask_side_len - ).to(device=pred_mask_logits.device) - # A tensor of shape (N, M, M), N=#instances in the image; M=mask_side_len - gt_masks.append(gt_masks_per_image) - - if len(gt_masks) == 0: - return pred_mask_logits.sum() * 0 - - gt_masks = cat(gt_masks, dim=0) - - if cls_agnostic_mask: - pred_mask_logits = pred_mask_logits[:, 0] - else: - indices = torch.arange(total_num_masks) - gt_classes = cat(gt_classes, dim=0) - pred_mask_logits = pred_mask_logits[indices, gt_classes] - - if gt_masks.dtype == torch.bool: - gt_masks_bool = gt_masks - else: - # Here we allow gt_masks to be float as well (depend on the implementation of rasterize()) - gt_masks_bool = gt_masks > 0.5 - gt_masks = gt_masks.to(dtype=torch.float32) - - # Log the training accuracy (using gt classes and 0.5 threshold) - mask_incorrect = (pred_mask_logits > 0.0) != gt_masks_bool - mask_accuracy = 1 - (mask_incorrect.sum().item() / max(mask_incorrect.numel(), 1.0)) - num_positive = gt_masks_bool.sum().item() - false_positive = (mask_incorrect & ~gt_masks_bool).sum().item() / max( - gt_masks_bool.numel() - num_positive, 1.0 - ) - false_negative = (mask_incorrect & gt_masks_bool).sum().item() / max(num_positive, 1.0) - - storage = get_event_storage() - storage.put_scalar("mask_rcnn/accuracy", mask_accuracy) - storage.put_scalar("mask_rcnn/false_positive", false_positive) - storage.put_scalar("mask_rcnn/false_negative", false_negative) - if vis_period > 0 and storage.iter % vis_period == 0: - pred_masks = pred_mask_logits.sigmoid() - vis_masks = torch.cat([pred_masks, gt_masks], axis=2) - name = "Left: mask prediction; Right: mask GT" - for idx, vis_mask in enumerate(vis_masks): - vis_mask = torch.stack([vis_mask] * 3, axis=0) - storage.put_image(name + f" ({idx})", vis_mask) - - mask_loss = F.binary_cross_entropy_with_logits(pred_mask_logits, gt_masks, reduction="mean") - return mask_loss - - -def mask_rcnn_inference(pred_mask_logits, pred_instances): - """ - Convert pred_mask_logits to estimated foreground probability masks while also - extracting only the masks for the predicted classes in pred_instances. For each - predicted box, the mask of the same class is attached to the instance by adding a - new "pred_masks" field to pred_instances. - - Args: - pred_mask_logits (Tensor): A tensor of shape (B, C, Hmask, Wmask) or (B, 1, Hmask, Wmask) - for class-specific or class-agnostic, where B is the total number of predicted masks - in all images, C is the number of foreground classes, and Hmask, Wmask are the height - and width of the mask predictions. The values are logits. - pred_instances (list[Instances]): A list of N Instances, where N is the number of images - in the batch. Each Instances must have field "pred_classes". - - Returns: - None. pred_instances will contain an extra "pred_masks" field storing a mask of size (Hmask, - Wmask) for predicted class. 
Note that the masks are returned as a soft (non-quantized) - masks the resolution predicted by the network; post-processing steps, such as resizing - the predicted masks to the original image resolution and/or binarizing them, is left - to the caller. - """ - cls_agnostic_mask = pred_mask_logits.size(1) == 1 - - if cls_agnostic_mask: - mask_probs_pred = pred_mask_logits.sigmoid() - else: - # Select masks corresponding to the predicted classes - num_masks = pred_mask_logits.shape[0] - class_pred = cat([i.pred_classes for i in pred_instances]) - indices = torch.arange(num_masks, device=class_pred.device) - mask_probs_pred = pred_mask_logits[indices, class_pred][:, None].sigmoid() - # mask_probs_pred.shape: (B, 1, Hmask, Wmask) - - num_boxes_per_image = [len(i) for i in pred_instances] - mask_probs_pred = mask_probs_pred.split(num_boxes_per_image, dim=0) - - for prob, instances in zip(mask_probs_pred, pred_instances): - instances.pred_masks = prob # (1, Hmask, Wmask) - - -class BaseMaskRCNNHead(nn.Module): - """ - Implement the basic Mask R-CNN losses and inference logic described in :paper:`Mask R-CNN` - """ - - @configurable - def __init__(self, *, vis_period=0): - """ - NOTE: this interface is experimental. - - Args: - vis_period (int): visualization period - """ - super().__init__() - self.vis_period = vis_period - - @classmethod - def from_config(cls, cfg, input_shape): - return {"vis_period": cfg.VIS_PERIOD} - - def forward(self, x, instances: List[Instances]): - """ - Args: - x: input region feature(s) provided by :class:`ROIHeads`. - instances (list[Instances]): contains the boxes & labels corresponding - to the input features. - Exact format is up to its caller to decide. - Typically, this is the foreground instances in training, with - "proposal_boxes" field and other gt annotations. - In inference, it contains boxes that are already predicted. - - Returns: - A dict of losses in training. The predicted "instances" in inference. - """ - x = self.layers(x) - if self.training: - return {"loss_mask": mask_rcnn_loss(x, instances, self.vis_period)} - else: - mask_rcnn_inference(x, instances) - return instances - - def layers(self, x): - """ - Neural network layers that makes predictions from input features. - """ - raise NotImplementedError - - -@ROI_MASK_HEAD_REGISTRY.register() -class MaskRCNNConvUpsampleHead(BaseMaskRCNNHead): - """ - A mask head with several conv layers, plus an upsample layer (with `ConvTranspose2d`). - Predictions are made with a final 1x1 conv layer. - """ - - @configurable - def __init__(self, input_shape: ShapeSpec, *, num_classes, conv_dims, conv_norm="", **kwargs): - """ - NOTE: this interface is experimental. - - Args: - input_shape (ShapeSpec): shape of the input feature - num_classes (int): the number of classes. 1 if using class agnostic prediction. - conv_dims (list[int]): a list of N>0 integers representing the output dimensions - of N-1 conv layers and the last upsample layer. - conv_norm (str or callable): normalization for the conv layers. - See :func:`detectron2.layers.get_norm` for supported types. - """ - super().__init__(**kwargs) - assert len(conv_dims) >= 1, "conv_dims have to be non-empty!" 
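
# Hedged sketch (toy shapes) of the per-class probability selection performed in
# mask_rcnn_inference above: keep, for each box, only the sigmoid mask of its
# predicted class, then split the flat result back into per-image chunks.
import torch

B, C, M = 5, 80, 28
pred_mask_logits = torch.randn(B, C, M, M)
pred_classes = torch.tensor([0, 7, 7, 12, 3])
num_boxes_per_image = [2, 3]                    # two images contributed 2 and 3 boxes

idx = torch.arange(B)
mask_probs = pred_mask_logits[idx, pred_classes][:, None].sigmoid()   # (B, 1, M, M)
per_image = mask_probs.split(num_boxes_per_image, dim=0)
print([p.shape for p in per_image])             # [(2, 1, 28, 28), (3, 1, 28, 28)]
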
- - self.conv_norm_relus = [] - - cur_channels = input_shape.channels - for k, conv_dim in enumerate(conv_dims[:-1]): - conv = Conv2d( - cur_channels, - conv_dim, - kernel_size=3, - stride=1, - padding=1, - bias=not conv_norm, - norm=get_norm(conv_norm, conv_dim), - activation=F.relu, - ) - self.add_module("mask_fcn{}".format(k + 1), conv) - self.conv_norm_relus.append(conv) - cur_channels = conv_dim - - self.deconv = ConvTranspose2d( - cur_channels, conv_dims[-1], kernel_size=2, stride=2, padding=0 - ) - cur_channels = conv_dims[-1] - - self.predictor = Conv2d(cur_channels, num_classes, kernel_size=1, stride=1, padding=0) - - for layer in self.conv_norm_relus + [self.deconv]: - weight_init.c2_msra_fill(layer) - # use normal distribution initialization for mask prediction layer - nn.init.normal_(self.predictor.weight, std=0.001) - if self.predictor.bias is not None: - nn.init.constant_(self.predictor.bias, 0) - - @classmethod - def from_config(cls, cfg, input_shape): - ret = super().from_config(cfg, input_shape) - conv_dim = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM - num_conv = cfg.MODEL.ROI_MASK_HEAD.NUM_CONV - ret.update( - conv_dims=[conv_dim] * (num_conv + 1), # +1 for ConvTranspose - conv_norm=cfg.MODEL.ROI_MASK_HEAD.NORM, - input_shape=input_shape, - ) - if cfg.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK: - ret["num_classes"] = 1 - else: - ret["num_classes"] = cfg.MODEL.ROI_HEADS.NUM_CLASSES - return ret - - def layers(self, x): - for layer in self.conv_norm_relus: - x = layer(x) - x = F.relu(self.deconv(x)) - return self.predictor(x) - - -def build_mask_head(cfg, input_shape): - """ - Build a mask head defined by `cfg.MODEL.ROI_MASK_HEAD.NAME`. - """ - name = cfg.MODEL.ROI_MASK_HEAD.NAME - return ROI_MASK_HEAD_REGISTRY.get(name)(cfg, input_shape) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/roi_heads.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/roi_heads.py deleted file mode 100644 index f35588e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/roi_heads.py +++ /dev/null @@ -1,812 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import inspect -import logging -import numpy as np -from typing import Dict, List, Optional, Tuple, Union -import torch -from torch import nn - -from detectron2.config import configurable -from detectron2.layers import ShapeSpec -from detectron2.structures import Boxes, ImageList, Instances, pairwise_iou -from detectron2.utils.events import get_event_storage -from detectron2.utils.registry import Registry - -from ..backbone.resnet import BottleneckBlock, make_stage -from ..matcher import Matcher -from ..poolers import ROIPooler -from ..proposal_generator.proposal_utils import add_ground_truth_to_proposals -from ..sampling import subsample_labels -from .box_head import build_box_head -from .fast_rcnn import FastRCNNOutputLayers -from .keypoint_head import build_keypoint_head -from .mask_head import build_mask_head - -ROI_HEADS_REGISTRY = Registry("ROI_HEADS") -ROI_HEADS_REGISTRY.__doc__ = """ -Registry for ROI heads in a generalized R-CNN model. -ROIHeads take feature maps and region proposals, and -perform per-region computation. - -The registered object will be called with `obj(cfg, input_shape)`. -The call is expected to return an :class:`ROIHeads`. -""" - -logger = logging.getLogger(__name__) - - -def build_roi_heads(cfg, input_shape): - """ - Build ROIHeads defined by `cfg.MODEL.ROI_HEADS.NAME`. 
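
# Minimal stand-in (not detectron2's Registry class) for the registry/build
# pattern used by build_mask_head above and build_roi_heads below: heads register
# themselves under a name, and the builder looks the configured name up. The
# config dict and class name here are invented.
class Registry:
    def __init__(self):
        self._map = {}
    def register(self, cls):
        self._map[cls.__name__] = cls
        return cls
    def get(self, name):
        return self._map[name]

ROI_MASK_HEAD_REGISTRY = Registry()

@ROI_MASK_HEAD_REGISTRY.register
class MyMaskHead:
    def __init__(self, cfg, input_shape):
        self.cfg, self.input_shape = cfg, input_shape

def build_head(cfg, input_shape):
    return ROI_MASK_HEAD_REGISTRY.get(cfg["NAME"])(cfg, input_shape)

head = build_head({"NAME": "MyMaskHead"}, input_shape=None)
print(type(head).__name__)                      # MyMaskHead
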
- """ - name = cfg.MODEL.ROI_HEADS.NAME - return ROI_HEADS_REGISTRY.get(name)(cfg, input_shape) - - -def select_foreground_proposals( - proposals: List[Instances], bg_label: int -) -> Tuple[List[Instances], List[torch.Tensor]]: - """ - Given a list of N Instances (for N images), each containing a `gt_classes` field, - return a list of Instances that contain only instances with `gt_classes != -1 && - gt_classes != bg_label`. - - Args: - proposals (list[Instances]): A list of N Instances, where N is the number of - images in the batch. - bg_label: label index of background class. - - Returns: - list[Instances]: N Instances, each contains only the selected foreground instances. - list[Tensor]: N boolean vector, correspond to the selection mask of - each Instances object. True for selected instances. - """ - assert isinstance(proposals, (list, tuple)) - assert isinstance(proposals[0], Instances) - assert proposals[0].has("gt_classes") - fg_proposals = [] - fg_selection_masks = [] - for proposals_per_image in proposals: - gt_classes = proposals_per_image.gt_classes - fg_selection_mask = (gt_classes != -1) & (gt_classes != bg_label) - fg_idxs = fg_selection_mask.nonzero().squeeze(1) - fg_proposals.append(proposals_per_image[fg_idxs]) - fg_selection_masks.append(fg_selection_mask) - return fg_proposals, fg_selection_masks - - -def select_proposals_with_visible_keypoints(proposals: List[Instances]) -> List[Instances]: - """ - Args: - proposals (list[Instances]): a list of N Instances, where N is the - number of images. - - Returns: - proposals: only contains proposals with at least one visible keypoint. - - Note that this is still slightly different from Detectron. - In Detectron, proposals for training keypoint head are re-sampled from - all the proposals with IOU>threshold & >=1 visible keypoint. - - Here, the proposals are first sampled from all proposals with - IOU>threshold, then proposals with no visible keypoint are filtered out. - This strategy seems to make no difference on Detectron and is easier to implement. - """ - ret = [] - all_num_fg = [] - for proposals_per_image in proposals: - # If empty/unannotated image (hard negatives), skip filtering for train - if len(proposals_per_image) == 0: - ret.append(proposals_per_image) - continue - gt_keypoints = proposals_per_image.gt_keypoints.tensor - # #fg x K x 3 - vis_mask = gt_keypoints[:, :, 2] >= 1 - xs, ys = gt_keypoints[:, :, 0], gt_keypoints[:, :, 1] - proposal_boxes = proposals_per_image.proposal_boxes.tensor.unsqueeze(dim=1) # #fg x 1 x 4 - kp_in_box = ( - (xs >= proposal_boxes[:, :, 0]) - & (xs <= proposal_boxes[:, :, 2]) - & (ys >= proposal_boxes[:, :, 1]) - & (ys <= proposal_boxes[:, :, 3]) - ) - selection = (kp_in_box & vis_mask).any(dim=1) - selection_idxs = torch.nonzero(selection, as_tuple=True)[0] - all_num_fg.append(selection_idxs.numel()) - ret.append(proposals_per_image[selection_idxs]) - - storage = get_event_storage() - storage.put_scalar("keypoint_head/num_fg_samples", np.mean(all_num_fg)) - return ret - - -class ROIHeads(torch.nn.Module): - """ - ROIHeads perform all per-region computation in an R-CNN. - - It typically contains logic to - 1. (in training only) match proposals with ground truth and sample them - 2. crop the regions and extract per-region features using proposals - 3. make per-region predictions with different heads - - It can have many variants, implemented as subclasses of this class. - This base class contains the logic to match/sample proposals. 
- But it is not necessary to inherit this class if the sampling logic is not needed. - """ - - @configurable - def __init__( - self, - *, - num_classes, - batch_size_per_image, - positive_sample_fraction, - proposal_matcher, - proposal_append_gt=True - ): - """ - NOTE: this interface is experimental. - - Args: - num_classes (int): number of classes. Used to label background proposals. - batch_size_per_image (int): number of proposals to use for training - positive_sample_fraction (float): fraction of positive (foreground) proposals - to use for training. - proposal_matcher (Matcher): matcher that matches proposals and ground truth - proposal_append_gt (bool): whether to include ground truth as proposals as well - """ - super().__init__() - self.batch_size_per_image = batch_size_per_image - self.positive_sample_fraction = positive_sample_fraction - self.num_classes = num_classes - self.proposal_matcher = proposal_matcher - self.proposal_append_gt = proposal_append_gt - - @classmethod - def from_config(cls, cfg): - return { - "batch_size_per_image": cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE, - "positive_sample_fraction": cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION, - "num_classes": cfg.MODEL.ROI_HEADS.NUM_CLASSES, - "proposal_append_gt": cfg.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT, - # Matcher to assign box proposals to gt boxes - "proposal_matcher": Matcher( - cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS, - cfg.MODEL.ROI_HEADS.IOU_LABELS, - allow_low_quality_matches=False, - ), - } - - def _sample_proposals( - self, matched_idxs: torch.Tensor, matched_labels: torch.Tensor, gt_classes: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Based on the matching between N proposals and M groundtruth, - sample the proposals and set their classification labels. - - Args: - matched_idxs (Tensor): a vector of length N, each is the best-matched - gt index in [0, M) for each proposal. - matched_labels (Tensor): a vector of length N, the matcher's label - (one of cfg.MODEL.ROI_HEADS.IOU_LABELS) for each proposal. - gt_classes (Tensor): a vector of length M. - - Returns: - Tensor: a vector of indices of sampled proposals. Each is in [0, N). - Tensor: a vector of the same length, the classification label for - each sampled proposal. Each sample is labeled as either a category in - [0, num_classes) or the background (num_classes). - """ - has_gt = gt_classes.numel() > 0 - # Get the corresponding GT for each proposal - if has_gt: - gt_classes = gt_classes[matched_idxs] - # Label unmatched proposals (0 label from matcher) as background (label=num_classes) - gt_classes[matched_labels == 0] = self.num_classes - # Label ignore proposals (-1 label) - gt_classes[matched_labels == -1] = -1 - else: - gt_classes = torch.zeros_like(matched_idxs) + self.num_classes - - sampled_fg_idxs, sampled_bg_idxs = subsample_labels( - gt_classes, self.batch_size_per_image, self.positive_sample_fraction, self.num_classes - ) - - sampled_idxs = torch.cat([sampled_fg_idxs, sampled_bg_idxs], dim=0) - return sampled_idxs, gt_classes[sampled_idxs] - - @torch.no_grad() - def label_and_sample_proposals( - self, proposals: List[Instances], targets: List[Instances] - ) -> List[Instances]: - """ - Prepare some proposals to be used to train the ROI heads. - It performs box matching between `proposals` and `targets`, and assigns - training labels to the proposals. - It returns ``self.batch_size_per_image`` random samples from proposals and groundtruth - boxes, with a fraction of positives that is no larger than - ``self.positive_sample_fraction``. 
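
# Self-contained sketch (toy tensors, no Matcher or subsample_labels) of the
# labeling rule in _sample_proposals above: each proposal takes the class of its
# matched gt box, matcher label 0 turns it into background (= num_classes), and
# label -1 marks it as ignored.
import torch

num_classes = 80
matched_idxs = torch.tensor([2, 0, 1, 0])        # best-matched gt index per proposal
matched_labels = torch.tensor([1, 0, 1, -1])     # 1: fg match, 0: bg, -1: ignore
gt_classes_per_box = torch.tensor([15, 3, 57])   # class of each gt box

gt_classes = gt_classes_per_box[matched_idxs]    # class of the matched gt
gt_classes[matched_labels == 0] = num_classes    # background label
gt_classes[matched_labels == -1] = -1            # ignored proposals
print(gt_classes)                                # tensor([57, 80,  3, -1])
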
- - Args: - See :meth:`ROIHeads.forward` - - Returns: - list[Instances]: - length `N` list of `Instances`s containing the proposals - sampled for training. Each `Instances` has the following fields: - - - proposal_boxes: the proposal boxes - - gt_boxes: the ground-truth box that the proposal is assigned to - (this is only meaningful if the proposal has a label > 0; if label = 0 - then the ground-truth box is random) - - Other fields such as "gt_classes", "gt_masks", that's included in `targets`. - """ - gt_boxes = [x.gt_boxes for x in targets] - # Augment proposals with ground-truth boxes. - # In the case of learned proposals (e.g., RPN), when training starts - # the proposals will be low quality due to random initialization. - # It's possible that none of these initial - # proposals have high enough overlap with the gt objects to be used - # as positive examples for the second stage components (box head, - # cls head, mask head). Adding the gt boxes to the set of proposals - # ensures that the second stage components will have some positive - # examples from the start of training. For RPN, this augmentation improves - # convergence and empirically improves box AP on COCO by about 0.5 - # points (under one tested configuration). - if self.proposal_append_gt: - proposals = add_ground_truth_to_proposals(gt_boxes, proposals) - - proposals_with_gt = [] - - num_fg_samples = [] - num_bg_samples = [] - for proposals_per_image, targets_per_image in zip(proposals, targets): - has_gt = len(targets_per_image) > 0 - match_quality_matrix = pairwise_iou( - targets_per_image.gt_boxes, proposals_per_image.proposal_boxes - ) - matched_idxs, matched_labels = self.proposal_matcher(match_quality_matrix) - sampled_idxs, gt_classes = self._sample_proposals( - matched_idxs, matched_labels, targets_per_image.gt_classes - ) - - # Set target attributes of the sampled proposals: - proposals_per_image = proposals_per_image[sampled_idxs] - proposals_per_image.gt_classes = gt_classes - - # We index all the attributes of targets that start with "gt_" - # and have not been added to proposals yet (="gt_classes"). - if has_gt: - sampled_targets = matched_idxs[sampled_idxs] - # NOTE: here the indexing waste some compute, because heads - # like masks, keypoints, etc, will filter the proposals again, - # (by foreground/background, or number of keypoints in the image, etc) - # so we essentially index the data twice. 
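
# Self-contained sketch of the pairwise IoU matrix that label_and_sample_proposals
# hands to the matcher above (detectron2's pairwise_iou does this on Boxes
# objects); the xyxy coordinates below are invented.
import torch

def pairwise_iou(a, b):
    # a: (M, 4) gt boxes, b: (N, 4) proposals, both as (x1, y1, x2, y2)
    area_a = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    lt = torch.max(a[:, None, :2], b[None, :, :2])           # (M, N, 2)
    rb = torch.min(a[:, None, 2:], b[None, :, 2:])           # (M, N, 2)
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    return inter / (area_a[:, None] + area_b[None, :] - inter)

gt = torch.tensor([[0., 0., 10., 10.]])
proposals = torch.tensor([[0., 0., 10., 10.],
                          [5., 5., 15., 15.],
                          [20., 20., 30., 30.]])
print(pairwise_iou(gt, proposals))               # tensor([[1.0000, 0.1429, 0.0000]])
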
- for (trg_name, trg_value) in targets_per_image.get_fields().items(): - if trg_name.startswith("gt_") and not proposals_per_image.has(trg_name): - proposals_per_image.set(trg_name, trg_value[sampled_targets]) - else: - gt_boxes = Boxes( - targets_per_image.gt_boxes.tensor.new_zeros((len(sampled_idxs), 4)) - ) - proposals_per_image.gt_boxes = gt_boxes - - num_bg_samples.append((gt_classes == self.num_classes).sum().item()) - num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1]) - proposals_with_gt.append(proposals_per_image) - - # Log the number of fg/bg samples that are selected for training ROI heads - storage = get_event_storage() - storage.put_scalar("roi_head/num_fg_samples", np.mean(num_fg_samples)) - storage.put_scalar("roi_head/num_bg_samples", np.mean(num_bg_samples)) - - return proposals_with_gt - - def forward( - self, - images: ImageList, - features: Dict[str, torch.Tensor], - proposals: List[Instances], - targets: Optional[List[Instances]] = None, - ) -> Tuple[List[Instances], Dict[str, torch.Tensor]]: - """ - Args: - images (ImageList): - features (dict[str,Tensor]): input data as a mapping from feature - map name to tensor. Axis 0 represents the number of images `N` in - the input data; axes 1-3 are channels, height, and width, which may - vary between feature maps (e.g., if a feature pyramid is used). - proposals (list[Instances]): length `N` list of `Instances`. The i-th - `Instances` contains object proposals for the i-th input image, - with fields "proposal_boxes" and "objectness_logits". - targets (list[Instances], optional): length `N` list of `Instances`. The i-th - `Instances` contains the ground-truth per-instance annotations - for the i-th input image. Specify `targets` during training only. - It may have the following fields: - - - gt_boxes: the bounding box of each instance. - - gt_classes: the label for each instance with a category ranging in [0, #class]. - - gt_masks: PolygonMasks or BitMasks, the ground-truth masks of each instance. - - gt_keypoints: NxKx3, the groud-truth keypoints for each instance. - - Returns: - list[Instances]: length `N` list of `Instances` containing the - detected instances. Returned during inference only; may be [] during training. - - dict[str->Tensor]: - mapping from a named loss to a tensor storing the loss. Used during training only. - """ - raise NotImplementedError() - - -@ROI_HEADS_REGISTRY.register() -class Res5ROIHeads(ROIHeads): - """ - The ROIHeads in a typical "C4" R-CNN model, where - the box and mask head share the cropping and - the per-region feature computation by a Res5 block. 
- """ - - def __init__(self, cfg, input_shape): - super().__init__(cfg) - - # fmt: off - self.in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES - pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION - pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE - pooler_scales = (1.0 / input_shape[self.in_features[0]].stride, ) - sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO - self.mask_on = cfg.MODEL.MASK_ON - # fmt: on - assert not cfg.MODEL.KEYPOINT_ON - assert len(self.in_features) == 1 - - self.pooler = ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type=pooler_type, - ) - - self.res5, out_channels = self._build_res5_block(cfg) - self.box_predictor = FastRCNNOutputLayers( - cfg, ShapeSpec(channels=out_channels, height=1, width=1) - ) - - if self.mask_on: - self.mask_head = build_mask_head( - cfg, - ShapeSpec(channels=out_channels, width=pooler_resolution, height=pooler_resolution), - ) - - def _build_res5_block(self, cfg): - # fmt: off - stage_channel_factor = 2 ** 3 # res5 is 8x res2 - num_groups = cfg.MODEL.RESNETS.NUM_GROUPS - width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP - bottleneck_channels = num_groups * width_per_group * stage_channel_factor - out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS * stage_channel_factor - stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1 - norm = cfg.MODEL.RESNETS.NORM - assert not cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE[-1], \ - "Deformable conv is not yet supported in res5 head." - # fmt: on - - blocks = make_stage( - BottleneckBlock, - 3, - first_stride=2, - in_channels=out_channels // 2, - bottleneck_channels=bottleneck_channels, - out_channels=out_channels, - num_groups=num_groups, - norm=norm, - stride_in_1x1=stride_in_1x1, - ) - return nn.Sequential(*blocks), out_channels - - def _shared_roi_transform(self, features, boxes): - x = self.pooler(features, boxes) - return self.res5(x) - - def forward(self, images, features, proposals, targets=None): - """ - See :meth:`ROIHeads.forward`. - """ - del images - - if self.training: - assert targets - proposals = self.label_and_sample_proposals(proposals, targets) - del targets - - proposal_boxes = [x.proposal_boxes for x in proposals] - box_features = self._shared_roi_transform( - [features[f] for f in self.in_features], proposal_boxes - ) - predictions = self.box_predictor(box_features.mean(dim=[2, 3])) - - if self.training: - del features - losses = self.box_predictor.losses(predictions, proposals) - if self.mask_on: - proposals, fg_selection_masks = select_foreground_proposals( - proposals, self.num_classes - ) - # Since the ROI feature transform is shared between boxes and masks, - # we don't need to recompute features. The mask loss is only defined - # on foreground proposals, so we need to select out the foreground - # features. - mask_features = box_features[torch.cat(fg_selection_masks, dim=0)] - del box_features - losses.update(self.mask_head(mask_features, proposals)) - return [], losses - else: - pred_instances, _ = self.box_predictor.inference(predictions, proposals) - pred_instances = self.forward_with_given_boxes(features, pred_instances) - return pred_instances, {} - - def forward_with_given_boxes(self, features, instances): - """ - Use the given boxes in `instances` to produce other (non-box) per-ROI outputs. - - Args: - features: same as in `forward()` - instances (list[Instances]): instances to predict other outputs. Expect the keys - "pred_boxes" and "pred_classes" to exist. 
- - Returns: - instances (Instances): - the same `Instances` object, with extra - fields such as `pred_masks` or `pred_keypoints`. - """ - assert not self.training - assert instances[0].has("pred_boxes") and instances[0].has("pred_classes") - - if self.mask_on: - features = [features[f] for f in self.in_features] - x = self._shared_roi_transform(features, [x.pred_boxes for x in instances]) - return self.mask_head(x, instances) - else: - return instances - - -@ROI_HEADS_REGISTRY.register() -class StandardROIHeads(ROIHeads): - """ - It's "standard" in a sense that there is no ROI transform sharing - or feature sharing between tasks. - Each head independently processes the input features by each head's - own pooler and head. - - This class is used by most models, such as FPN and C5. - To implement more models, you can subclass it and implement a different - :meth:`forward()` or a head. - """ - - @configurable - def __init__( - self, - *, - box_in_features: List[str], - box_pooler: ROIPooler, - box_head: nn.Module, - box_predictor: nn.Module, - mask_in_features: Optional[List[str]] = None, - mask_pooler: Optional[ROIPooler] = None, - mask_head: Optional[nn.Module] = None, - keypoint_in_features: Optional[List[str]] = None, - keypoint_pooler: Optional[ROIPooler] = None, - keypoint_head: Optional[nn.Module] = None, - train_on_pred_boxes: bool = False, - **kwargs - ): - """ - NOTE: this interface is experimental. - - Args: - box_in_features (list[str]): list of feature names to use for the box head. - box_pooler (ROIPooler): pooler to extra region features for box head - box_head (nn.Module): transform features to make box predictions - box_predictor (nn.Module): make box predictions from the feature. - Should have the same interface as :class:`FastRCNNOutputLayers`. - mask_in_features (list[str]): list of feature names to use for the mask head. - None if not using mask head. - mask_pooler (ROIPooler): pooler to extra region features for mask head - mask_head (nn.Module): transform features to make mask predictions - keypoint_in_features, keypoint_pooler, keypoint_head: similar to ``mask*``. - train_on_pred_boxes (bool): whether to use proposal boxes or - predicted boxes from the box head to train other heads. - """ - super().__init__(**kwargs) - # keep self.in_features for backward compatibility - self.in_features = self.box_in_features = box_in_features - self.box_pooler = box_pooler - self.box_head = box_head - self.box_predictor = box_predictor - - self.mask_on = mask_in_features is not None - if self.mask_on: - self.mask_in_features = mask_in_features - self.mask_pooler = mask_pooler - self.mask_head = mask_head - self.keypoint_on = keypoint_in_features is not None - if self.keypoint_on: - self.keypoint_in_features = keypoint_in_features - self.keypoint_pooler = keypoint_pooler - self.keypoint_head = keypoint_head - - self.train_on_pred_boxes = train_on_pred_boxes - - @classmethod - def from_config(cls, cfg, input_shape): - ret = super().from_config(cfg) - ret["train_on_pred_boxes"] = cfg.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES - # Subclasses that have not been updated to use from_config style construction - # may have overridden _init_*_head methods. In this case, those overridden methods - # will not be classmethods and we need to avoid trying to call them here. - # We test for this with ismethod which only returns True for bound methods of cls. - # Such subclasses will need to handle calling their overridden _init_*_head methods. 
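
# Worked example (assumed, standard FPN strides) of the pooler scales that
# _init_box_head derives below: one scale per input feature map, equal to
# 1 / stride of that map.
strides = {"p2": 4, "p3": 8, "p4": 16, "p5": 32}
in_features = ["p2", "p3", "p4", "p5"]
pooler_scales = tuple(1.0 / strides[k] for k in in_features)
print(pooler_scales)          # (0.25, 0.125, 0.0625, 0.03125)
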
- if inspect.ismethod(cls._init_box_head): - ret.update(cls._init_box_head(cfg, input_shape)) - if inspect.ismethod(cls._init_mask_head): - ret.update(cls._init_mask_head(cfg, input_shape)) - if inspect.ismethod(cls._init_keypoint_head): - ret.update(cls._init_keypoint_head(cfg, input_shape)) - return ret - - @classmethod - def _init_box_head(cls, cfg, input_shape): - # fmt: off - in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES - pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION - pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) - sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO - pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE - # fmt: on - - # If StandardROIHeads is applied on multiple feature maps (as in FPN), - # then we share the same predictors and therefore the channel counts must be the same - in_channels = [input_shape[f].channels for f in in_features] - # Check all channel counts are equal - assert len(set(in_channels)) == 1, in_channels - in_channels = in_channels[0] - - box_pooler = ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type=pooler_type, - ) - # Here we split "box head" and "box predictor", which is mainly due to historical reasons. - # They are used together so the "box predictor" layers should be part of the "box head". - # New subclasses of ROIHeads do not need "box predictor"s. - box_head = build_box_head( - cfg, ShapeSpec(channels=in_channels, height=pooler_resolution, width=pooler_resolution) - ) - box_predictor = FastRCNNOutputLayers(cfg, box_head.output_shape) - return { - "box_in_features": in_features, - "box_pooler": box_pooler, - "box_head": box_head, - "box_predictor": box_predictor, - } - - @classmethod - def _init_mask_head(cls, cfg, input_shape): - if not cfg.MODEL.MASK_ON: - return {} - # fmt: off - in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES - pooler_resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION - pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) - sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO - pooler_type = cfg.MODEL.ROI_MASK_HEAD.POOLER_TYPE - # fmt: on - - in_channels = [input_shape[f].channels for f in in_features][0] - - ret = {"mask_in_features": in_features} - ret["mask_pooler"] = ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type=pooler_type, - ) - ret["mask_head"] = build_mask_head( - cfg, ShapeSpec(channels=in_channels, width=pooler_resolution, height=pooler_resolution) - ) - return ret - - @classmethod - def _init_keypoint_head(cls, cfg, input_shape): - if not cfg.MODEL.KEYPOINT_ON: - return {} - # fmt: off - in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES - pooler_resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION - pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) # noqa - sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO - pooler_type = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_TYPE - # fmt: on - - in_channels = [input_shape[f].channels for f in in_features][0] - - ret = {"keypoint_in_features": in_features} - ret["keypoint_pooler"] = ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type=pooler_type, - ) - ret["keypoint_head"] = build_keypoint_head( - cfg, ShapeSpec(channels=in_channels, width=pooler_resolution, height=pooler_resolution) - ) - return ret - - def forward( - self, - images: ImageList, - features: Dict[str, 
torch.Tensor], - proposals: List[Instances], - targets: Optional[List[Instances]] = None, - ) -> Tuple[List[Instances], Dict[str, torch.Tensor]]: - """ - See :class:`ROIHeads.forward`. - """ - del images - if self.training: - assert targets - proposals = self.label_and_sample_proposals(proposals, targets) - del targets - - if self.training: - losses = self._forward_box(features, proposals) - # Usually the original proposals used by the box head are used by the mask, keypoint - # heads. But when `self.train_on_pred_boxes is True`, proposals will contain boxes - # predicted by the box head. - losses.update(self._forward_mask(features, proposals)) - losses.update(self._forward_keypoint(features, proposals)) - return proposals, losses - else: - pred_instances = self._forward_box(features, proposals) - # During inference cascaded prediction is used: the mask and keypoints heads are only - # applied to the top scoring box detections. - pred_instances = self.forward_with_given_boxes(features, pred_instances) - return pred_instances, {} - - def forward_with_given_boxes( - self, features: Dict[str, torch.Tensor], instances: List[Instances] - ) -> List[Instances]: - """ - Use the given boxes in `instances` to produce other (non-box) per-ROI outputs. - - This is useful for downstream tasks where a box is known, but need to obtain - other attributes (outputs of other heads). - Test-time augmentation also uses this. - - Args: - features: same as in `forward()` - instances (list[Instances]): instances to predict other outputs. Expect the keys - "pred_boxes" and "pred_classes" to exist. - - Returns: - instances (list[Instances]): - the same `Instances` objects, with extra - fields such as `pred_masks` or `pred_keypoints`. - """ - assert not self.training - assert instances[0].has("pred_boxes") and instances[0].has("pred_classes") - - instances = self._forward_mask(features, instances) - instances = self._forward_keypoint(features, instances) - return instances - - def _forward_box( - self, features: Dict[str, torch.Tensor], proposals: List[Instances] - ) -> Union[Dict[str, torch.Tensor], List[Instances]]: - """ - Forward logic of the box prediction branch. If `self.train_on_pred_boxes is True`, - the function puts predicted boxes in the `proposal_boxes` field of `proposals` argument. - - Args: - features (dict[str, Tensor]): mapping from feature map names to tensor. - Same as in :meth:`ROIHeads.forward`. - proposals (list[Instances]): the per-image object proposals with - their matching ground truth. - Each has fields "proposal_boxes", and "objectness_logits", - "gt_classes", "gt_boxes". - - Returns: - In training, a dict of losses. - In inference, a list of `Instances`, the predicted instances. - """ - features = [features[f] for f in self.box_in_features] - box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals]) - box_features = self.box_head(box_features) - predictions = self.box_predictor(box_features) - del box_features - - if self.training: - losses = self.box_predictor.losses(predictions, proposals) - # proposals is modified in-place below, so losses must be computed first. 
- if self.train_on_pred_boxes: - with torch.no_grad(): - pred_boxes = self.box_predictor.predict_boxes_for_gt_classes( - predictions, proposals - ) - for proposals_per_image, pred_boxes_per_image in zip(proposals, pred_boxes): - proposals_per_image.proposal_boxes = Boxes(pred_boxes_per_image) - return losses - else: - pred_instances, _ = self.box_predictor.inference(predictions, proposals) - return pred_instances - - def _forward_mask( - self, features: Dict[str, torch.Tensor], instances: List[Instances] - ) -> Union[Dict[str, torch.Tensor], List[Instances]]: - """ - Forward logic of the mask prediction branch. - - Args: - features (dict[str, Tensor]): mapping from feature map names to tensor. - Same as in :meth:`ROIHeads.forward`. - instances (list[Instances]): the per-image instances to train/predict masks. - In training, they can be the proposals. - In inference, they can be the predicted boxes. - - Returns: - In training, a dict of losses. - In inference, update `instances` with new fields "pred_masks" and return it. - """ - if not self.mask_on: - return {} if self.training else instances - - features = [features[f] for f in self.mask_in_features] - - if self.training: - # The loss is only defined on positive proposals. - proposals, _ = select_foreground_proposals(instances, self.num_classes) - proposal_boxes = [x.proposal_boxes for x in proposals] - mask_features = self.mask_pooler(features, proposal_boxes) - return self.mask_head(mask_features, proposals) - else: - pred_boxes = [x.pred_boxes for x in instances] - mask_features = self.mask_pooler(features, pred_boxes) - return self.mask_head(mask_features, instances) - - def _forward_keypoint( - self, features: Dict[str, torch.Tensor], instances: List[Instances] - ) -> Union[Dict[str, torch.Tensor], List[Instances]]: - """ - Forward logic of the keypoint prediction branch. - - Args: - features (dict[str, Tensor]): mapping from feature map names to tensor. - Same as in :meth:`ROIHeads.forward`. - instances (list[Instances]): the per-image instances to train/predict keypoints. - In training, they can be the proposals. - In inference, they can be the predicted boxes. - - Returns: - In training, a dict of losses. - In inference, update `instances` with new fields "pred_keypoints" and return it. - """ - if not self.keypoint_on: - return {} if self.training else instances - - features = [features[f] for f in self.keypoint_in_features] - - if self.training: - # The loss is defined on positive proposals with >=1 visible keypoints. - proposals, _ = select_foreground_proposals(instances, self.num_classes) - proposals = select_proposals_with_visible_keypoints(proposals) - proposal_boxes = [x.proposal_boxes for x in proposals] - - keypoint_features = self.keypoint_pooler(features, proposal_boxes) - return self.keypoint_head(keypoint_features, proposals) - else: - pred_boxes = [x.pred_boxes for x in instances] - keypoint_features = self.keypoint_pooler(features, pred_boxes) - return self.keypoint_head(keypoint_features, instances) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/rotated_fast_rcnn.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/rotated_fast_rcnn.py deleted file mode 100644 index 3d7362d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/rotated_fast_rcnn.py +++ /dev/null @@ -1,276 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import logging -import numpy as np -import torch - -from detectron2.config import configurable -from detectron2.layers import ShapeSpec, batched_nms_rotated -from detectron2.structures import Instances, RotatedBoxes, pairwise_iou_rotated -from detectron2.utils.events import get_event_storage - -from ..box_regression import Box2BoxTransformRotated -from ..poolers import ROIPooler -from ..proposal_generator.proposal_utils import add_ground_truth_to_proposals -from .box_head import build_box_head -from .fast_rcnn import FastRCNNOutputLayers -from .roi_heads import ROI_HEADS_REGISTRY, StandardROIHeads - -logger = logging.getLogger(__name__) - -""" -Shape shorthand in this module: - - N: number of images in the minibatch - R: number of ROIs, combined over all images, in the minibatch - Ri: number of ROIs in image i - K: number of foreground classes. E.g.,there are 80 foreground classes in COCO. - -Naming convention: - - deltas: refers to the 5-d (dx, dy, dw, dh, da) deltas that parameterize the box2box - transform (see :class:`box_regression.Box2BoxTransformRotated`). - - pred_class_logits: predicted class scores in [-inf, +inf]; use - softmax(pred_class_logits) to estimate P(class). - - gt_classes: ground-truth classification labels in [0, K], where [0, K) represent - foreground object classes and K represents the background class. - - pred_proposal_deltas: predicted rotated box2box transform deltas for transforming proposals - to detection box predictions. - - gt_proposal_deltas: ground-truth rotated box2box transform deltas -""" - - -def fast_rcnn_inference_rotated( - boxes, scores, image_shapes, score_thresh, nms_thresh, topk_per_image -): - """ - Call `fast_rcnn_inference_single_image_rotated` for all images. - - Args: - boxes (list[Tensor]): A list of Tensors of predicted class-specific or class-agnostic - boxes for each image. Element i has shape (Ri, K * 5) if doing - class-specific regression, or (Ri, 5) if doing class-agnostic - regression, where Ri is the number of predicted objects for image i. - This is compatible with the output of :meth:`FastRCNNOutputs.predict_boxes`. - scores (list[Tensor]): A list of Tensors of predicted class scores for each image. - Element i has shape (Ri, K + 1), where Ri is the number of predicted objects - for image i. Compatible with the output of :meth:`FastRCNNOutputs.predict_probs`. - image_shapes (list[tuple]): A list of (width, height) tuples for each image in the batch. - score_thresh (float): Only return detections with a confidence score exceeding this - threshold. - nms_thresh (float): The threshold to use for box non-maximum suppression. Value in [0, 1]. - topk_per_image (int): The number of top scoring detections to return. Set < 0 to return - all detections. - - Returns: - instances: (list[Instances]): A list of N instances, one for each image in the batch, - that stores the topk most confidence detections. - kept_indices: (list[Tensor]): A list of 1D tensor of length of N, each element indicates - the corresponding boxes/scores index in [0, Ri) from the input, for image i. 
- """ - result_per_image = [ - fast_rcnn_inference_single_image_rotated( - boxes_per_image, scores_per_image, image_shape, score_thresh, nms_thresh, topk_per_image - ) - for scores_per_image, boxes_per_image, image_shape in zip(scores, boxes, image_shapes) - ] - return [x[0] for x in result_per_image], [x[1] for x in result_per_image] - - -def fast_rcnn_inference_single_image_rotated( - boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image -): - """ - Single-image inference. Return rotated bounding-box detection results by thresholding - on scores and applying rotated non-maximum suppression (Rotated NMS). - - Args: - Same as `fast_rcnn_inference_rotated`, but with rotated boxes, scores, and image shapes - per image. - - Returns: - Same as `fast_rcnn_inference_rotated`, but for only one image. - """ - valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1) - if not valid_mask.all(): - boxes = boxes[valid_mask] - scores = scores[valid_mask] - - B = 5 # box dimension - scores = scores[:, :-1] - num_bbox_reg_classes = boxes.shape[1] // B - # Convert to Boxes to use the `clip` function ... - boxes = RotatedBoxes(boxes.reshape(-1, B)) - boxes.clip(image_shape) - boxes = boxes.tensor.view(-1, num_bbox_reg_classes, B) # R x C x B - # Filter results based on detection scores - filter_mask = scores > score_thresh # R x K - # R' x 2. First column contains indices of the R predictions; - # Second column contains indices of classes. - filter_inds = filter_mask.nonzero() - if num_bbox_reg_classes == 1: - boxes = boxes[filter_inds[:, 0], 0] - else: - boxes = boxes[filter_mask] - scores = scores[filter_mask] - - # Apply per-class Rotated NMS - keep = batched_nms_rotated(boxes, scores, filter_inds[:, 1], nms_thresh) - if topk_per_image >= 0: - keep = keep[:topk_per_image] - boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep] - - result = Instances(image_shape) - result.pred_boxes = RotatedBoxes(boxes) - result.scores = scores - result.pred_classes = filter_inds[:, 1] - - return result, filter_inds[:, 0] - - -class RotatedFastRCNNOutputLayers(FastRCNNOutputLayers): - """ - Two linear layers for predicting Rotated Fast R-CNN outputs. - """ - - @classmethod - def from_config(cls, cfg, input_shape): - args = super().from_config(cfg, input_shape) - args["box2box_transform"] = Box2BoxTransformRotated( - weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS - ) - return args - - def inference(self, predictions, proposals): - """ - Returns: - list[Instances]: same as `fast_rcnn_inference_rotated`. - list[Tensor]: same as `fast_rcnn_inference_rotated`. - """ - boxes = self.predict_boxes(predictions, proposals) - scores = self.predict_probs(predictions, proposals) - image_shapes = [x.image_size for x in proposals] - - return fast_rcnn_inference_rotated( - boxes, - scores, - image_shapes, - self.test_score_thresh, - self.test_nms_thresh, - self.test_topk_per_image, - ) - - -@ROI_HEADS_REGISTRY.register() -class RROIHeads(StandardROIHeads): - """ - This class is used by Rotated Fast R-CNN to detect rotated boxes. - For now, it only supports box predictions but not mask or keypoints. - """ - - @configurable - def __init__(self, **kwargs): - """ - NOTE: this interface is experimental. - """ - super().__init__(**kwargs) - assert ( - not self.mask_on and not self.keypoint_on - ), "Mask/Keypoints not supported in Rotated ROIHeads." - assert not self.train_on_pred_boxes, "train_on_pred_boxes not implemented for RROIHeads!" 
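# Editor's sketch (not part of the original diff): a minimal, assumption-laden config for
# exercising the RROIHeads registered above. Assumes detectron2 is installed; the rotated
# proposal-generator and anchor-generator names come from detectron2's rotated Faster R-CNN
# setup and are not defined in this file.
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.MODEL.ROI_HEADS.NAME = "RROIHeads"                  # registered via @ROI_HEADS_REGISTRY.register() above
cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"              # rotated proposals expected upstream (assumption)
cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated"  # the only pooler accepted by _init_box_head below
cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0, 1.0)  # 5-d (dx, dy, dw, dh, da) deltas
cfg.MODEL.MASK_ON = False                               # masks/keypoints unsupported, per the asserts above
cfg.MODEL.KEYPOINT_ON = False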
- - @classmethod - def _init_box_head(cls, cfg, input_shape): - # fmt: off - in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES - pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION - pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) - sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO - pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE - # fmt: on - assert pooler_type in ["ROIAlignRotated"], pooler_type - # assume all channel counts are equal - in_channels = [input_shape[f].channels for f in in_features][0] - - box_pooler = ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type=pooler_type, - ) - box_head = build_box_head( - cfg, ShapeSpec(channels=in_channels, height=pooler_resolution, width=pooler_resolution) - ) - # This line is the only difference v.s. StandardROIHeads - box_predictor = RotatedFastRCNNOutputLayers(cfg, box_head.output_shape) - return { - "box_in_features": in_features, - "box_pooler": box_pooler, - "box_head": box_head, - "box_predictor": box_predictor, - } - - @torch.no_grad() - def label_and_sample_proposals(self, proposals, targets): - """ - Prepare some proposals to be used to train the RROI heads. - It performs box matching between `proposals` and `targets`, and assigns - training labels to the proposals. - It returns `self.batch_size_per_image` random samples from proposals and groundtruth boxes, - with a fraction of positives that is no larger than `self.positive_sample_fraction. - - Args: - See :meth:`StandardROIHeads.forward` - - Returns: - list[Instances]: length `N` list of `Instances`s containing the proposals - sampled for training. Each `Instances` has the following fields: - - proposal_boxes: the rotated proposal boxes - - gt_boxes: the ground-truth rotated boxes that the proposal is assigned to - (this is only meaningful if the proposal has a label > 0; if label = 0 - then the ground-truth box is random) - - gt_classes: the ground-truth classification lable for each proposal - """ - gt_boxes = [x.gt_boxes for x in targets] - if self.proposal_append_gt: - proposals = add_ground_truth_to_proposals(gt_boxes, proposals) - - proposals_with_gt = [] - - num_fg_samples = [] - num_bg_samples = [] - for proposals_per_image, targets_per_image in zip(proposals, targets): - has_gt = len(targets_per_image) > 0 - match_quality_matrix = pairwise_iou_rotated( - targets_per_image.gt_boxes, proposals_per_image.proposal_boxes - ) - matched_idxs, matched_labels = self.proposal_matcher(match_quality_matrix) - sampled_idxs, gt_classes = self._sample_proposals( - matched_idxs, matched_labels, targets_per_image.gt_classes - ) - - proposals_per_image = proposals_per_image[sampled_idxs] - proposals_per_image.gt_classes = gt_classes - - if has_gt: - sampled_targets = matched_idxs[sampled_idxs] - proposals_per_image.gt_boxes = targets_per_image.gt_boxes[sampled_targets] - else: - gt_boxes = RotatedBoxes( - targets_per_image.gt_boxes.tensor.new_zeros((len(sampled_idxs), 5)) - ) - proposals_per_image.gt_boxes = gt_boxes - - num_bg_samples.append((gt_classes == self.num_classes).sum().item()) - num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1]) - proposals_with_gt.append(proposals_per_image) - - # Log the number of fg/bg samples that are selected for training ROI heads - storage = get_event_storage() - storage.put_scalar("roi_head/num_fg_samples", np.mean(num_fg_samples)) - storage.put_scalar("roi_head/num_bg_samples", np.mean(num_bg_samples)) - - return proposals_with_gt diff 
--git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/sampling.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/sampling.py deleted file mode 100644 index ecf251a..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/sampling.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import torch - -__all__ = ["subsample_labels"] - - -def subsample_labels(labels, num_samples, positive_fraction, bg_label): - """ - Return `num_samples` (or fewer, if not enough found) - random samples from `labels` which is a mixture of positives & negatives. - It will try to return as many positives as possible without - exceeding `positive_fraction * num_samples`, and then try to - fill the remaining slots with negatives. - - Args: - labels (Tensor): (N, ) label vector with values: - * -1: ignore - * bg_label: background ("negative") class - * otherwise: one or more foreground ("positive") classes - num_samples (int): The total number of labels with value >= 0 to return. - Values that are not sampled will be filled with -1 (ignore). - positive_fraction (float): The number of subsampled labels with values > 0 - is `min(num_positives, int(positive_fraction * num_samples))`. The number - of negatives sampled is `min(num_negatives, num_samples - num_positives_sampled)`. - In order words, if there are not enough positives, the sample is filled with - negatives. If there are also not enough negatives, then as many elements are - sampled as is possible. - bg_label (int): label index of background ("negative") class. - - Returns: - pos_idx, neg_idx (Tensor): - 1D vector of indices. The total length of both is `num_samples` or fewer. - """ - positive = torch.nonzero((labels != -1) & (labels != bg_label), as_tuple=True)[0] - negative = torch.nonzero(labels == bg_label, as_tuple=True)[0] - - num_pos = int(num_samples * positive_fraction) - # protect against not enough positive examples - num_pos = min(positive.numel(), num_pos) - num_neg = num_samples - num_pos - # protect against not enough negative examples - num_neg = min(negative.numel(), num_neg) - - # randomly select positive and negative examples - perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] - perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] - - pos_idx = positive[perm1] - neg_idx = negative[perm2] - return pos_idx, neg_idx diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/test_time_augmentation.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/test_time_augmentation.py deleted file mode 100644 index 1e5bcf0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/test_time_augmentation.py +++ /dev/null @@ -1,285 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import copy -import numpy as np -from contextlib import contextmanager -from itertools import count -import torch -from torch import nn -from torch.nn.parallel import DistributedDataParallel - -from detectron2.data.detection_utils import read_image -from detectron2.data.transforms import ResizeShortestEdge -from detectron2.structures import Instances - -from .meta_arch import GeneralizedRCNN -from .postprocessing import detector_postprocess -from .roi_heads.fast_rcnn import fast_rcnn_inference_single_image - -__all__ = ["DatasetMapperTTA", "GeneralizedRCNNWithTTA"] - - -class DatasetMapperTTA: - """ - Implement test-time augmentation for detection data. - It is a callable which takes a dataset dict from a detection dataset, - and returns a list of dataset dicts where the images - are augmented from the input image by the transformations defined in the config. - This is used for test-time augmentation. - """ - - def __init__(self, cfg): - self.min_sizes = cfg.TEST.AUG.MIN_SIZES - self.max_size = cfg.TEST.AUG.MAX_SIZE - self.flip = cfg.TEST.AUG.FLIP - self.image_format = cfg.INPUT.FORMAT - - def __call__(self, dataset_dict): - """ - Args: - dict: a detection dataset dict - - Returns: - list[dict]: - a list of dataset dicts, which contain augmented version of the input image. - The total number of dicts is ``len(min_sizes) * (2 if flip else 1)``. - """ - ret = [] - if "image" not in dataset_dict: - numpy_image = read_image(dataset_dict["file_name"], self.image_format) - else: - numpy_image = dataset_dict["image"].permute(1, 2, 0).numpy().astype("uint8") - for min_size in self.min_sizes: - image = np.copy(numpy_image) - tfm = ResizeShortestEdge(min_size, self.max_size).get_transform(image) - resized = tfm.apply_image(image) - resized = torch.as_tensor(resized.transpose(2, 0, 1).astype("float32")) - - dic = copy.deepcopy(dataset_dict) - dic["horiz_flip"] = False - dic["image"] = resized - ret.append(dic) - - if self.flip: - dic = copy.deepcopy(dataset_dict) - dic["horiz_flip"] = True - dic["image"] = torch.flip(resized, dims=[2]) - ret.append(dic) - return ret - - -class GeneralizedRCNNWithTTA(nn.Module): - """ - A GeneralizedRCNN with test-time augmentation enabled. - Its :meth:`__call__` method has the same interface as :meth:`GeneralizedRCNN.forward`. - """ - - def __init__(self, cfg, model, tta_mapper=None, batch_size=3): - """ - Args: - cfg (CfgNode): - model (GeneralizedRCNN): a GeneralizedRCNN to apply TTA on. - tta_mapper (callable): takes a dataset dict and returns a list of - augmented versions of the dataset dict. Defaults to - `DatasetMapperTTA(cfg)`. - batch_size (int): batch the augmented images into this batch size for inference. - """ - super().__init__() - if isinstance(model, DistributedDataParallel): - model = model.module - assert isinstance( - model, GeneralizedRCNN - ), "TTA is only supported on GeneralizedRCNN. Got a model of type {}".format(type(model)) - self.cfg = cfg.clone() - assert not self.cfg.MODEL.KEYPOINT_ON, "TTA for keypoint is not supported yet" - assert ( - not self.cfg.MODEL.LOAD_PROPOSALS - ), "TTA for pre-computed proposals is not supported yet" - - self.model = model - - if tta_mapper is None: - tta_mapper = DatasetMapperTTA(cfg) - self.tta_mapper = tta_mapper - self.batch_size = batch_size - - @contextmanager - def _turn_off_roi_heads(self, attrs): - """ - Open a context where some heads in `model.roi_heads` are temporarily turned off. 
- Args: - attr (list[str]): the attribute in `model.roi_heads` which can be used - to turn off a specific head, e.g., "mask_on", "keypoint_on". - """ - roi_heads = self.model.roi_heads - old = {} - for attr in attrs: - try: - old[attr] = getattr(roi_heads, attr) - except AttributeError: - # The head may not be implemented in certain ROIHeads - pass - - if len(old.keys()) == 0: - yield - else: - for attr in old.keys(): - setattr(roi_heads, attr, False) - yield - for attr in old.keys(): - setattr(roi_heads, attr, old[attr]) - - def _batch_inference(self, batched_inputs, detected_instances=None, do_postprocess=True): - """ - Execute inference on a list of inputs, - using batch size = self.batch_size, instead of the length of the list. - - Inputs & outputs have the same format as :meth:`GeneralizedRCNN.inference` - """ - if detected_instances is None: - detected_instances = [None] * len(batched_inputs) - - outputs = [] - inputs, instances = [], [] - for idx, input, instance in zip(count(), batched_inputs, detected_instances): - inputs.append(input) - instances.append(instance) - if len(inputs) == self.batch_size or idx == len(batched_inputs) - 1: - outputs.extend( - self.model.inference( - inputs, - instances if instances[0] is not None else None, - do_postprocess=do_postprocess, - ) - ) - inputs, instances = [], [] - return outputs - - def __call__(self, batched_inputs): - """ - Same input/output format as :meth:`GeneralizedRCNN.forward` - """ - return [self._inference_one_image(x) for x in batched_inputs] - - def _detector_postprocess(self, outputs, aug_vars): - return detector_postprocess(outputs, aug_vars["height"], aug_vars["width"]) - - def _inference_one_image(self, input): - """ - Args: - input (dict): one dataset dict - - Returns: - dict: one output dict - """ - - augmented_inputs, aug_vars = self._get_augmented_inputs(input) - # Detect boxes from all augmented versions - with self._turn_off_roi_heads(["mask_on", "keypoint_on"]): - # temporarily disable roi heads - all_boxes, all_scores, all_classes = self._get_augmented_boxes( - augmented_inputs, aug_vars - ) - merged_instances = self._merge_detections( - all_boxes, all_scores, all_classes, (aug_vars["height"], aug_vars["width"]) - ) - - if self.cfg.MODEL.MASK_ON: - # Use the detected boxes to obtain new fields - augmented_instances = self._rescale_detected_boxes( - augmented_inputs, merged_instances, aug_vars - ) - # run forward on the detected boxes - outputs = self._batch_inference( - augmented_inputs, augmented_instances, do_postprocess=False - ) - # Delete now useless variables to avoid being out of memory - del augmented_inputs, augmented_instances, merged_instances - # average the predictions - outputs[0].pred_masks = self._reduce_pred_masks(outputs, aug_vars) - # postprocess - output = self._detector_postprocess(outputs[0], aug_vars) - return {"instances": output} - else: - return {"instances": merged_instances} - - def _get_augmented_inputs(self, input): - augmented_inputs = self.tta_mapper(input) - - do_hflip = [k.pop("horiz_flip", False) for k in augmented_inputs] - heights = [k["height"] for k in augmented_inputs] - widths = [k["width"] for k in augmented_inputs] - assert ( - len(set(heights)) == 1 and len(set(widths)) == 1 - ), "Augmented version of the inputs should have the same original resolution!" 
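# Editor's sketch (not part of the original diff): a typical way the TTA wrapper above is used.
# Assumes detectron2 is installed; the config and checkpoint paths are placeholders.
from detectron2.config import get_cfg
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.modeling import build_model, GeneralizedRCNNWithTTA

cfg = get_cfg()
cfg.merge_from_file("path/to/config.yaml")    # placeholder config
cfg.TEST.AUG.MIN_SIZES = (400, 500, 600)      # resize scales read by DatasetMapperTTA
cfg.TEST.AUG.FLIP = True

model = build_model(cfg)
DetectionCheckpointer(model).load("path/to/model_final.pth")  # placeholder weights
model.eval()

tta_model = GeneralizedRCNNWithTTA(cfg, model, batch_size=3)
# outputs = tta_model(batched_inputs)  # same list-of-dict format as GeneralizedRCNN.forward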
- height = heights[0] - width = widths[0] - aug_vars = {"height": height, "width": width, "do_hflip": do_hflip} - - return augmented_inputs, aug_vars - - def _get_augmented_boxes(self, augmented_inputs, aug_vars): - # 1: forward with all augmented images - outputs = self._batch_inference(augmented_inputs, do_postprocess=False) - # 2: union the results - all_boxes = [] - all_scores = [] - all_classes = [] - for idx, output in enumerate(outputs): - rescaled_output = self._detector_postprocess(output, aug_vars) - pred_boxes = rescaled_output.pred_boxes.tensor - if aug_vars["do_hflip"][idx]: - pred_boxes[:, [0, 2]] = aug_vars["width"] - pred_boxes[:, [2, 0]] - all_boxes.append(pred_boxes) - all_scores.extend(rescaled_output.scores) - all_classes.extend(rescaled_output.pred_classes) - all_boxes = torch.cat(all_boxes, dim=0).cpu() - return all_boxes, all_scores, all_classes - - def _merge_detections(self, all_boxes, all_scores, all_classes, shape_hw): - # select from the union of all results - num_boxes = len(all_boxes) - num_classes = self.cfg.MODEL.ROI_HEADS.NUM_CLASSES - # +1 because fast_rcnn_inference expects background scores as well - all_scores_2d = torch.zeros(num_boxes, num_classes + 1, device=all_boxes.device) - for idx, cls, score in zip(count(), all_classes, all_scores): - all_scores_2d[idx, cls] = score - - merged_instances, _ = fast_rcnn_inference_single_image( - all_boxes, - all_scores_2d, - shape_hw, - 1e-8, - self.cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST, - self.cfg.TEST.DETECTIONS_PER_IMAGE, - ) - - return merged_instances - - def _rescale_detected_boxes(self, augmented_inputs, merged_instances, aug_vars): - augmented_instances = [] - for idx, input in enumerate(augmented_inputs): - actual_height, actual_width = input["image"].shape[1:3] - scale_x = actual_width * 1.0 / aug_vars["width"] - scale_y = actual_height * 1.0 / aug_vars["height"] - pred_boxes = merged_instances.pred_boxes.clone() - pred_boxes.tensor[:, 0::2] *= scale_x - pred_boxes.tensor[:, 1::2] *= scale_y - if aug_vars["do_hflip"][idx]: - pred_boxes.tensor[:, [0, 2]] = actual_width - pred_boxes.tensor[:, [2, 0]] - - aug_instances = Instances( - image_size=(actual_height, actual_width), - pred_boxes=pred_boxes, - pred_classes=merged_instances.pred_classes, - scores=merged_instances.scores, - ) - augmented_instances.append(aug_instances) - return augmented_instances - - def _reduce_pred_masks(self, outputs, aug_vars): - for idx, output in enumerate(outputs): - if aug_vars["do_hflip"][idx]: - output.pred_masks = output.pred_masks.flip(dims=[3]) - all_pred_masks = torch.stack([o.pred_masks for o in outputs], dim=0) - avg_pred_masks = torch.mean(all_pred_masks, dim=0) - return avg_pred_masks diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/solver/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/solver/__init__.py deleted file mode 100644 index 10f84e1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/solver/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -from .build import build_lr_scheduler, build_optimizer -from .lr_scheduler import WarmupCosineLR, WarmupMultiStepLR - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/solver/build.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/solver/build.py deleted file mode 100644 index 6d9d0ee..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/solver/build.py +++ /dev/null @@ -1,165 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from enum import Enum -from typing import Any, Callable, Dict, Iterable, List, Set, Type, Union -import torch - -from detectron2.config import CfgNode - -from .lr_scheduler import WarmupCosineLR, WarmupMultiStepLR - -_GradientClipperInput = Union[torch.Tensor, Iterable[torch.Tensor]] -_GradientClipper = Callable[[_GradientClipperInput], None] - - -class GradientClipType(Enum): - VALUE = "value" - NORM = "norm" - - -def _create_gradient_clipper(cfg: CfgNode) -> _GradientClipper: - """ - Creates gradient clipping closure to clip by value or by norm, - according to the provided config. - """ - cfg = cfg.clone() - - def clip_grad_norm(p: _GradientClipperInput): - torch.nn.utils.clip_grad_norm_(p, cfg.CLIP_VALUE, cfg.NORM_TYPE) - - def clip_grad_value(p: _GradientClipperInput): - torch.nn.utils.clip_grad_value_(p, cfg.CLIP_VALUE) - - _GRADIENT_CLIP_TYPE_TO_CLIPPER = { - GradientClipType.VALUE: clip_grad_value, - GradientClipType.NORM: clip_grad_norm, - } - return _GRADIENT_CLIP_TYPE_TO_CLIPPER[GradientClipType(cfg.CLIP_TYPE)] - - -def _generate_optimizer_class_with_gradient_clipping( - optimizer_type: Type[torch.optim.Optimizer], gradient_clipper: _GradientClipper -) -> Type[torch.optim.Optimizer]: - """ - Dynamically creates a new type that inherits the type of a given instance - and overrides the `step` method to add gradient clipping - """ - - def optimizer_wgc_step(self, closure=None): - for group in self.param_groups: - for p in group["params"]: - gradient_clipper(p) - super(type(self), self).step(closure) - - OptimizerWithGradientClip = type( - optimizer_type.__name__ + "WithGradientClip", - (optimizer_type,), - {"step": optimizer_wgc_step}, - ) - return OptimizerWithGradientClip - - -def maybe_add_gradient_clipping( - cfg: CfgNode, optimizer: torch.optim.Optimizer -) -> torch.optim.Optimizer: - """ - If gradient clipping is enabled through config options, wraps the existing - optimizer instance of some type OptimizerType to become an instance - of the new dynamically created class OptimizerTypeWithGradientClip - that inherits OptimizerType and overrides the `step` method to - include gradient clipping. - - Args: - cfg: CfgNode - configuration options - optimizer: torch.optim.Optimizer - existing optimizer instance - - Return: - optimizer: torch.optim.Optimizer - either the unmodified optimizer instance (if gradient clipping is - disabled), or the same instance with adjusted __class__ to override - the `step` method and include gradient clipping - """ - if not cfg.SOLVER.CLIP_GRADIENTS.ENABLED: - return optimizer - grad_clipper = _create_gradient_clipper(cfg.SOLVER.CLIP_GRADIENTS) - OptimizerWithGradientClip = _generate_optimizer_class_with_gradient_clipping( - type(optimizer), grad_clipper - ) - optimizer.__class__ = OptimizerWithGradientClip - return optimizer - - -def build_optimizer(cfg: CfgNode, model: torch.nn.Module) -> torch.optim.Optimizer: - """ - Build an optimizer from config. 
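# Editor's sketch (not part of the original diff): how the gradient-clipping wrapper defined
# above is typically driven from the config. The key names mirror the cfg.SOLVER.CLIP_GRADIENTS
# fields read by _create_gradient_clipper / maybe_add_gradient_clipping.
#
#   cfg.SOLVER.CLIP_GRADIENTS.ENABLED = True
#   cfg.SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "norm"    # or "value"
#   cfg.SOLVER.CLIP_GRADIENTS.CLIP_VALUE = 1.0
#   cfg.SOLVER.CLIP_GRADIENTS.NORM_TYPE = 2.0
#   optimizer = build_optimizer(cfg, model)         # returns e.g. an "SGDWithGradientClip" instance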
- """ - norm_module_types = ( - torch.nn.BatchNorm1d, - torch.nn.BatchNorm2d, - torch.nn.BatchNorm3d, - torch.nn.SyncBatchNorm, - # NaiveSyncBatchNorm inherits from BatchNorm2d - torch.nn.GroupNorm, - torch.nn.InstanceNorm1d, - torch.nn.InstanceNorm2d, - torch.nn.InstanceNorm3d, - torch.nn.LayerNorm, - torch.nn.LocalResponseNorm, - ) - params: List[Dict[str, Any]] = [] - memo: Set[torch.nn.parameter.Parameter] = set() - for module in model.modules(): - for key, value in module.named_parameters(recurse=False): - if not value.requires_grad: - continue - # Avoid duplicating parameters - if value in memo: - continue - memo.add(value) - lr = cfg.SOLVER.BASE_LR - weight_decay = cfg.SOLVER.WEIGHT_DECAY - if isinstance(module, norm_module_types): - weight_decay = cfg.SOLVER.WEIGHT_DECAY_NORM - elif key == "bias": - # NOTE: unlike Detectron v1, we now default BIAS_LR_FACTOR to 1.0 - # and WEIGHT_DECAY_BIAS to WEIGHT_DECAY so that bias optimizer - # hyperparameters are by default exactly the same as for regular - # weights. - lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR - weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS - params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}] - - optimizer = torch.optim.SGD( - params, cfg.SOLVER.BASE_LR, momentum=cfg.SOLVER.MOMENTUM, nesterov=cfg.SOLVER.NESTEROV - ) - optimizer = maybe_add_gradient_clipping(cfg, optimizer) - return optimizer - - -def build_lr_scheduler( - cfg: CfgNode, optimizer: torch.optim.Optimizer -) -> torch.optim.lr_scheduler._LRScheduler: - """ - Build a LR scheduler from config. - """ - name = cfg.SOLVER.LR_SCHEDULER_NAME - if name == "WarmupMultiStepLR": - return WarmupMultiStepLR( - optimizer, - cfg.SOLVER.STEPS, - cfg.SOLVER.GAMMA, - warmup_factor=cfg.SOLVER.WARMUP_FACTOR, - warmup_iters=cfg.SOLVER.WARMUP_ITERS, - warmup_method=cfg.SOLVER.WARMUP_METHOD, - ) - elif name == "WarmupCosineLR": - return WarmupCosineLR( - optimizer, - cfg.SOLVER.MAX_ITER, - warmup_factor=cfg.SOLVER.WARMUP_FACTOR, - warmup_iters=cfg.SOLVER.WARMUP_ITERS, - warmup_method=cfg.SOLVER.WARMUP_METHOD, - ) - else: - raise ValueError("Unknown LR scheduler: {}".format(name)) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/solver/lr_scheduler.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/solver/lr_scheduler.py deleted file mode 100644 index 6148d86..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/solver/lr_scheduler.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import math -from bisect import bisect_right -from typing import List -import torch - -# NOTE: PyTorch's LR scheduler interface uses names that assume the LR changes -# only on epoch boundaries. We typically use iteration based schedules instead. -# As a result, "epoch" (e.g., as in self.last_epoch) should be understood to mean -# "iteration" instead. - -# FIXME: ideally this would be achieved with a CombinedLRScheduler, separating -# MultiStepLR with WarmupLR but the current LRScheduler design doesn't allow it. - - -class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): - def __init__( - self, - optimizer: torch.optim.Optimizer, - milestones: List[int], - gamma: float = 0.1, - warmup_factor: float = 0.001, - warmup_iters: int = 1000, - warmup_method: str = "linear", - last_epoch: int = -1, - ): - if not list(milestones) == sorted(milestones): - raise ValueError( - "Milestones should be a list of" " increasing integers. 
Got {}", milestones - ) - self.milestones = milestones - self.gamma = gamma - self.warmup_factor = warmup_factor - self.warmup_iters = warmup_iters - self.warmup_method = warmup_method - super().__init__(optimizer, last_epoch) - - def get_lr(self) -> List[float]: - warmup_factor = _get_warmup_factor_at_iter( - self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor - ) - return [ - base_lr * warmup_factor * self.gamma ** bisect_right(self.milestones, self.last_epoch) - for base_lr in self.base_lrs - ] - - def _compute_values(self) -> List[float]: - # The new interface - return self.get_lr() - - -class WarmupCosineLR(torch.optim.lr_scheduler._LRScheduler): - def __init__( - self, - optimizer: torch.optim.Optimizer, - max_iters: int, - warmup_factor: float = 0.001, - warmup_iters: int = 1000, - warmup_method: str = "linear", - last_epoch: int = -1, - ): - self.max_iters = max_iters - self.warmup_factor = warmup_factor - self.warmup_iters = warmup_iters - self.warmup_method = warmup_method - super().__init__(optimizer, last_epoch) - - def get_lr(self) -> List[float]: - warmup_factor = _get_warmup_factor_at_iter( - self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor - ) - # Different definitions of half-cosine with warmup are possible. For - # simplicity we multiply the standard half-cosine schedule by the warmup - # factor. An alternative is to start the period of the cosine at warmup_iters - # instead of at 0. In the case that warmup_iters << max_iters the two are - # very close to each other. - return [ - base_lr - * warmup_factor - * 0.5 - * (1.0 + math.cos(math.pi * self.last_epoch / self.max_iters)) - for base_lr in self.base_lrs - ] - - def _compute_values(self) -> List[float]: - # The new interface - return self.get_lr() - - -def _get_warmup_factor_at_iter( - method: str, iter: int, warmup_iters: int, warmup_factor: float -) -> float: - """ - Return the learning rate warmup factor at a specific iteration. - See :paper:`in1k1h` for more details. - - Args: - method (str): warmup method; either "constant" or "linear". - iter (int): iteration at which to calculate the warmup factor. - warmup_iters (int): the number of warmup iterations. - warmup_factor (float): the base warmup factor (the meaning changes according - to the method used). - - Returns: - float: the effective warmup factor at the given iteration. - """ - if iter >= warmup_iters: - return 1.0 - - if method == "constant": - return warmup_factor - elif method == "linear": - alpha = iter / warmup_iters - return warmup_factor * (1 - alpha) + alpha - else: - raise ValueError("Unknown warmup method: {}".format(method)) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/__init__.py deleted file mode 100644 index 618f526..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -from .boxes import Boxes, BoxMode, pairwise_iou -from .image_list import ImageList - -from .instances import Instances -from .keypoints import Keypoints, heatmaps_to_keypoints -from .masks import BitMasks, PolygonMasks, rasterize_polygons_within_box, polygons_to_bitmask -from .rotated_boxes import RotatedBoxes -from .rotated_boxes import pairwise_iou as pairwise_iou_rotated - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/boxes.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/boxes.py deleted file mode 100644 index e625803..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/boxes.py +++ /dev/null @@ -1,367 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import math -import numpy as np -from enum import IntEnum, unique -from typing import Iterator, List, Tuple, Union -import torch - -_RawBoxType = Union[List[float], Tuple[float, ...], torch.Tensor, np.ndarray] - - -@unique -class BoxMode(IntEnum): - """ - Enum of different ways to represent a box. - """ - - XYXY_ABS = 0 - """ - (x0, y0, x1, y1) in absolute floating points coordinates. - The coordinates in range [0, width or height]. - """ - XYWH_ABS = 1 - """ - (x0, y0, w, h) in absolute floating points coordinates. - """ - XYXY_REL = 2 - """ - Not yet supported! - (x0, y0, x1, y1) in range [0, 1]. They are relative to the size of the image. - """ - XYWH_REL = 3 - """ - Not yet supported! - (x0, y0, w, h) in range [0, 1]. They are relative to the size of the image. - """ - XYWHA_ABS = 4 - """ - (xc, yc, w, h, a) in absolute floating points coordinates. - (xc, yc) is the center of the rotated box, and the angle a is in degrees ccw. - """ - - @staticmethod - def convert(box: _RawBoxType, from_mode: "BoxMode", to_mode: "BoxMode") -> _RawBoxType: - """ - Args: - box: can be a k-tuple, k-list or an Nxk array/tensor, where k = 4 or 5 - from_mode, to_mode (BoxMode) - - Returns: - The converted box of the same type. - """ - if from_mode == to_mode: - return box - - original_type = type(box) - is_numpy = isinstance(box, np.ndarray) - single_box = isinstance(box, (list, tuple)) - if single_box: - assert len(box) == 4 or len(box) == 5, ( - "BoxMode.convert takes either a k-tuple/list or an Nxk array/tensor," - " where k == 4 or 5" - ) - arr = torch.tensor(box)[None, :] - else: - # avoid modifying the input box - if is_numpy: - arr = torch.from_numpy(np.asarray(box)).clone() - else: - arr = box.clone() - - assert to_mode.value not in [ - BoxMode.XYXY_REL, - BoxMode.XYWH_REL, - ] and from_mode.value not in [ - BoxMode.XYXY_REL, - BoxMode.XYWH_REL, - ], "Relative mode not yet supported!" 
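# Editor's note (not part of the original diff): a worked example of the XYWHA -> XYXY branch
# below. For the rotated box (xc, yc, w, h, a) = (10, 10, 4, 2, 90):
#
#   BoxMode.convert([10, 10, 4, 2, 90], BoxMode.XYWHA_ABS, BoxMode.XYXY_ABS)
#   # -> [9.0, 8.0, 11.0, 12.0]
#
# since |cos 90| = 0 and |sin 90| = 1, the horizontal bounding rectangle has
# new_w = 2 and new_h = 4, centered at (10, 10).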
- - if from_mode == BoxMode.XYWHA_ABS and to_mode == BoxMode.XYXY_ABS: - assert ( - arr.shape[-1] == 5 - ), "The last dimension of input shape must be 5 for XYWHA format" - original_dtype = arr.dtype - arr = arr.double() - - w = arr[:, 2] - h = arr[:, 3] - a = arr[:, 4] - c = torch.abs(torch.cos(a * math.pi / 180.0)) - s = torch.abs(torch.sin(a * math.pi / 180.0)) - # This basically computes the horizontal bounding rectangle of the rotated box - new_w = c * w + s * h - new_h = c * h + s * w - - # convert center to top-left corner - arr[:, 0] -= new_w / 2.0 - arr[:, 1] -= new_h / 2.0 - # bottom-right corner - arr[:, 2] = arr[:, 0] + new_w - arr[:, 3] = arr[:, 1] + new_h - - arr = arr[:, :4].to(dtype=original_dtype) - elif from_mode == BoxMode.XYWH_ABS and to_mode == BoxMode.XYWHA_ABS: - original_dtype = arr.dtype - arr = arr.double() - arr[:, 0] += arr[:, 2] / 2.0 - arr[:, 1] += arr[:, 3] / 2.0 - angles = torch.zeros((arr.shape[0], 1), dtype=arr.dtype) - arr = torch.cat((arr, angles), axis=1).to(dtype=original_dtype) - else: - if to_mode == BoxMode.XYXY_ABS and from_mode == BoxMode.XYWH_ABS: - arr[:, 2] += arr[:, 0] - arr[:, 3] += arr[:, 1] - elif from_mode == BoxMode.XYXY_ABS and to_mode == BoxMode.XYWH_ABS: - arr[:, 2] -= arr[:, 0] - arr[:, 3] -= arr[:, 1] - else: - raise NotImplementedError( - "Conversion from BoxMode {} to {} is not supported yet".format( - from_mode, to_mode - ) - ) - - if single_box: - return original_type(arr.flatten().tolist()) - if is_numpy: - return arr.numpy() - else: - return arr - - -class Boxes: - """ - This structure stores a list of boxes as a Nx4 torch.Tensor. - It supports some common methods about boxes - (`area`, `clip`, `nonempty`, etc), - and also behaves like a Tensor - (support indexing, `to(device)`, `.device`, and iteration over all boxes) - - Attributes: - tensor (torch.Tensor): float matrix of Nx4. Each row is (x1, y1, x2, y2). - """ - - BoxSizeType = Union[List[int], Tuple[int, int]] - - def __init__(self, tensor: torch.Tensor): - """ - Args: - tensor (Tensor[float]): a Nx4 matrix. Each row is (x1, y1, x2, y2). - """ - device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") - tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device) - if tensor.numel() == 0: - # Use reshape, so we don't end up creating a new tensor that does not depend on - # the inputs (and consequently confuses jit) - tensor = tensor.reshape((0, 4)).to(dtype=torch.float32, device=device) - assert tensor.dim() == 2 and tensor.size(-1) == 4, tensor.size() - - self.tensor = tensor - - def clone(self) -> "Boxes": - """ - Clone the Boxes. - - Returns: - Boxes - """ - return Boxes(self.tensor.clone()) - - def to(self, device: str) -> "Boxes": - return Boxes(self.tensor.to(device)) - - def area(self) -> torch.Tensor: - """ - Computes the area of all the boxes. - - Returns: - torch.Tensor: a vector with areas of each box. - """ - box = self.tensor - area = (box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1]) - return area - - def clip(self, box_size: BoxSizeType) -> None: - """ - Clip (in place) the boxes by limiting x coordinates to the range [0, width] - and y coordinates to the range [0, height]. - - Args: - box_size (height, width): The clipping box's size. - """ - assert torch.isfinite(self.tensor).all(), "Box tensor contains infinite or NaN!" 
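# Editor's sketch (not part of the original diff): minimal use of the Boxes container defined
# above; the numbers are made up for illustration.
#
#   boxes = Boxes(torch.tensor([[10., 20., 50., 80.], [-5., 0., 30., 999.]]))
#   boxes.clip((100, 100))      # box_size is (height, width); clamps the second box into the image
#   boxes.nonempty()            # tensor([True, True])
#   boxes.area()                # tensor([2400., 3000.]) after clipping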
- h, w = box_size - self.tensor[:, 0].clamp_(min=0, max=w) - self.tensor[:, 1].clamp_(min=0, max=h) - self.tensor[:, 2].clamp_(min=0, max=w) - self.tensor[:, 3].clamp_(min=0, max=h) - - def nonempty(self, threshold: float = 0.0) -> torch.Tensor: - """ - Find boxes that are non-empty. - A box is considered empty, if either of its side is no larger than threshold. - - Returns: - Tensor: - a binary vector which represents whether each box is empty - (False) or non-empty (True). - """ - box = self.tensor - widths = box[:, 2] - box[:, 0] - heights = box[:, 3] - box[:, 1] - keep = (widths > threshold) & (heights > threshold) - return keep - - def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Boxes": - """ - Returns: - Boxes: Create a new :class:`Boxes` by indexing. - - The following usage are allowed: - - 1. `new_boxes = boxes[3]`: return a `Boxes` which contains only one box. - 2. `new_boxes = boxes[2:10]`: return a slice of boxes. - 3. `new_boxes = boxes[vector]`, where vector is a torch.BoolTensor - with `length = len(boxes)`. Nonzero elements in the vector will be selected. - - Note that the returned Boxes might share storage with this Boxes, - subject to Pytorch's indexing semantics. - """ - if isinstance(item, int): - return Boxes(self.tensor[item].view(1, -1)) - b = self.tensor[item] - assert b.dim() == 2, "Indexing on Boxes with {} failed to return a matrix!".format(item) - return Boxes(b) - - def __len__(self) -> int: - return self.tensor.shape[0] - - def __repr__(self) -> str: - return "Boxes(" + str(self.tensor) + ")" - - def inside_box(self, box_size: BoxSizeType, boundary_threshold: int = 0) -> torch.Tensor: - """ - Args: - box_size (height, width): Size of the reference box. - boundary_threshold (int): Boxes that extend beyond the reference box - boundary by more than boundary_threshold are considered "outside". - - Returns: - a binary vector, indicating whether each box is inside the reference box. - """ - height, width = box_size - inds_inside = ( - (self.tensor[..., 0] >= -boundary_threshold) - & (self.tensor[..., 1] >= -boundary_threshold) - & (self.tensor[..., 2] < width + boundary_threshold) - & (self.tensor[..., 3] < height + boundary_threshold) - ) - return inds_inside - - def get_centers(self) -> torch.Tensor: - """ - Returns: - The box centers in a Nx2 array of (x, y). - """ - return (self.tensor[:, :2] + self.tensor[:, 2:]) / 2 - - def scale(self, scale_x: float, scale_y: float) -> None: - """ - Scale the box with horizontal and vertical scaling factors - """ - self.tensor[:, 0::2] *= scale_x - self.tensor[:, 1::2] *= scale_y - - @classmethod - def cat(cls, boxes_list: List["Boxes"]) -> "Boxes": - """ - Concatenates a list of Boxes into a single Boxes - - Arguments: - boxes_list (list[Boxes]) - - Returns: - Boxes: the concatenated Boxes - """ - assert isinstance(boxes_list, (list, tuple)) - if len(boxes_list) == 0: - return cls(torch.empty(0)) - assert all(isinstance(box, Boxes) for box in boxes_list) - - # use torch.cat (v.s. layers.cat) so the returned boxes never share storage with input - cat_boxes = cls(torch.cat([b.tensor for b in boxes_list], dim=0)) - return cat_boxes - - @property - def device(self) -> torch.device: - return self.tensor.device - - def __iter__(self) -> Iterator[torch.Tensor]: - """ - Yield a box as a Tensor of shape (4,) at a time. 
- """ - yield from self.tensor - - -# implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py -# with slight modifications -def pairwise_iou(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor: - """ - Given two lists of boxes of size N and M, - compute the IoU (intersection over union) - between __all__ N x M pairs of boxes. - The box order must be (xmin, ymin, xmax, ymax). - - Args: - boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively. - - Returns: - Tensor: IoU, sized [N,M]. - """ - area1 = boxes1.area() - area2 = boxes2.area() - - boxes1, boxes2 = boxes1.tensor, boxes2.tensor - - width_height = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) - torch.max( - boxes1[:, None, :2], boxes2[:, :2] - ) # [N,M,2] - - width_height.clamp_(min=0) # [N,M,2] - inter = width_height.prod(dim=2) # [N,M] - del width_height - - # handle empty boxes - iou = torch.where( - inter > 0, - inter / (area1[:, None] + area2 - inter), - torch.zeros(1, dtype=inter.dtype, device=inter.device), - ) - return iou - - -def matched_boxlist_iou(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor: - """ - Compute pairwise intersection over union (IOU) of two sets of matched - boxes. The box order must be (xmin, ymin, xmax, ymax). - Similar to boxlist_iou, but computes only diagonal elements of the matrix - Arguments: - boxes1: (Boxes) bounding boxes, sized [N,4]. - boxes2: (Boxes) bounding boxes, sized [N,4]. - Returns: - (tensor) iou, sized [N]. - """ - assert len(boxes1) == len( - boxes2 - ), "boxlists should have the same" "number of entries, got {}, {}".format( - len(boxes1), len(boxes2) - ) - area1 = boxes1.area() # [N] - area2 = boxes2.area() # [N] - box1, box2 = boxes1.tensor, boxes2.tensor - lt = torch.max(box1[:, :2], box2[:, :2]) # [N,2] - rb = torch.min(box1[:, 2:], box2[:, 2:]) # [N,2] - wh = (rb - lt).clamp(min=0) # [N,2] - inter = wh[:, 0] * wh[:, 1] # [N] - iou = inter / (area1 + area2 - inter) # [N] - return iou diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/image_list.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/image_list.py deleted file mode 100644 index 2d89224..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/image_list.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -from __future__ import division -from typing import Any, List, Sequence, Tuple, Union -import torch -from torch.nn import functional as F - - -class ImageList(object): - """ - Structure that holds a list of images (of possibly - varying sizes) as a single tensor. - This works by padding the images to the same size, - and storing in a field the original sizes of each image - - Attributes: - image_sizes (list[tuple[int, int]]): each tuple is (h, w) - """ - - def __init__(self, tensor: torch.Tensor, image_sizes: List[Tuple[int, int]]): - """ - Arguments: - tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1 - image_sizes (list[tuple[int, int]]): Each tuple is (h, w). It can - be smaller than (H, W) due to padding. - """ - self.tensor = tensor - self.image_sizes = image_sizes - - def __len__(self) -> int: - return len(self.image_sizes) - - def __getitem__(self, idx: Union[int, slice]) -> torch.Tensor: - """ - Access the individual image in its original size. 
- - Returns: - Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1 - """ - size = self.image_sizes[idx] - return self.tensor[idx, ..., : size[0], : size[1]] # type: ignore - - def to(self, *args: Any, **kwargs: Any) -> "ImageList": - cast_tensor = self.tensor.to(*args, **kwargs) - return ImageList(cast_tensor, self.image_sizes) - - @property - def device(self) -> torch.device: - return self.tensor.device - - @staticmethod - def from_tensors( - tensors: Sequence[torch.Tensor], size_divisibility: int = 0, pad_value: float = 0.0 - ) -> "ImageList": - """ - Args: - tensors: a tuple or list of `torch.Tensors`, each of shape (Hi, Wi) or - (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded - to the same shape with `pad_value`. - size_divisibility (int): If `size_divisibility > 0`, add padding to ensure - the common height and width is divisible by `size_divisibility`. - This depends on the model and many models need a divisibility of 32. - pad_value (float): value to pad - - Returns: - an `ImageList`. - """ - assert len(tensors) > 0 - assert isinstance(tensors, (tuple, list)) - for t in tensors: - assert isinstance(t, torch.Tensor), type(t) - assert t.shape[1:-2] == tensors[0].shape[1:-2], t.shape - # per dimension maximum (H, W) or (C_1, ..., C_K, H, W) where K >= 1 among all tensors - max_size = ( - # In tracing mode, x.shape[i] is Tensor, and should not be converted - # to int: this will cause the traced graph to have hard-coded shapes. - # Instead we should make max_size a Tensor that depends on these tensors. - # Using torch.stack twice seems to be the best way to convert - # list[list[ScalarTensor]] to a Tensor - torch.stack( - [ - torch.stack([torch.as_tensor(dim) for dim in size]) - for size in [tuple(img.shape) for img in tensors] - ] - ) - .max(0) - .values - ) - - if size_divisibility > 0: - stride = size_divisibility - # the last two dims are H,W, both subject to divisibility requirement - max_size = torch.cat([max_size[:-2], (max_size[-2:] + (stride - 1)) // stride * stride]) - - image_sizes = [tuple(im.shape[-2:]) for im in tensors] - - if len(tensors) == 1: - # This seems slightly (2%) faster. - # TODO: check whether it's faster for multiple images as well - image_size = image_sizes[0] - padding_size = [0, max_size[-1] - image_size[1], 0, max_size[-2] - image_size[0]] - if all(x == 0 for x in padding_size): # https://github.com/pytorch/pytorch/issues/31734 - batched_imgs = tensors[0].unsqueeze(0) - else: - padded = F.pad(tensors[0], padding_size, value=pad_value) - batched_imgs = padded.unsqueeze_(0) - else: - # max_size can be a tensor in tracing mode, therefore use tuple() - batch_shape = (len(tensors),) + tuple(max_size) - batched_imgs = tensors[0].new_full(batch_shape, pad_value) - for img, pad_img in zip(tensors, batched_imgs): - pad_img[..., : img.shape[-2], : img.shape[-1]].copy_(img) - - return ImageList(batched_imgs.contiguous(), image_sizes) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/instances.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/instances.py deleted file mode 100644 index 373de08..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/instances.py +++ /dev/null @@ -1,185 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import itertools -from typing import Any, Dict, List, Tuple, Union -import torch - - -class Instances: - """ - This class represents a list of instances in an image. 
- It stores the attributes of instances (e.g., boxes, masks, labels, scores) as "fields". - All fields must have the same ``__len__`` which is the number of instances. - - All other (non-field) attributes of this class are considered private: - they must start with '_' and are not modifiable by a user. - - Some basic usage: - - 1. Set/Get a field: - - .. code-block:: python - - instances.gt_boxes = Boxes(...) - print(instances.pred_masks) # a tensor of shape (N, H, W) - print('gt_masks' in instances) - - 2. ``len(instances)`` returns the number of instances - 3. Indexing: ``instances[indices]`` will apply the indexing on all the fields - and returns a new :class:`Instances`. - Typically, ``indices`` is a integer vector of indices, - or a binary mask of length ``num_instances``, - """ - - def __init__(self, image_size: Tuple[int, int], **kwargs: Any): - """ - Args: - image_size (height, width): the spatial size of the image. - kwargs: fields to add to this `Instances`. - """ - self._image_size = image_size - self._fields: Dict[str, Any] = {} - for k, v in kwargs.items(): - self.set(k, v) - - @property - def image_size(self) -> Tuple[int, int]: - """ - Returns: - tuple: height, width - """ - return self._image_size - - def __setattr__(self, name: str, val: Any) -> None: - if name.startswith("_"): - super().__setattr__(name, val) - else: - self.set(name, val) - - def __getattr__(self, name: str) -> Any: - if name == "_fields" or name not in self._fields: - raise AttributeError("Cannot find field '{}' in the given Instances!".format(name)) - return self._fields[name] - - def set(self, name: str, value: Any) -> None: - """ - Set the field named `name` to `value`. - The length of `value` must be the number of instances, - and must agree with other existing fields in this object. - """ - data_len = len(value) - if len(self._fields): - assert ( - len(self) == data_len - ), "Adding a field of length {} to a Instances of length {}".format(data_len, len(self)) - self._fields[name] = value - - def has(self, name: str) -> bool: - """ - Returns: - bool: whether the field called `name` exists. - """ - return name in self._fields - - def remove(self, name: str) -> None: - """ - Remove the field called `name`. - """ - del self._fields[name] - - def get(self, name: str) -> Any: - """ - Returns the field called `name`. - """ - return self._fields[name] - - def get_fields(self) -> Dict[str, Any]: - """ - Returns: - dict: a dict which maps names (str) to data of the fields - - Modifying the returned dict will modify this instance. - """ - return self._fields - - # Tensor-like methods - def to(self, device: str) -> "Instances": - """ - Returns: - Instances: all fields are called with a `to(device)`, if the field has this method. - """ - ret = Instances(self._image_size) - for k, v in self._fields.items(): - if hasattr(v, "to"): - v = v.to(device) - ret.set(k, v) - return ret - - def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Instances": - """ - Args: - item: an index-like object and will be used to index all the fields. - - Returns: - If `item` is a string, return the data in the corresponding field. - Otherwise, returns an `Instances` where all fields are indexed by `item`. 
- """ - if type(item) == int: - if item >= len(self) or item < -len(self): - raise IndexError("Instances index out of range!") - else: - item = slice(item, None, len(self)) - - ret = Instances(self._image_size) - for k, v in self._fields.items(): - ret.set(k, v[item]) - return ret - - def __len__(self) -> int: - for v in self._fields.values(): - return len(v) - raise NotImplementedError("Empty Instances does not support __len__!") - - def __iter__(self): - raise NotImplementedError("`Instances` object is not iterable!") - - @staticmethod - def cat(instance_lists: List["Instances"]) -> "Instances": - """ - Args: - instance_lists (list[Instances]) - - Returns: - Instances - """ - assert all(isinstance(i, Instances) for i in instance_lists) - assert len(instance_lists) > 0 - if len(instance_lists) == 1: - return instance_lists[0] - - image_size = instance_lists[0].image_size - for i in instance_lists[1:]: - assert i.image_size == image_size - ret = Instances(image_size) - for k in instance_lists[0]._fields.keys(): - values = [i.get(k) for i in instance_lists] - v0 = values[0] - if isinstance(v0, torch.Tensor): - values = torch.cat(values, dim=0) - elif isinstance(v0, list): - values = list(itertools.chain(*values)) - elif hasattr(type(v0), "cat"): - values = type(v0).cat(values) - else: - raise ValueError("Unsupported type {} for concatenation".format(type(v0))) - ret.set(k, values) - return ret - - def __str__(self) -> str: - s = self.__class__.__name__ + "(" - s += "num_instances={}, ".format(len(self)) - s += "image_height={}, ".format(self._image_size[0]) - s += "image_width={}, ".format(self._image_size[1]) - s += "fields=[{}])".format(", ".join((f"{k}: {v}" for k, v in self._fields.items()))) - return s - - __repr__ = __str__ diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/keypoints.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/keypoints.py deleted file mode 100644 index 2242815..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/keypoints.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import numpy as np -from typing import Any, List, Tuple, Union -import torch - -from detectron2.layers import interpolate - - -class Keypoints: - """ - Stores keypoint annotation data. GT Instances have a `gt_keypoints` property - containing the x,y location and visibility flag of each keypoint. This tensor has shape - (N, K, 3) where N is the number of instances and K is the number of keypoints per instance. - - The visibility flag follows the COCO format and must be one of three integers: - * v=0: not labeled (in which case x=y=0) - * v=1: labeled but not visible - * v=2: labeled and visible - """ - - def __init__(self, keypoints: Union[torch.Tensor, np.ndarray, List[List[float]]]): - """ - Arguments: - keypoints: A Tensor, numpy array, or list of the x, y, and visibility of each keypoint. - The shape should be (N, K, 3) where N is the number of - instances, and K is the number of keypoints per instance. 
- """ - device = keypoints.device if isinstance(keypoints, torch.Tensor) else torch.device("cpu") - keypoints = torch.as_tensor(keypoints, dtype=torch.float32, device=device) - assert keypoints.dim() == 3 and keypoints.shape[2] == 3, keypoints.shape - self.tensor = keypoints - - def __len__(self) -> int: - return self.tensor.size(0) - - def to(self, *args: Any, **kwargs: Any) -> "Keypoints": - return type(self)(self.tensor.to(*args, **kwargs)) - - @property - def device(self) -> torch.device: - return self.tensor.device - - def to_heatmap(self, boxes: torch.Tensor, heatmap_size: int) -> torch.Tensor: - """ - Arguments: - boxes: Nx4 tensor, the boxes to draw the keypoints to - - Returns: - heatmaps: - A tensor of shape (N, K) containing an integer spatial label - in the range [0, heatmap_size**2 - 1] for each keypoint in the input. - valid: - A tensor of shape (N, K) containing whether each keypoint is in the roi or not. - """ - return _keypoints_to_heatmap(self.tensor, boxes, heatmap_size) - - def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Keypoints": - """ - Create a new `Keypoints` by indexing on this `Keypoints`. - - The following usage are allowed: - - 1. `new_kpts = kpts[3]`: return a `Keypoints` which contains only one instance. - 2. `new_kpts = kpts[2:10]`: return a slice of key points. - 3. `new_kpts = kpts[vector]`, where vector is a torch.ByteTensor - with `length = len(kpts)`. Nonzero elements in the vector will be selected. - - Note that the returned Keypoints might share storage with this Keypoints, - subject to Pytorch's indexing semantics. - """ - if isinstance(item, int): - return Keypoints([self.tensor[item]]) - return Keypoints(self.tensor[item]) - - def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "num_instances={})".format(len(self.tensor)) - return s - - -# TODO make this nicer, this is a direct translation from C2 (but removing the inner loop) -def _keypoints_to_heatmap( - keypoints: torch.Tensor, rois: torch.Tensor, heatmap_size: int -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Encode keypoint locations into a target heatmap for use in SoftmaxWithLoss across space. - - Maps keypoints from the half-open interval [x1, x2) on continuous image coordinates to the - closed interval [0, heatmap_size - 1] on discrete image coordinates. We use the - continuous-discrete conversion from Heckbert 1990 ("What is the coordinate of a pixel?"): - d = floor(c) and c = d + 0.5, where d is a discrete coordinate and c is a continuous coordinate. - - Arguments: - keypoints: tensor of keypoint locations in of shape (N, K, 3). - rois: Nx4 tensor of rois in xyxy format - heatmap_size: integer side length of square heatmap. - - Returns: - heatmaps: A tensor of shape (N, K) containing an integer spatial label - in the range [0, heatmap_size**2 - 1] for each keypoint in the input. - valid: A tensor of shape (N, K) containing whether each keypoint is in - the roi or not. 
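# Editor's note (not part of the original diff): a small worked example of the continuous ->
# discrete mapping implemented below. For a ROI with x1 = 10, x2 = 74 and heatmap_size = 56,
# scale_x = 56 / 64 = 0.875, so a keypoint at x = 30.0 lands in bin
# floor((30.0 - 10) * 0.875) = 17; points exactly on the right/bottom ROI edge are snapped to
# index heatmap_size - 1, and keypoints outside the ROI or with visibility 0 are marked invalid.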
- """ - - if rois.numel() == 0: - return rois.new().long(), rois.new().long() - offset_x = rois[:, 0] - offset_y = rois[:, 1] - scale_x = heatmap_size / (rois[:, 2] - rois[:, 0]) - scale_y = heatmap_size / (rois[:, 3] - rois[:, 1]) - - offset_x = offset_x[:, None] - offset_y = offset_y[:, None] - scale_x = scale_x[:, None] - scale_y = scale_y[:, None] - - x = keypoints[..., 0] - y = keypoints[..., 1] - - x_boundary_inds = x == rois[:, 2][:, None] - y_boundary_inds = y == rois[:, 3][:, None] - - x = (x - offset_x) * scale_x - x = x.floor().long() - y = (y - offset_y) * scale_y - y = y.floor().long() - - x[x_boundary_inds] = heatmap_size - 1 - y[y_boundary_inds] = heatmap_size - 1 - - valid_loc = (x >= 0) & (y >= 0) & (x < heatmap_size) & (y < heatmap_size) - vis = keypoints[..., 2] > 0 - valid = (valid_loc & vis).long() - - lin_ind = y * heatmap_size + x - heatmaps = lin_ind * valid - - return heatmaps, valid - - -@torch.no_grad() -def heatmaps_to_keypoints(maps: torch.Tensor, rois: torch.Tensor) -> torch.Tensor: - """ - Extract predicted keypoint locations from heatmaps. - - Args: - maps (Tensor): (#ROIs, #keypoints, POOL_H, POOL_W). The predicted heatmap of logits for - each ROI and each keypoint. - rois (Tensor): (#ROIs, 4). The box of each ROI. - - Returns: - Tensor of shape (#ROIs, #keypoints, 4) with the last dimension corresponding to - (x, y, logit, score) for each keypoint. - - When converting discrete pixel indices in an NxN image to a continuous keypoint coordinate, - we maintain consistency with :meth:`Keypoints.to_heatmap` by using the conversion from - Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a continuous coordinate. - """ - offset_x = rois[:, 0] - offset_y = rois[:, 1] - - widths = (rois[:, 2] - rois[:, 0]).clamp(min=1) - heights = (rois[:, 3] - rois[:, 1]).clamp(min=1) - widths_ceil = widths.ceil() - heights_ceil = heights.ceil() - - num_rois, num_keypoints = maps.shape[:2] - xy_preds = maps.new_zeros(rois.shape[0], num_keypoints, 4) - - width_corrections = widths / widths_ceil - height_corrections = heights / heights_ceil - - keypoints_idx = torch.arange(num_keypoints, device=maps.device) - - for i in range(num_rois): - outsize = (int(heights_ceil[i]), int(widths_ceil[i])) - roi_map = interpolate(maps[[i]], size=outsize, mode="bicubic", align_corners=False).squeeze( - 0 - ) # #keypoints x H x W - - # softmax over the spatial region - max_score, _ = roi_map.view(num_keypoints, -1).max(1) - max_score = max_score.view(num_keypoints, 1, 1) - tmp_full_resolution = (roi_map - max_score).exp_() - tmp_pool_resolution = (maps[i] - max_score).exp_() - # Produce scores over the region H x W, but normalize with POOL_H x POOL_W, - # so that the scores of objects of different absolute sizes will be more comparable - roi_map_scores = tmp_full_resolution / tmp_pool_resolution.sum((1, 2), keepdim=True) - - w = roi_map.shape[2] - pos = roi_map.view(num_keypoints, -1).argmax(1) - - x_int = pos % w - y_int = (pos - x_int) // w - - assert ( - roi_map_scores[keypoints_idx, y_int, x_int] - == roi_map_scores.view(num_keypoints, -1).max(1)[0] - ).all() - - x = (x_int.float() + 0.5) * width_corrections[i] - y = (y_int.float() + 0.5) * height_corrections[i] - - xy_preds[i, :, 0] = x + offset_x[i] - xy_preds[i, :, 1] = y + offset_y[i] - xy_preds[i, :, 2] = roi_map[keypoints_idx, y_int, x_int] - xy_preds[i, :, 3] = roi_map_scores[keypoints_idx, y_int, x_int] - - return xy_preds diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/masks.py 
b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/masks.py deleted file mode 100644 index e363baf..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/masks.py +++ /dev/null @@ -1,424 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import copy -import itertools -import numpy as np -from typing import Any, Iterator, List, Union -import pycocotools.mask as mask_utils -import torch - -from detectron2.layers.roi_align import ROIAlign - -from .boxes import Boxes - - -def polygon_area(x, y): - # Using the shoelace formula - # https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates - return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1))) - - -def polygons_to_bitmask(polygons: List[np.ndarray], height: int, width: int) -> np.ndarray: - """ - Args: - polygons (list[ndarray]): each array has shape (Nx2,) - height, width (int) - - Returns: - ndarray: a bool mask of shape (height, width) - """ - assert len(polygons) > 0, "COCOAPI does not support empty polygons" - rles = mask_utils.frPyObjects(polygons, height, width) - rle = mask_utils.merge(rles) - return mask_utils.decode(rle).astype(np.bool) - - -def rasterize_polygons_within_box( - polygons: List[np.ndarray], box: np.ndarray, mask_size: int -) -> torch.Tensor: - """ - Rasterize the polygons into a mask image and - crop the mask content in the given box. - The cropped mask is resized to (mask_size, mask_size). - - This function is used when generating training targets for mask head in Mask R-CNN. - Given original ground-truth masks for an image, new ground-truth mask - training targets in the size of `mask_size x mask_size` - must be provided for each predicted box. This function will be called to - produce such targets. - - Args: - polygons (list[ndarray[float]]): a list of polygons, which represents an instance. - box: 4-element numpy array - mask_size (int): - - Returns: - Tensor: BoolTensor of shape (mask_size, mask_size) - """ - # 1. Shift the polygons w.r.t the boxes - w, h = box[2] - box[0], box[3] - box[1] - - polygons = copy.deepcopy(polygons) - for p in polygons: - p[0::2] = p[0::2] - box[0] - p[1::2] = p[1::2] - box[1] - - # 2. Rescale the polygons to the new box size - # max() to avoid division by small number - ratio_h = mask_size / max(h, 0.1) - ratio_w = mask_size / max(w, 0.1) - - if ratio_h == ratio_w: - for p in polygons: - p *= ratio_h - else: - for p in polygons: - p[0::2] *= ratio_w - p[1::2] *= ratio_h - - # 3. Rasterize the polygons with coco api - mask = polygons_to_bitmask(polygons, mask_size, mask_size) - mask = torch.from_numpy(mask) - return mask - - -class BitMasks: - """ - This class stores the segmentation masks for all objects in one image, in - the form of bitmaps. - - Attributes: - tensor: bool Tensor of N,H,W, representing N instances in the image. - """ - - def __init__(self, tensor: Union[torch.Tensor, np.ndarray]): - """ - Args: - tensor: bool Tensor of N,H,W, representing N instances in the image. 
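Stepping back to the polygon helpers at the top of this file: the shoelace-based polygon_area can be sanity-checked on a shape whose area is known. A minimal sketch, assuming detectron2 is importable; the rectangle coordinates are arbitrary.

import numpy as np
from detectron2.structures.masks import polygon_area

# A 4 x 2 axis-aligned rectangle with corners (1,1), (5,1), (5,3), (1,3).
x = np.array([1.0, 5.0, 5.0, 1.0])
y = np.array([1.0, 1.0, 3.0, 3.0])
print(polygon_area(x, y))   # 8.0, matching width * height = 4 * 2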
- """ - device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") - tensor = torch.as_tensor(tensor, dtype=torch.bool, device=device) - assert tensor.dim() == 3, tensor.size() - self.image_size = tensor.shape[1:] - self.tensor = tensor - - def to(self, device: str) -> "BitMasks": - return BitMasks(self.tensor.to(device)) - - @property - def device(self) -> torch.device: - return self.tensor.device - - def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "BitMasks": - """ - Returns: - BitMasks: Create a new :class:`BitMasks` by indexing. - - The following usage are allowed: - - 1. `new_masks = masks[3]`: return a `BitMasks` which contains only one mask. - 2. `new_masks = masks[2:10]`: return a slice of masks. - 3. `new_masks = masks[vector]`, where vector is a torch.BoolTensor - with `length = len(masks)`. Nonzero elements in the vector will be selected. - - Note that the returned object might share storage with this object, - subject to Pytorch's indexing semantics. - """ - if isinstance(item, int): - return BitMasks(self.tensor[item].view(1, -1)) - m = self.tensor[item] - assert m.dim() == 3, "Indexing on BitMasks with {} returns a tensor with shape {}!".format( - item, m.shape - ) - return BitMasks(m) - - def __iter__(self) -> torch.Tensor: - yield from self.tensor - - def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "num_instances={})".format(len(self.tensor)) - return s - - def __len__(self) -> int: - return self.tensor.shape[0] - - def nonempty(self) -> torch.Tensor: - """ - Find masks that are non-empty. - - Returns: - Tensor: a BoolTensor which represents - whether each mask is empty (False) or non-empty (True). - """ - return self.tensor.flatten(1).any(dim=1) - - @staticmethod - def from_polygon_masks( - polygon_masks: Union["PolygonMasks", List[List[np.ndarray]]], height: int, width: int - ) -> "BitMasks": - """ - Args: - polygon_masks (list[list[ndarray]] or PolygonMasks) - height, width (int) - """ - if isinstance(polygon_masks, PolygonMasks): - polygon_masks = polygon_masks.polygons - masks = [polygons_to_bitmask(p, height, width) for p in polygon_masks] - return BitMasks(torch.stack([torch.from_numpy(x) for x in masks])) - - def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor: - """ - Crop each bitmask by the given box, and resize results to (mask_size, mask_size). - This can be used to prepare training targets for Mask R-CNN. - It has less reconstruction error compared to rasterization with polygons. - However we observe no difference in accuracy, - but BitMasks requires more memory to store all the masks. - - Args: - boxes (Tensor): Nx4 tensor storing the boxes for each mask - mask_size (int): the size of the rasterized mask. - - Returns: - Tensor: - A bool tensor of shape (N, mask_size, mask_size), where - N is the number of predicted boxes for this image. 
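A usage sketch of crop_and_resize as a mask-target preparation step, assuming detectron2 (including its compiled ROIAlign op) is available; the masks and boxes below are random placeholders.

import torch
from detectron2.structures import BitMasks

masks = BitMasks(torch.rand(3, 64, 64) > 0.5)           # 3 instances on a 64x64 image
boxes = torch.tensor([[ 0.0,  0.0, 32.0, 32.0],
                      [16.0, 16.0, 48.0, 48.0],
                      [ 8.0,  0.0, 40.0, 64.0]])        # xyxy, one box per mask
targets = masks.crop_and_resize(boxes, mask_size=28)    # bool tensor of shape (3, 28, 28)
print(targets.shape, targets.dtype)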
- """ - assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self)) - device = self.tensor.device - - batch_inds = torch.arange(len(boxes), device=device).to(dtype=boxes.dtype)[:, None] - rois = torch.cat([batch_inds, boxes], dim=1) # Nx5 - - bit_masks = self.tensor.to(dtype=torch.float32) - rois = rois.to(device=device) - output = ( - ROIAlign((mask_size, mask_size), 1.0, 0, aligned=True) - .forward(bit_masks[:, None, :, :], rois) - .squeeze(1) - ) - output = output >= 0.5 - return output - - def get_bounding_boxes(self) -> None: - # not needed now - raise NotImplementedError - - @staticmethod - def cat(bitmasks_list: List["BitMasks"]) -> "BitMasks": - """ - Concatenates a list of BitMasks into a single BitMasks - - Arguments: - bitmasks_list (list[BitMasks]) - - Returns: - BitMasks: the concatenated BitMasks - """ - assert isinstance(bitmasks_list, (list, tuple)) - assert len(bitmasks_list) > 0 - assert all(isinstance(bitmask, BitMasks) for bitmask in bitmasks_list) - - cat_bitmasks = type(bitmasks_list[0])(torch.cat([bm.tensor for bm in bitmasks_list], dim=0)) - return cat_bitmasks - - -class PolygonMasks: - """ - This class stores the segmentation masks for all objects in one image, in the form of polygons. - - Attributes: - polygons: list[list[ndarray]]. Each ndarray is a float64 vector representing a polygon. - """ - - def __init__(self, polygons: List[List[Union[torch.Tensor, np.ndarray]]]): - """ - Arguments: - polygons (list[list[np.ndarray]]): The first - level of the list correspond to individual instances, - the second level to all the polygons that compose the - instance, and the third level to the polygon coordinates. - The third level array should have the format of - [x0, y0, x1, y1, ..., xn, yn] (n >= 3). - """ - assert isinstance(polygons, list), ( - "Cannot create PolygonMasks: Expect a list of list of polygons per image. " - "Got '{}' instead.".format(type(polygons)) - ) - - def _make_array(t: Union[torch.Tensor, np.ndarray]) -> np.ndarray: - # Use float64 for higher precision, because why not? - # Always put polygons on CPU (self.to is a no-op) since they - # are supposed to be small tensors. - # May need to change this assumption if GPU placement becomes useful - if isinstance(t, torch.Tensor): - t = t.cpu().numpy() - return np.asarray(t).astype("float64") - - def process_polygons( - polygons_per_instance: List[Union[torch.Tensor, np.ndarray]] - ) -> List[np.ndarray]: - assert isinstance(polygons_per_instance, list), ( - "Cannot create polygons: Expect a list of polygons per instance. " - "Got '{}' instead.".format(type(polygons_per_instance)) - ) - # transform the polygon to a tensor - polygons_per_instance = [_make_array(p) for p in polygons_per_instance] - for polygon in polygons_per_instance: - assert len(polygon) % 2 == 0 and len(polygon) >= 6 - return polygons_per_instance - - self.polygons: List[List[np.ndarray]] = [ - process_polygons(polygons_per_instance) for polygons_per_instance in polygons - ] - - def to(self, *args: Any, **kwargs: Any) -> "PolygonMasks": - return self - - @property - def device(self) -> torch.device: - return torch.device("cpu") - - def get_bounding_boxes(self) -> Boxes: - """ - Returns: - Boxes: tight bounding boxes around polygon masks. 
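A minimal sketch of the polygon representation and the tight boxes it yields, assuming detectron2 is importable; the triangle coordinates are arbitrary.

import numpy as np
from detectron2.structures import PolygonMasks

# One instance made of a single triangle, flattened as [x0, y0, x1, y1, x2, y2].
pm = PolygonMasks([[np.array([2.0, 2.0, 10.0, 2.0, 6.0, 8.0])]])
boxes = pm.get_bounding_boxes()
print(boxes.tensor)   # tensor([[ 2.,  2., 10.,  8.]]) -- xyxy of the tight box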
- """ - boxes = torch.zeros(len(self.polygons), 4, dtype=torch.float32) - for idx, polygons_per_instance in enumerate(self.polygons): - minxy = torch.as_tensor([float("inf"), float("inf")], dtype=torch.float32) - maxxy = torch.zeros(2, dtype=torch.float32) - for polygon in polygons_per_instance: - coords = torch.from_numpy(polygon).view(-1, 2).to(dtype=torch.float32) - minxy = torch.min(minxy, torch.min(coords, dim=0).values) - maxxy = torch.max(maxxy, torch.max(coords, dim=0).values) - boxes[idx, :2] = minxy - boxes[idx, 2:] = maxxy - return Boxes(boxes) - - def nonempty(self) -> torch.Tensor: - """ - Find masks that are non-empty. - - Returns: - Tensor: - a BoolTensor which represents whether each mask is empty (False) or not (True). - """ - keep = [1 if len(polygon) > 0 else 0 for polygon in self.polygons] - return torch.from_numpy(np.asarray(keep, dtype=np.bool)) - - def __getitem__(self, item: Union[int, slice, List[int], torch.BoolTensor]) -> "PolygonMasks": - """ - Support indexing over the instances and return a `PolygonMasks` object. - `item` can be: - - 1. An integer. It will return an object with only one instance. - 2. A slice. It will return an object with the selected instances. - 3. A list[int]. It will return an object with the selected instances, - correpsonding to the indices in the list. - 4. A vector mask of type BoolTensor, whose length is num_instances. - It will return an object with the instances whose mask is nonzero. - """ - if isinstance(item, int): - selected_polygons = [self.polygons[item]] - elif isinstance(item, slice): - selected_polygons = self.polygons[item] - elif isinstance(item, list): - selected_polygons = [self.polygons[i] for i in item] - elif isinstance(item, torch.Tensor): - # Polygons is a list, so we have to move the indices back to CPU. - if item.dtype == torch.bool: - assert item.dim() == 1, item.shape - item = item.nonzero().squeeze(1).cpu().numpy().tolist() - elif item.dtype in [torch.int32, torch.int64]: - item = item.cpu().numpy().tolist() - else: - raise ValueError("Unsupported tensor dtype={} for indexing!".format(item.dtype)) - selected_polygons = [self.polygons[i] for i in item] - return PolygonMasks(selected_polygons) - - def __iter__(self) -> Iterator[List[np.ndarray]]: - """ - Yields: - list[ndarray]: the polygons for one instance. - Each Tensor is a float64 vector representing a polygon. - """ - return iter(self.polygons) - - def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "num_instances={})".format(len(self.polygons)) - return s - - def __len__(self) -> int: - return len(self.polygons) - - def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor: - """ - Crop each mask by the given box, and resize results to (mask_size, mask_size). - This can be used to prepare training targets for Mask R-CNN. - - Args: - boxes (Tensor): Nx4 tensor storing the boxes for each mask - mask_size (int): the size of the rasterized mask. - - Returns: - Tensor: A bool tensor of shape (N, mask_size, mask_size), where - N is the number of predicted boxes for this image. 
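The indexing modes documented above (int, slice, list of indices, boolean mask) behave like tensor indexing over instances. A short sketch with three single-polygon instances, assuming detectron2 is importable; the triangles are made up.

import numpy as np
import torch
from detectron2.structures import PolygonMasks

def tri(dx):
    # One instance: a single triangle shifted horizontally by dx.
    return [np.array([0.0 + dx, 0.0, 4.0 + dx, 0.0, 2.0 + dx, 3.0])]

pm = PolygonMasks([tri(0), tri(10), tri(20)])          # three instances

print(len(pm[1]))                                      # 1  (single instance)
print(len(pm[0:2]))                                    # 2  (slice)
print(len(pm[[0, 2]]))                                 # 2  (list of indices)
print(len(pm[torch.tensor([True, False, True])]))      # 2  (boolean mask)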
- """ - assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self)) - - device = boxes.device - # Put boxes on the CPU, as the polygon representation is not efficient GPU-wise - # (several small tensors for representing a single instance mask) - boxes = boxes.to(torch.device("cpu")) - - results = [ - rasterize_polygons_within_box(poly, box.numpy(), mask_size) - for poly, box in zip(self.polygons, boxes) - ] - """ - poly: list[list[float]], the polygons for one instance - box: a tensor of shape (4,) - """ - if len(results) == 0: - return torch.empty(0, mask_size, mask_size, dtype=torch.bool, device=device) - return torch.stack(results, dim=0).to(device=device) - - def area(self): - """ - Computes area of the mask. - Only works with Polygons, using the shoelace formula: - https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates - - Returns: - Tensor: a vector, area for each instance - """ - - area = [] - for polygons_per_instance in self.polygons: - area_per_instance = 0 - for p in polygons_per_instance: - area_per_instance += polygon_area(p[0::2], p[1::2]) - area.append(area_per_instance) - - return torch.tensor(area) - - @staticmethod - def cat(polymasks_list: List["PolygonMasks"]) -> "PolygonMasks": - """ - Concatenates a list of PolygonMasks into a single PolygonMasks - - Arguments: - polymasks_list (list[PolygonMasks]) - - Returns: - PolygonMasks: the concatenated PolygonMasks - """ - assert isinstance(polymasks_list, (list, tuple)) - assert len(polymasks_list) > 0 - assert all(isinstance(polymask, PolygonMasks) for polymask in polymasks_list) - - cat_polymasks = type(polymasks_list[0])( - list(itertools.chain.from_iterable(pm.polygons for pm in polymasks_list)) - ) - return cat_polymasks diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/rotated_boxes.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/rotated_boxes.py deleted file mode 100644 index 823cfb6..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/rotated_boxes.py +++ /dev/null @@ -1,481 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import math -from typing import Iterator, Union -import torch - -from detectron2.layers.rotated_boxes import pairwise_iou_rotated - -from .boxes import Boxes - - -class RotatedBoxes(Boxes): - """ - This structure stores a list of rotated boxes as a Nx5 torch.Tensor. - It supports some common methods about boxes - (`area`, `clip`, `nonempty`, etc), - and also behaves like a Tensor - (support indexing, `to(device)`, `.device`, and iteration over all boxes) - """ - - def __init__(self, tensor: torch.Tensor): - """ - Args: - tensor (Tensor[float]): a Nx5 matrix. Each row is - (x_center, y_center, width, height, angle), - in which angle is represented in degrees. - While there's no strict range restriction for it, - the recommended principal range is between [-180, 180) degrees. - - Assume we have a horizontal box B = (x_center, y_center, width, height), - where width is along the x-axis and height is along the y-axis. - The rotated box B_rot (x_center, y_center, width, height, angle) - can be seen as: - - 1. When angle == 0: - B_rot == B - 2. When angle > 0: - B_rot is obtained by rotating B w.r.t its center by :math:`|angle|` degrees CCW; - 3. When angle < 0: - B_rot is obtained by rotating B w.r.t its center by :math:`|angle|` degrees CW. 
- - Mathematically, since the right-handed coordinate system for image space - is (y, x), where y is top->down and x is left->right, the 4 vertices of the - rotated rectangle :math:`(yr_i, xr_i)` (i = 1, 2, 3, 4) can be obtained from - the vertices of the horizontal rectangle (y_i, x_i) (i = 1, 2, 3, 4) - in the following way (:math:`\\theta = angle*\\pi/180` is the angle in radians, - (y_c, x_c) is the center of the rectangle): - - .. math:: - - yr_i = \\cos(\\theta) (y_i - y_c) - \\sin(\\theta) (x_i - x_c) + y_c, - - xr_i = \\sin(\\theta) (y_i - y_c) + \\cos(\\theta) (x_i - x_c) + x_c, - - which is the standard rigid-body rotation transformation. - - Intuitively, the angle is - (1) the rotation angle from y-axis in image space - to the height vector (top->down in the box's local coordinate system) - of the box in CCW, and - (2) the rotation angle from x-axis in image space - to the width vector (left->right in the box's local coordinate system) - of the box in CCW. - - More intuitively, consider the following horizontal box ABCD represented - in (x1, y1, x2, y2): (3, 2, 7, 4), - covering the [3, 7] x [2, 4] region of the continuous coordinate system - which looks like this: - - .. code:: none - - O--------> x - | - | A---B - | | | - | D---C - | - v y - - Note that each capital letter represents one 0-dimensional geometric point - instead of a 'square pixel' here. - - In the example above, using (x, y) to represent a point we have: - - .. math:: - - O = (0, 0), A = (3, 2), B = (7, 2), C = (7, 4), D = (3, 4) - - We name vector AB = vector DC as the width vector in box's local coordinate system, and - vector AD = vector BC as the height vector in box's local coordinate system. Initially, - when angle = 0 degree, they're aligned with the positive directions of x-axis and y-axis - in the image space, respectively. - - For better illustration, we denote the center of the box as E, - - .. code:: none - - O--------> x - | - | A---B - | | E | - | D---C - | - v y - - where the center E = ((3+7)/2, (2+4)/2) = (5, 3). - - Also, - - .. math:: - - width = |AB| = |CD| = 7 - 3 = 4, - height = |AD| = |BC| = 4 - 2 = 2. - - Therefore, the corresponding representation for the same shape in rotated box in - (x_center, y_center, width, height, angle) format is: - - (5, 3, 4, 2, 0), - - Now, let's consider (5, 3, 4, 2, 90), which is rotated by 90 degrees - CCW (counter-clockwise) by definition. It looks like this: - - .. code:: none - - O--------> x - | B-C - | | | - | |E| - | | | - | A-D - v y - - The center E is still located at the same point (5, 3), while the vertices - ABCD are rotated by 90 degrees CCW with regard to E: - A = (4, 5), B = (4, 1), C = (6, 1), D = (6, 5) - - Here, 90 degrees can be seen as the CCW angle to rotate from y-axis to - vector AD or vector BC (the top->down height vector in box's local coordinate system), - or the CCW angle to rotate from x-axis to vector AB or vector DC (the left->right - width vector in box's local coordinate system). - - .. math:: - - width = |AB| = |CD| = 5 - 1 = 4, - height = |AD| = |BC| = 6 - 4 = 2. - - Next, how about (5, 3, 4, 2, -90), which is rotated by 90 degrees CW (clockwise) - by definition? It looks like this: - - .. code:: none - - O--------> x - | D-A - | | | - | |E| - | | | - | C-B - v y - - The center E is still located at the same point (5, 3), while the vertices - ABCD are rotated by 90 degrees CW with regard to E: - A = (6, 1), B = (6, 5), C = (4, 5), D = (4, 1) - - .. 
math:: - - width = |AB| = |CD| = 5 - 1 = 4, - height = |AD| = |BC| = 6 - 4 = 2. - - This covers exactly the same region as (5, 3, 4, 2, 90) does, and their IoU - will be 1. However, these two will generate different RoI Pooling results and - should not be treated as an identical box. - - On the other hand, it's easy to see that (X, Y, W, H, A) is identical to - (X, Y, W, H, A+360N), for any integer N. For example (5, 3, 4, 2, 270) would be - identical to (5, 3, 4, 2, -90), because rotating the shape 270 degrees CCW is - equivalent to rotating the same shape 90 degrees CW. - - We could rotate further to get (5, 3, 4, 2, 180), or (5, 3, 4, 2, -180): - - .. code:: none - - O--------> x - | - | C---D - | | E | - | B---A - | - v y - - .. math:: - - A = (7, 4), B = (3, 4), C = (3, 2), D = (7, 2), - - width = |AB| = |CD| = 7 - 3 = 4, - height = |AD| = |BC| = 4 - 2 = 2. - - Finally, this is a very inaccurate (heavily quantized) illustration of - how (5, 3, 4, 2, 60) looks like in case anyone wonders: - - .. code:: none - - O--------> x - | B\ - | / C - | /E / - | A / - | `D - v y - - It's still a rectangle with center of (5, 3), width of 4 and height of 2, - but its angle (and thus orientation) is somewhere between - (5, 3, 4, 2, 0) and (5, 3, 4, 2, 90). - """ - device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") - tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device) - if tensor.numel() == 0: - # Use reshape, so we don't end up creating a new tensor that does not depend on - # the inputs (and consequently confuses jit) - tensor = tensor.reshape((0, 5)).to(dtype=torch.float32, device=device) - assert tensor.dim() == 2 and tensor.size(-1) == 5, tensor.size() - - self.tensor = tensor - - def clone(self) -> "RotatedBoxes": - """ - Clone the RotatedBoxes. - - Returns: - RotatedBoxes - """ - return RotatedBoxes(self.tensor.clone()) - - def to(self, device: str) -> "RotatedBoxes": - return RotatedBoxes(self.tensor.to(device)) - - def area(self) -> torch.Tensor: - """ - Computes the area of all the boxes. - - Returns: - torch.Tensor: a vector with areas of each box. - """ - box = self.tensor - area = box[:, 2] * box[:, 3] - return area - - def normalize_angles(self) -> None: - """ - Restrict angles to the range of [-180, 180) degrees - """ - self.tensor[:, 4] = (self.tensor[:, 4] + 180.0) % 360.0 - 180.0 - - def clip(self, box_size: Boxes.BoxSizeType, clip_angle_threshold: float = 1.0) -> None: - """ - Clip (in place) the boxes by limiting x coordinates to the range [0, width] - and y coordinates to the range [0, height]. - - For RRPN: - Only clip boxes that are almost horizontal with a tolerance of - clip_angle_threshold to maintain backward compatibility. - - Rotated boxes beyond this threshold are not clipped for two reasons: - - 1. There are potentially multiple ways to clip a rotated box to make it - fit within the image. - 2. It's tricky to make the entire rectangular box fit within the image - and still be able to not leave out pixels of interest. - - Therefore we rely on ops like RoIAlignRotated to safely handle this. - - Args: - box_size (height, width): The clipping box's size. - clip_angle_threshold: - Iff. abs(normalized(angle)) <= clip_angle_threshold (in degrees), - we do the clipping as horizontal boxes. 
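The worked example in the docstring above, where (5, 3, 4, 2, 90) rotates ABCD to (4, 5), (4, 1), (6, 1), (6, 5), can be reproduced directly from the stated rotation formula. A small numpy check, independent of detectron2.

import numpy as np

def rotate_box_corners(xc, yc, w, h, angle_deg):
    # Corners of (x_center, y_center, width, height, angle) under the image-space
    # convention above: positive angles rotate the box CCW about its center.
    theta = np.deg2rad(angle_deg)
    c, s = np.cos(theta), np.sin(theta)
    # A, B, C, D of the axis-aligned box, as (x, y) points.
    corners = np.array([[xc - w / 2, yc - h / 2],
                        [xc + w / 2, yc - h / 2],
                        [xc + w / 2, yc + h / 2],
                        [xc - w / 2, yc + h / 2]])
    x, y = corners[:, 0], corners[:, 1]
    xr = s * (y - yc) + c * (x - xc) + xc
    yr = c * (y - yc) - s * (x - xc) + yc
    return np.stack([xr, yr], axis=1)

print(np.round(rotate_box_corners(5, 3, 4, 2, 90), 3))
# [[4. 5.]
#  [4. 1.]
#  [6. 1.]
#  [6. 5.]]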
- """ - h, w = box_size - - # normalize angles to be within (-180, 180] degrees - self.normalize_angles() - - idx = torch.where(torch.abs(self.tensor[:, 4]) <= clip_angle_threshold)[0] - - # convert to (x1, y1, x2, y2) - x1 = self.tensor[idx, 0] - self.tensor[idx, 2] / 2.0 - y1 = self.tensor[idx, 1] - self.tensor[idx, 3] / 2.0 - x2 = self.tensor[idx, 0] + self.tensor[idx, 2] / 2.0 - y2 = self.tensor[idx, 1] + self.tensor[idx, 3] / 2.0 - - # clip - x1.clamp_(min=0, max=w) - y1.clamp_(min=0, max=h) - x2.clamp_(min=0, max=w) - y2.clamp_(min=0, max=h) - - # convert back to (xc, yc, w, h) - self.tensor[idx, 0] = (x1 + x2) / 2.0 - self.tensor[idx, 1] = (y1 + y2) / 2.0 - # make sure widths and heights do not increase due to numerical errors - self.tensor[idx, 2] = torch.min(self.tensor[idx, 2], x2 - x1) - self.tensor[idx, 3] = torch.min(self.tensor[idx, 3], y2 - y1) - - def nonempty(self, threshold: float = 0.0) -> torch.Tensor: - """ - Find boxes that are non-empty. - A box is considered empty, if either of its side is no larger than threshold. - - Returns: - Tensor: a binary vector which represents - whether each box is empty (False) or non-empty (True). - """ - box = self.tensor - widths = box[:, 2] - heights = box[:, 3] - keep = (widths > threshold) & (heights > threshold) - return keep - - def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "RotatedBoxes": - """ - Returns: - RotatedBoxes: Create a new :class:`RotatedBoxes` by indexing. - - The following usage are allowed: - - 1. `new_boxes = boxes[3]`: return a `RotatedBoxes` which contains only one box. - 2. `new_boxes = boxes[2:10]`: return a slice of boxes. - 3. `new_boxes = boxes[vector]`, where vector is a torch.ByteTensor - with `length = len(boxes)`. Nonzero elements in the vector will be selected. - - Note that the returned RotatedBoxes might share storage with this RotatedBoxes, - subject to Pytorch's indexing semantics. - """ - if isinstance(item, int): - return RotatedBoxes(self.tensor[item].view(1, -1)) - b = self.tensor[item] - assert b.dim() == 2, "Indexing on RotatedBoxes with {} failed to return a matrix!".format( - item - ) - return RotatedBoxes(b) - - def __len__(self) -> int: - return self.tensor.shape[0] - - def __repr__(self) -> str: - return "RotatedBoxes(" + str(self.tensor) + ")" - - def inside_box(self, box_size: Boxes.BoxSizeType, boundary_threshold: int = 0) -> torch.Tensor: - """ - Args: - box_size (height, width): Size of the reference box covering - [0, width] x [0, height] - boundary_threshold (int): Boxes that extend beyond the reference box - boundary by more than boundary_threshold are considered "outside". - - For RRPN, it might not be necessary to call this function since it's common - for rotated box to extend to outside of the image boundaries - (the clip function only clips the near-horizontal boxes) - - Returns: - a binary vector, indicating whether each box is inside the reference box. 
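A sketch of the clipping behaviour just described, assuming detectron2 is importable: only the near-horizontal box (within the default clip_angle_threshold of 1 degree) is pulled back inside the image, while the 45-degree box is left for downstream ops such as RoIAlignRotated to handle. The box values are made up.

import torch
from detectron2.structures import RotatedBoxes

boxes = RotatedBoxes(torch.tensor([
    [95.0, 50.0, 20.0, 10.0, 0.0],    # near-horizontal, sticks out past x = 100
    [95.0, 50.0, 20.0, 10.0, 45.0],   # rotated well beyond the threshold
]))
boxes.clip(box_size=(100, 100))        # (height, width)
print(boxes.tensor)
# row 0 is clamped so that x_center + width/2 <= 100; row 1 is unchanged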
- """ - height, width = box_size - - cnt_x = self.tensor[..., 0] - cnt_y = self.tensor[..., 1] - half_w = self.tensor[..., 2] / 2.0 - half_h = self.tensor[..., 3] / 2.0 - a = self.tensor[..., 4] - c = torch.abs(torch.cos(a * math.pi / 180.0)) - s = torch.abs(torch.sin(a * math.pi / 180.0)) - # This basically computes the horizontal bounding rectangle of the rotated box - max_rect_dx = c * half_w + s * half_h - max_rect_dy = c * half_h + s * half_w - - inds_inside = ( - (cnt_x - max_rect_dx >= -boundary_threshold) - & (cnt_y - max_rect_dy >= -boundary_threshold) - & (cnt_x + max_rect_dx < width + boundary_threshold) - & (cnt_y + max_rect_dy < height + boundary_threshold) - ) - - return inds_inside - - def get_centers(self) -> torch.Tensor: - """ - Returns: - The box centers in a Nx2 array of (x, y). - """ - return self.tensor[:, :2] - - def scale(self, scale_x: float, scale_y: float) -> None: - """ - Scale the rotated box with horizontal and vertical scaling factors - Note: when scale_factor_x != scale_factor_y, - the rotated box does not preserve the rectangular shape when the angle - is not a multiple of 90 degrees under resize transformation. - Instead, the shape is a parallelogram (that has skew) - Here we make an approximation by fitting a rotated rectangle to the parallelogram. - """ - self.tensor[:, 0] *= scale_x - self.tensor[:, 1] *= scale_y - theta = self.tensor[:, 4] * math.pi / 180.0 - c = torch.cos(theta) - s = torch.sin(theta) - - # In image space, y is top->down and x is left->right - # Consider the local coordintate system for the rotated box, - # where the box center is located at (0, 0), and the four vertices ABCD are - # A(-w / 2, -h / 2), B(w / 2, -h / 2), C(w / 2, h / 2), D(-w / 2, h / 2) - # the midpoint of the left edge AD of the rotated box E is: - # E = (A+D)/2 = (-w / 2, 0) - # the midpoint of the top edge AB of the rotated box F is: - # F(0, -h / 2) - # To get the old coordinates in the global system, apply the rotation transformation - # (Note: the right-handed coordinate system for image space is yOx): - # (old_x, old_y) = (s * y + c * x, c * y - s * x) - # E(old) = (s * 0 + c * (-w/2), c * 0 - s * (-w/2)) = (-c * w / 2, s * w / 2) - # F(old) = (s * (-h / 2) + c * 0, c * (-h / 2) - s * 0) = (-s * h / 2, -c * h / 2) - # After applying the scaling factor (sfx, sfy): - # E(new) = (-sfx * c * w / 2, sfy * s * w / 2) - # F(new) = (-sfx * s * h / 2, -sfy * c * h / 2) - # The new width after scaling tranformation becomes: - - # w(new) = |E(new) - O| * 2 - # = sqrt[(sfx * c * w / 2)^2 + (sfy * s * w / 2)^2] * 2 - # = sqrt[(sfx * c)^2 + (sfy * s)^2] * w - # i.e., scale_factor_w = sqrt[(sfx * c)^2 + (sfy * s)^2] - # - # For example, - # when angle = 0 or 180, |c| = 1, s = 0, scale_factor_w == scale_factor_x; - # when |angle| = 90, c = 0, |s| = 1, scale_factor_w == scale_factor_y - self.tensor[:, 2] *= torch.sqrt((scale_x * c) ** 2 + (scale_y * s) ** 2) - - # h(new) = |F(new) - O| * 2 - # = sqrt[(sfx * s * h / 2)^2 + (sfy * c * h / 2)^2] * 2 - # = sqrt[(sfx * s)^2 + (sfy * c)^2] * h - # i.e., scale_factor_h = sqrt[(sfx * s)^2 + (sfy * c)^2] - # - # For example, - # when angle = 0 or 180, |c| = 1, s = 0, scale_factor_h == scale_factor_y; - # when |angle| = 90, c = 0, |s| = 1, scale_factor_h == scale_factor_x - self.tensor[:, 3] *= torch.sqrt((scale_x * s) ** 2 + (scale_y * c) ** 2) - - # The angle is the rotation angle from y-axis in image space to the height - # vector (top->down in the box's local coordinate system) of the box in CCW. 
- # - # angle(new) = angle_yOx(O - F(new)) - # = angle_yOx( (sfx * s * h / 2, sfy * c * h / 2) ) - # = atan2(sfx * s * h / 2, sfy * c * h / 2) - # = atan2(sfx * s, sfy * c) - # - # For example, - # when sfx == sfy, angle(new) == atan2(s, c) == angle(old) - self.tensor[:, 4] = torch.atan2(scale_x * s, scale_y * c) * 180 / math.pi - - @property - def device(self) -> str: - return self.tensor.device - - def __iter__(self) -> Iterator[torch.Tensor]: - """ - Yield a box as a Tensor of shape (5,) at a time. - """ - yield from self.tensor - - -def pairwise_iou(boxes1: RotatedBoxes, boxes2: RotatedBoxes) -> None: - """ - Given two lists of rotated boxes of size N and M, - compute the IoU (intersection over union) - between __all__ N x M pairs of boxes. - The box order must be (x_center, y_center, width, height, angle). - - Args: - boxes1, boxes2 (RotatedBoxes): - two `RotatedBoxes`. Contains N & M rotated boxes, respectively. - - Returns: - Tensor: IoU, sized [N,M]. - """ - - return pairwise_iou_rotated(boxes1.tensor, boxes2.tensor) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/README.md b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/README.md deleted file mode 100644 index 9765b24..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Utility functions - -This folder contain utility functions that are not used in the -core library, but are useful for building models or training -code using the config system. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/__init__.py deleted file mode 100644 index 168f997..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/analysis.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/analysis.py deleted file mode 100644 index c48e376..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/analysis.py +++ /dev/null @@ -1,164 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -# -*- coding: utf-8 -*- - -import logging -import typing -import torch -from fvcore.nn import activation_count, flop_count, parameter_count, parameter_count_table -from torch import nn - -from detectron2.structures import BitMasks, Boxes, ImageList, Instances - -from .logger import log_first_n - -__all__ = [ - "activation_count_operators", - "flop_count_operators", - "parameter_count_table", - "parameter_count", -] - -FLOPS_MODE = "flops" -ACTIVATIONS_MODE = "activations" - - -# some extra ops to ignore from counting. -_IGNORED_OPS = [ - "aten::add", - "aten::add_", - "aten::batch_norm", - "aten::constant_pad_nd", - "aten::div", - "aten::div_", - "aten::exp", - "aten::log2", - "aten::max_pool2d", - "aten::meshgrid", - "aten::mul", - "aten::mul_", - "aten::nonzero_numpy", - "aten::relu", - "aten::relu_", - "aten::rsub", - "aten::sigmoid", - "aten::sigmoid_", - "aten::softmax", - "aten::sort", - "aten::sqrt", - "aten::sub", - "aten::upsample_nearest2d", - "prim::PythonOp", - "torchvision::nms", -] - - -def flop_count_operators( - model: nn.Module, inputs: list, **kwargs -) -> typing.DefaultDict[str, float]: - """ - Implement operator-level flops counting using jit. 
- This is a wrapper of fvcore.nn.flop_count, that supports standard detection models - in detectron2. - - Note: - The function runs the input through the model to compute flops. - The flops of a detection model is often input-dependent, for example, - the flops of box & mask head depends on the number of proposals & - the number of detected objects. - Therefore, the flops counting using a single input may not accurately - reflect the computation cost of a model. - - Args: - model: a detectron2 model that takes `list[dict]` as input. - inputs (list[dict]): inputs to model, in detectron2's standard format. - """ - return _wrapper_count_operators(model=model, inputs=inputs, mode=FLOPS_MODE, **kwargs) - - -def activation_count_operators( - model: nn.Module, inputs: list, **kwargs -) -> typing.DefaultDict[str, float]: - """ - Implement operator-level activations counting using jit. - This is a wrapper of fvcore.nn.activation_count, that supports standard detection models - in detectron2. - - Note: - The function runs the input through the model to compute activations. - The activations of a detection model is often input-dependent, for example, - the activations of box & mask head depends on the number of proposals & - the number of detected objects. - - Args: - model: a detectron2 model that takes `list[dict]` as input. - inputs (list[dict]): inputs to model, in detectron2's standard format. - """ - return _wrapper_count_operators(model=model, inputs=inputs, mode=ACTIVATIONS_MODE, **kwargs) - - -def _flatten_to_tuple(outputs): - result = [] - if isinstance(outputs, torch.Tensor): - result.append(outputs) - elif isinstance(outputs, (list, tuple)): - for v in outputs: - result.extend(_flatten_to_tuple(v)) - elif isinstance(outputs, dict): - for _, v in outputs.items(): - result.extend(_flatten_to_tuple(v)) - elif isinstance(outputs, Instances): - result.extend(_flatten_to_tuple(outputs.get_fields())) - elif isinstance(outputs, (Boxes, BitMasks, ImageList)): - result.append(outputs.tensor) - else: - log_first_n( - logging.WARN, - f"Output of type {type(outputs)} not included in flops/activations count.", - n=10, - ) - return tuple(result) - - -def _wrapper_count_operators( - model: nn.Module, inputs: list, mode: str, **kwargs -) -> typing.DefaultDict[str, float]: - - # ignore some ops - supported_ops = {k: lambda *args, **kwargs: {} for k in _IGNORED_OPS} - supported_ops.update(kwargs.pop("supported_ops", {})) - kwargs["supported_ops"] = supported_ops - - assert len(inputs) == 1, "Please use batch size=1" - tensor_input = inputs[0]["image"] - - class WrapModel(nn.Module): - def __init__(self, model): - super().__init__() - if isinstance( - model, (nn.parallel.distributed.DistributedDataParallel, nn.DataParallel) - ): - self.model = model.module - else: - self.model = model - - def forward(self, image): - # jit requires the input/output to be Tensors - inputs = [{"image": image}] - outputs = self.model.forward(inputs) - # Only the subgraph that computes the returned tuple of tensor will be - # counted. So we flatten everything we found to tuple of tensors. 
- return _flatten_to_tuple(outputs) - - old_train = model.training - with torch.no_grad(): - if mode == FLOPS_MODE: - ret = flop_count(WrapModel(model).train(False), (tensor_input,), **kwargs) - elif mode == ACTIVATIONS_MODE: - ret = activation_count(WrapModel(model).train(False), (tensor_input,), **kwargs) - else: - raise NotImplementedError("Count for mode {} is not supported yet.".format(mode)) - # compatible with change in fvcore - if isinstance(ret, tuple): - ret = ret[0] - model.train(old_train) - return ret diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/collect_env.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/collect_env.py deleted file mode 100644 index c25b99c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/collect_env.py +++ /dev/null @@ -1,160 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import importlib -import numpy as np -import os -import re -import subprocess -import sys -from collections import defaultdict -import PIL -import torch -import torchvision -from tabulate import tabulate - -__all__ = ["collect_env_info"] - - -def collect_torch_env(): - try: - import torch.__config__ - - return torch.__config__.show() - except ImportError: - # compatible with older versions of pytorch - from torch.utils.collect_env import get_pretty_env_info - - return get_pretty_env_info() - - -def get_env_module(): - var_name = "DETECTRON2_ENV_MODULE" - return var_name, os.environ.get(var_name, "") - - -def detect_compute_compatibility(CUDA_HOME, so_file): - try: - cuobjdump = os.path.join(CUDA_HOME, "bin", "cuobjdump") - if os.path.isfile(cuobjdump): - output = subprocess.check_output( - "'{}' --list-elf '{}'".format(cuobjdump, so_file), shell=True - ) - output = output.decode("utf-8").strip().split("\n") - sm = [] - for line in output: - line = re.findall(r"\.sm_[0-9]*\.", line)[0] - sm.append(line.strip(".")) - sm = sorted(set(sm)) - return ", ".join(sm) - else: - return so_file + "; cannot find cuobjdump" - except Exception: - # unhandled failure - return so_file - - -def collect_env_info(): - has_cuda = torch.cuda.is_available() - # NOTE: the use of CUDA_HOME requires the CUDA build deps, though in - # theory detectron2 should be made runnable with only the CUDA runtime - from torch.utils.cpp_extension import CUDA_HOME - - data = [] - data.append(("sys.platform", sys.platform)) - data.append(("Python", sys.version.replace("\n", ""))) - data.append(("numpy", np.__version__)) - - try: - import detectron2 # noqa - - data.append( - ("detectron2", detectron2.__version__ + " @" + os.path.dirname(detectron2.__file__)) - ) - except ImportError: - data.append(("detectron2", "failed to import")) - else: - try: - from detectron2 import _C - except ImportError: - data.append(("detectron2._C", "failed to import")) - else: - data.append(("detectron2 compiler", _C.get_compiler_version())) - data.append(("detectron2 CUDA compiler", _C.get_cuda_version())) - if has_cuda: - data.append( - ("detectron2 arch flags", detect_compute_compatibility(CUDA_HOME, _C.__file__)) - ) - - data.append(get_env_module()) - data.append(("PyTorch", torch.__version__ + " @" + os.path.dirname(torch.__file__))) - data.append(("PyTorch debug build", torch.version.debug)) - - data.append(("CUDA available", has_cuda)) - if has_cuda: - devices = defaultdict(list) - for k in range(torch.cuda.device_count()): - devices[torch.cuda.get_device_name(k)].append(str(k)) - for name, devids in devices.items(): - 
data.append(("GPU " + ",".join(devids), name)) - - from torch.utils.cpp_extension import CUDA_HOME - - data.append(("CUDA_HOME", str(CUDA_HOME))) - - if CUDA_HOME is not None and os.path.isdir(CUDA_HOME): - try: - nvcc = os.path.join(CUDA_HOME, "bin", "nvcc") - nvcc = subprocess.check_output("'{}' -V | tail -n1".format(nvcc), shell=True) - nvcc = nvcc.decode("utf-8").strip() - except subprocess.SubprocessError: - nvcc = "Not Available" - data.append(("NVCC", nvcc)) - - cuda_arch_list = os.environ.get("TORCH_CUDA_ARCH_LIST", None) - if cuda_arch_list: - data.append(("TORCH_CUDA_ARCH_LIST", cuda_arch_list)) - data.append(("Pillow", PIL.__version__)) - - try: - data.append( - ( - "torchvision", - str(torchvision.__version__) + " @" + os.path.dirname(torchvision.__file__), - ) - ) - if has_cuda: - try: - torchvision_C = importlib.util.find_spec("torchvision._C").origin - msg = detect_compute_compatibility(CUDA_HOME, torchvision_C) - data.append(("torchvision arch flags", msg)) - except ImportError: - data.append(("torchvision._C", "failed to find")) - except AttributeError: - data.append(("torchvision", "unknown")) - - try: - import fvcore - - data.append(("fvcore", fvcore.__version__)) - except ImportError: - pass - - try: - import cv2 - - data.append(("cv2", cv2.__version__)) - except ImportError: - pass - env_str = tabulate(data) + "\n" - env_str += collect_torch_env() - return env_str - - -if __name__ == "__main__": - try: - import detectron2 # noqa - except ImportError: - print(collect_env_info()) - else: - from detectron2.utils.collect_env import collect_env_info - - print(collect_env_info()) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/colormap.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/colormap.py deleted file mode 100644 index 1bf1455..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/colormap.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -""" -An awesome colormap for really neat visualizations. -Copied from Detectron, and removed gray colors. 
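The colormap module whose docstring appears above exposes two small helpers, colormap() and random_color(). A usage sketch, assuming detectron2 is importable; note that the BGR default matches OpenCV's channel order, so rgb=True is only needed for RGB pipelines.

import numpy as np
from detectron2.utils.colormap import colormap, random_color

colors = colormap(rgb=True, maximum=255)   # (num_colors, 3) float32 array in RGB, range [0, 255]
print(colors.shape, colors[0])             # first entry is roughly [0, 114, 189]

color = random_color(rgb=True, maximum=1)  # a single random color in [0, 1]
print(color)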
-""" - -import numpy as np - -__all__ = ["colormap", "random_color"] - -# fmt: off -# RGB: -_COLORS = np.array( - [ - 0.000, 0.447, 0.741, - 0.850, 0.325, 0.098, - 0.929, 0.694, 0.125, - 0.494, 0.184, 0.556, - 0.466, 0.674, 0.188, - 0.301, 0.745, 0.933, - 0.635, 0.078, 0.184, - 0.300, 0.300, 0.300, - 0.600, 0.600, 0.600, - 1.000, 0.000, 0.000, - 1.000, 0.500, 0.000, - 0.749, 0.749, 0.000, - 0.000, 1.000, 0.000, - 0.000, 0.000, 1.000, - 0.667, 0.000, 1.000, - 0.333, 0.333, 0.000, - 0.333, 0.667, 0.000, - 0.333, 1.000, 0.000, - 0.667, 0.333, 0.000, - 0.667, 0.667, 0.000, - 0.667, 1.000, 0.000, - 1.000, 0.333, 0.000, - 1.000, 0.667, 0.000, - 1.000, 1.000, 0.000, - 0.000, 0.333, 0.500, - 0.000, 0.667, 0.500, - 0.000, 1.000, 0.500, - 0.333, 0.000, 0.500, - 0.333, 0.333, 0.500, - 0.333, 0.667, 0.500, - 0.333, 1.000, 0.500, - 0.667, 0.000, 0.500, - 0.667, 0.333, 0.500, - 0.667, 0.667, 0.500, - 0.667, 1.000, 0.500, - 1.000, 0.000, 0.500, - 1.000, 0.333, 0.500, - 1.000, 0.667, 0.500, - 1.000, 1.000, 0.500, - 0.000, 0.333, 1.000, - 0.000, 0.667, 1.000, - 0.000, 1.000, 1.000, - 0.333, 0.000, 1.000, - 0.333, 0.333, 1.000, - 0.333, 0.667, 1.000, - 0.333, 1.000, 1.000, - 0.667, 0.000, 1.000, - 0.667, 0.333, 1.000, - 0.667, 0.667, 1.000, - 0.667, 1.000, 1.000, - 1.000, 0.000, 1.000, - 1.000, 0.333, 1.000, - 1.000, 0.667, 1.000, - 0.333, 0.000, 0.000, - 0.500, 0.000, 0.000, - 0.667, 0.000, 0.000, - 0.833, 0.000, 0.000, - 1.000, 0.000, 0.000, - 0.000, 0.167, 0.000, - 0.000, 0.333, 0.000, - 0.000, 0.500, 0.000, - 0.000, 0.667, 0.000, - 0.000, 0.833, 0.000, - 0.000, 1.000, 0.000, - 0.000, 0.000, 0.167, - 0.000, 0.000, 0.333, - 0.000, 0.000, 0.500, - 0.000, 0.000, 0.667, - 0.000, 0.000, 0.833, - 0.000, 0.000, 1.000, - 0.000, 0.000, 0.000, - 0.143, 0.143, 0.143, - 0.857, 0.857, 0.857, - 1.000, 1.000, 1.000 - ] -).astype(np.float32).reshape(-1, 3) -# fmt: on - - -def colormap(rgb=False, maximum=255): - """ - Args: - rgb (bool): whether to return RGB colors or BGR colors. - maximum (int): either 255 or 1 - - Returns: - ndarray: a float32 array of Nx3 colors, in range [0, 255] or [0, 1] - """ - assert maximum in [255, 1], maximum - c = _COLORS * maximum - if not rgb: - c = c[:, ::-1] - return c - - -def random_color(rgb=False, maximum=255): - """ - Args: - rgb (bool): whether to return RGB colors or BGR colors. - maximum (int): either 255 or 1 - - Returns: - ndarray: a vector of 3 numbers - """ - idx = np.random.randint(0, len(_COLORS)) - ret = _COLORS[idx] * maximum - if not rgb: - ret = ret[::-1] - return ret - - -if __name__ == "__main__": - import cv2 - - size = 100 - H, W = 10, 10 - canvas = np.random.rand(H * size, W * size, 3).astype("float32") - for h in range(H): - for w in range(W): - idx = h * W + w - if idx >= len(_COLORS): - break - canvas[h * size : (h + 1) * size, w * size : (w + 1) * size] = _COLORS[idx] - cv2.imshow("a", canvas) - cv2.waitKey(0) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/comm.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/comm.py deleted file mode 100644 index 8cc7b3d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/comm.py +++ /dev/null @@ -1,263 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -""" -This file contains primitives for multi-gpu communication. -This is useful when doing distributed training. 
-""" - -import functools -import logging -import numpy as np -import pickle -import torch -import torch.distributed as dist - -_LOCAL_PROCESS_GROUP = None -""" -A torch process group which only includes processes that on the same machine as the current process. -This variable is set when processes are spawned by `launch()` in "engine/launch.py". -""" - - -def get_world_size() -> int: - if not dist.is_available(): - return 1 - if not dist.is_initialized(): - return 1 - return dist.get_world_size() - - -def get_rank() -> int: - if not dist.is_available(): - return 0 - if not dist.is_initialized(): - return 0 - return dist.get_rank() - - -def get_local_rank() -> int: - """ - Returns: - The rank of the current process within the local (per-machine) process group. - """ - if not dist.is_available(): - return 0 - if not dist.is_initialized(): - return 0 - assert _LOCAL_PROCESS_GROUP is not None - return dist.get_rank(group=_LOCAL_PROCESS_GROUP) - - -def get_local_size() -> int: - """ - Returns: - The size of the per-machine process group, - i.e. the number of processes per machine. - """ - if not dist.is_available(): - return 1 - if not dist.is_initialized(): - return 1 - return dist.get_world_size(group=_LOCAL_PROCESS_GROUP) - - -def is_main_process() -> bool: - return get_rank() == 0 - - -def synchronize(): - """ - Helper function to synchronize (barrier) among all processes when - using distributed training - """ - if not dist.is_available(): - return - if not dist.is_initialized(): - return - world_size = dist.get_world_size() - if world_size == 1: - return - dist.barrier() - - -@functools.lru_cache() -def _get_global_gloo_group(): - """ - Return a process group based on gloo backend, containing all the ranks - The result is cached. - """ - if dist.get_backend() == "nccl": - return dist.new_group(backend="gloo") - else: - return dist.group.WORLD - - -def _serialize_to_tensor(data, group): - backend = dist.get_backend(group) - assert backend in ["gloo", "nccl"] - device = torch.device("cpu" if backend == "gloo" else "cuda") - - buffer = pickle.dumps(data) - if len(buffer) > 1024 ** 3: - logger = logging.getLogger(__name__) - logger.warning( - "Rank {} trying to all-gather {:.2f} GB of data on device {}".format( - get_rank(), len(buffer) / (1024 ** 3), device - ) - ) - storage = torch.ByteStorage.from_buffer(buffer) - tensor = torch.ByteTensor(storage).to(device=device) - return tensor - - -def _pad_to_largest_tensor(tensor, group): - """ - Returns: - list[int]: size of the tensor, on each rank - Tensor: padded tensor that has the max size - """ - world_size = dist.get_world_size(group=group) - assert ( - world_size >= 1 - ), "comm.gather/all_gather must be called from ranks within the given group!" - local_size = torch.tensor([tensor.numel()], dtype=torch.int64, device=tensor.device) - size_list = [ - torch.zeros([1], dtype=torch.int64, device=tensor.device) for _ in range(world_size) - ] - dist.all_gather(size_list, local_size, group=group) - size_list = [int(size.item()) for size in size_list] - - max_size = max(size_list) - - # we pad the tensor because torch all_gather does not support - # gathering tensors of different shapes - if local_size != max_size: - padding = torch.zeros((max_size - local_size,), dtype=torch.uint8, device=tensor.device) - tensor = torch.cat((tensor, padding), dim=0) - return size_list, tensor - - -def all_gather(data, group=None): - """ - Run all_gather on arbitrary picklable data (not necessarily tensors). 
- - Args: - data: any picklable object - group: a torch process group. By default, will use a group which - contains all ranks on gloo backend. - - Returns: - list[data]: list of data gathered from each rank - """ - if get_world_size() == 1: - return [data] - if group is None: - group = _get_global_gloo_group() - if dist.get_world_size(group) == 1: - return [data] - - tensor = _serialize_to_tensor(data, group) - - size_list, tensor = _pad_to_largest_tensor(tensor, group) - max_size = max(size_list) - - # receiving Tensor from all ranks - tensor_list = [ - torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list - ] - dist.all_gather(tensor_list, tensor, group=group) - - data_list = [] - for size, tensor in zip(size_list, tensor_list): - buffer = tensor.cpu().numpy().tobytes()[:size] - data_list.append(pickle.loads(buffer)) - - return data_list - - -def gather(data, dst=0, group=None): - """ - Run gather on arbitrary picklable data (not necessarily tensors). - - Args: - data: any picklable object - dst (int): destination rank - group: a torch process group. By default, will use a group which - contains all ranks on gloo backend. - - Returns: - list[data]: on dst, a list of data gathered from each rank. Otherwise, - an empty list. - """ - if get_world_size() == 1: - return [data] - if group is None: - group = _get_global_gloo_group() - if dist.get_world_size(group=group) == 1: - return [data] - rank = dist.get_rank(group=group) - - tensor = _serialize_to_tensor(data, group) - size_list, tensor = _pad_to_largest_tensor(tensor, group) - - # receiving Tensor from all ranks - if rank == dst: - max_size = max(size_list) - tensor_list = [ - torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list - ] - dist.gather(tensor, tensor_list, dst=dst, group=group) - - data_list = [] - for size, tensor in zip(size_list, tensor_list): - buffer = tensor.cpu().numpy().tobytes()[:size] - data_list.append(pickle.loads(buffer)) - return data_list - else: - dist.gather(tensor, [], dst=dst, group=group) - return [] - - -def shared_random_seed(): - """ - Returns: - int: a random number that is the same across all workers. - If workers need a shared RNG, they can use this shared seed to - create one. - - All workers must call this function, otherwise it will deadlock. - """ - ints = np.random.randint(2 ** 31) - all_ints = all_gather(ints) - return all_ints[0] - - -def reduce_dict(input_dict, average=True): - """ - Reduce the values in the dictionary from all processes so that process with rank - 0 has the reduced results. - - Args: - input_dict (dict): inputs to be reduced. All the values must be scalar CUDA Tensor. - average (bool): whether to do average or sum - - Returns: - a dict with the same keys as input_dict, after reduction. 
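A sketch of how these primitives are typically combined in a training loop, assuming a CUDA device and a process group initialized by detectron2's launch(); with a single process the calls degrade to no-ops that return the inputs. The loss values are placeholders.

import torch
from detectron2.utils import comm

loss_dict = {"loss_cls": torch.tensor(0.7, device="cuda"),
             "loss_box_reg": torch.tensor(0.3, device="cuda")}

# Average the scalar losses across workers; only rank 0 receives the reduced values.
reduced = comm.reduce_dict(loss_dict, average=True)

# Gather arbitrary picklable objects (here, a per-rank count) on every rank.
counts = comm.all_gather(len(loss_dict))

if comm.is_main_process():
    print(reduced, counts)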
- """ - world_size = get_world_size() - if world_size < 2: - return input_dict - with torch.no_grad(): - names = [] - values = [] - # sort the keys so that they are consistent across processes - for k in sorted(input_dict.keys()): - names.append(k) - values.append(input_dict[k]) - values = torch.stack(values, dim=0) - dist.reduce(values, dst=0) - if dist.get_rank() == 0 and average: - # only main process gets accumulated, so only divide by - # world_size in this case - values /= world_size - reduced_dict = {k: v for k, v in zip(names, values)} - return reduced_dict diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/env.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/env.py deleted file mode 100644 index 6769cae..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/env.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import importlib -import importlib.util -import logging -import numpy as np -import os -import random -import sys -from datetime import datetime -import torch - -__all__ = ["seed_all_rng"] - - -def seed_all_rng(seed=None): - """ - Set the random seed for the RNG in torch, numpy and python. - - Args: - seed (int): if None, will use a strong random seed. - """ - if seed is None: - seed = ( - os.getpid() - + int(datetime.now().strftime("%S%f")) - + int.from_bytes(os.urandom(2), "big") - ) - logger = logging.getLogger(__name__) - logger.info("Using a generated random seed {}".format(seed)) - np.random.seed(seed) - torch.set_rng_state(torch.manual_seed(seed).get_state()) - random.seed(seed) - - -# from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path -def _import_file(module_name, file_path, make_importable=False): - spec = importlib.util.spec_from_file_location(module_name, file_path) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - if make_importable: - sys.modules[module_name] = module - return module - - -def _configure_libraries(): - """ - Configurations for some libraries. - """ - # An environment option to disable `import cv2` globally, - # in case it leads to negative performance impact - disable_cv2 = int(os.environ.get("DETECTRON2_DISABLE_CV2", False)) - if disable_cv2: - sys.modules["cv2"] = None - else: - # Disable opencl in opencv since its interaction with cuda often has negative effects - # This envvar is supported after OpenCV 3.4.0 - os.environ["OPENCV_OPENCL_RUNTIME"] = "disabled" - try: - import cv2 - - if int(cv2.__version__.split(".")[0]) >= 3: - cv2.ocl.setUseOpenCL(False) - except ImportError: - pass - - def get_version(module, digit=2): - return tuple(map(int, module.__version__.split(".")[:digit])) - - # fmt: off - assert get_version(torch) >= (1, 4), "Requires torch>=1.4" - import fvcore - assert get_version(fvcore, 3) >= (0, 1, 1), "Requires fvcore>=0.1.1" - import yaml - assert get_version(yaml) >= (5, 1), "Requires pyyaml>=5.1" - # fmt: on - - -_ENV_SETUP_DONE = False - - -def setup_environment(): - """Perform environment setup work. The default setup is a no-op, but this - function allows the user to specify a Python source file or a module in - the $DETECTRON2_ENV_MODULE environment variable, that performs - custom setup work that may be necessary to their computing environment. 
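A sketch of the $DETECTRON2_ENV_MODULE hook described above. The file name my_env_setup.py and its body are hypothetical; the only requirement is that the module expose a callable named setup_environment.

# my_env_setup.py -- hypothetical custom setup module.
# Point detectron2 at it with:  export DETECTRON2_ENV_MODULE=/path/to/my_env_setup.py

import os

def setup_environment():
    # Whatever site-specific tweaks the cluster needs, e.g. cache locations.
    os.environ.setdefault("TORCH_HOME", "/scratch/torch_cache")
    print("custom environment setup ran")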
- """ - global _ENV_SETUP_DONE - if _ENV_SETUP_DONE: - return - _ENV_SETUP_DONE = True - - _configure_libraries() - - custom_module_path = os.environ.get("DETECTRON2_ENV_MODULE") - - if custom_module_path: - setup_custom_environment(custom_module_path) - else: - # The default setup is a no-op - pass - - -def setup_custom_environment(custom_module): - """ - Load custom environment setup by importing a Python source file or a - module, and run the setup function. - """ - if custom_module.endswith(".py"): - module = _import_file("detectron2.utils.env.custom_module", custom_module) - else: - module = importlib.import_module(custom_module) - assert hasattr(module, "setup_environment") and callable(module.setup_environment), ( - "Custom environment module defined in {} does not have the " - "required callable attribute 'setup_environment'." - ).format(custom_module) - module.setup_environment() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/events.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/events.py deleted file mode 100644 index a3c57ed..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/events.py +++ /dev/null @@ -1,432 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import datetime -import json -import logging -import os -import time -from collections import defaultdict -from contextlib import contextmanager -import torch -from fvcore.common.file_io import PathManager -from fvcore.common.history_buffer import HistoryBuffer - -_CURRENT_STORAGE_STACK = [] - - -def get_event_storage(): - """ - Returns: - The :class:`EventStorage` object that's currently being used. - Throws an error if no :class:`EventStorage` is currently enabled. - """ - assert len( - _CURRENT_STORAGE_STACK - ), "get_event_storage() has to be called inside a 'with EventStorage(...)' context!" - return _CURRENT_STORAGE_STACK[-1] - - -class EventWriter: - """ - Base class for writers that obtain events from :class:`EventStorage` and process them. - """ - - def write(self): - raise NotImplementedError - - def close(self): - pass - - -class JSONWriter(EventWriter): - """ - Write scalars to a json file. - - It saves scalars as one json per line (instead of a big json) for easy parsing. - - Examples parsing such a json file: - - .. code-block:: none - - $ cat metrics.json | jq -s '.[0:2]' - [ - { - "data_time": 0.008433341979980469, - "iteration": 20, - "loss": 1.9228371381759644, - "loss_box_reg": 0.050025828182697296, - "loss_classifier": 0.5316952466964722, - "loss_mask": 0.7236229181289673, - "loss_rpn_box": 0.0856662318110466, - "loss_rpn_cls": 0.48198649287223816, - "lr": 0.007173333333333333, - "time": 0.25401854515075684 - }, - { - "data_time": 0.007216215133666992, - "iteration": 40, - "loss": 1.282649278640747, - "loss_box_reg": 0.06222952902317047, - "loss_classifier": 0.30682939291000366, - "loss_mask": 0.6970193982124329, - "loss_rpn_box": 0.038663312792778015, - "loss_rpn_cls": 0.1471673548221588, - "lr": 0.007706666666666667, - "time": 0.2490077018737793 - } - ] - - $ cat metrics.json | jq '.loss_mask' - 0.7126231789588928 - 0.689423680305481 - 0.6776131987571716 - ... - - """ - - def __init__(self, json_file, window_size=20): - """ - Args: - json_file (str): path to the json file. New data will be appended if the file exists. - window_size (int): the window size of median smoothing for the scalars whose - `smoothing_hint` are True. 
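`setup_environment` only does real work when `$DETECTRON2_ENV_MODULE` points at a module or `.py` file exposing a `setup_environment()` callable. A hypothetical custom module (file name and the specific environment tweaks are illustrative, not part of the library):

```python
# my_env_setup.py -- referenced via:
#   export DETECTRON2_ENV_MODULE=/path/to/my_env_setup.py
import os


def setup_environment():
    # Example site-specific tweaks; adjust for your own cluster.
    os.environ.setdefault("OMP_NUM_THREADS", "1")
    os.environ.setdefault("NCCL_DEBUG", "WARN")
```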
- """ - self._file_handle = PathManager.open(json_file, "a") - self._window_size = window_size - - def write(self): - storage = get_event_storage() - to_save = {"iteration": storage.iter} - to_save.update(storage.latest_with_smoothing_hint(self._window_size)) - self._file_handle.write(json.dumps(to_save, sort_keys=True) + "\n") - self._file_handle.flush() - try: - os.fsync(self._file_handle.fileno()) - except AttributeError: - pass - - def close(self): - self._file_handle.close() - - -class TensorboardXWriter(EventWriter): - """ - Write all scalars to a tensorboard file. - """ - - def __init__(self, log_dir: str, window_size: int = 20, **kwargs): - """ - Args: - log_dir (str): the directory to save the output events - window_size (int): the scalars will be median-smoothed by this window size - - kwargs: other arguments passed to `torch.utils.tensorboard.SummaryWriter(...)` - """ - self._window_size = window_size - from torch.utils.tensorboard import SummaryWriter - - self._writer = SummaryWriter(log_dir, **kwargs) - - def write(self): - storage = get_event_storage() - for k, v in storage.latest_with_smoothing_hint(self._window_size).items(): - self._writer.add_scalar(k, v, storage.iter) - - # storage.put_{image,histogram} is only meant to be used by - # tensorboard writer. So we access its internal fields directly from here. - if len(storage._vis_data) >= 1: - for img_name, img, step_num in storage._vis_data: - self._writer.add_image(img_name, img, step_num) - # Storage stores all image data and rely on this writer to clear them. - # As a result it assumes only one writer will use its image data. - # An alternative design is to let storage store limited recent - # data (e.g. only the most recent image) that all writers can access. - # In that case a writer may not see all image data if its period is long. - storage.clear_images() - - if len(storage._histograms) >= 1: - for params in storage._histograms: - self._writer.add_histogram_raw(**params) - storage.clear_histograms() - - def close(self): - if hasattr(self, "_writer"): # doesn't exist when the code fails at import - self._writer.close() - - -class CommonMetricPrinter(EventWriter): - """ - Print **common** metrics to the terminal, including - iteration time, ETA, memory, all losses, and the learning rate. - - To print something different, please implement a similar printer by yourself. - """ - - def __init__(self, max_iter): - """ - Args: - max_iter (int): the maximum number of iterations to train. - Used to compute ETA. 
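Because `JSONWriter` and `TensorboardXWriter` both just pull the latest (optionally smoothed) scalars from the active `EventStorage`, writing a custom writer is a few lines. A rough sketch of a console writer, assuming it is driven periodically the same way the built-in writers are:

```python
from detectron2.utils.events import EventWriter, get_event_storage


class ConsoleWriter(EventWriter):
    """Illustrative writer: dumps the latest smoothed scalars to stdout."""

    def __init__(self, window_size=20):
        self._window_size = window_size

    def write(self):
        storage = get_event_storage()
        scalars = storage.latest_with_smoothing_hint(self._window_size)
        line = "  ".join("{}: {:.4g}".format(k, v) for k, v in sorted(scalars.items()))
        print("iter {}: {}".format(storage.iter, line))

    def close(self):
        pass
```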
- """ - self.logger = logging.getLogger(__name__) - self._max_iter = max_iter - self._last_write = None - - def write(self): - storage = get_event_storage() - iteration = storage.iter - - try: - data_time = storage.history("data_time").avg(20) - except KeyError: - # they may not exist in the first few iterations (due to warmup) - # or when SimpleTrainer is not used - data_time = None - - eta_string = None - try: - iter_time = storage.history("time").global_avg() - eta_seconds = storage.history("time").median(1000) * (self._max_iter - iteration) - storage.put_scalar("eta_seconds", eta_seconds, smoothing_hint=False) - eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) - except KeyError: - iter_time = None - # estimate eta on our own - more noisy - if self._last_write is not None: - estimate_iter_time = (time.perf_counter() - self._last_write[1]) / ( - iteration - self._last_write[0] - ) - eta_seconds = estimate_iter_time * (self._max_iter - iteration) - eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) - self._last_write = (iteration, time.perf_counter()) - - try: - lr = "{:.6f}".format(storage.history("lr").latest()) - except KeyError: - lr = "N/A" - - if torch.cuda.is_available(): - max_mem_mb = torch.cuda.max_memory_allocated() / 1024.0 / 1024.0 - else: - max_mem_mb = None - - # NOTE: max_mem is parsed by grep in "dev/parse_results.sh" - self.logger.info( - " {eta}iter: {iter} {losses} {time}{data_time}lr: {lr} {memory}".format( - eta=f"eta: {eta_string} " if eta_string else "", - iter=iteration, - losses=" ".join( - [ - "{}: {:.3f}".format(k, v.median(20)) - for k, v in storage.histories().items() - if "loss" in k - ] - ), - time="time: {:.4f} ".format(iter_time) if iter_time is not None else "", - data_time="data_time: {:.4f} ".format(data_time) if data_time is not None else "", - lr=lr, - memory="max_mem: {:.0f}M".format(max_mem_mb) if max_mem_mb is not None else "", - ) - ) - - -class EventStorage: - """ - The user-facing class that provides metric storage functionalities. - - In the future we may add support for storing / logging other types of data if needed. - """ - - def __init__(self, start_iter=0): - """ - Args: - start_iter (int): the iteration number to start with - """ - self._history = defaultdict(HistoryBuffer) - self._smoothing_hints = {} - self._latest_scalars = {} - self._iter = start_iter - self._current_prefix = "" - self._vis_data = [] - self._histograms = [] - - def put_image(self, img_name, img_tensor): - """ - Add an `img_tensor` associated with `img_name`, to be shown on - tensorboard. - - Args: - img_name (str): The name of the image to put into tensorboard. - img_tensor (torch.Tensor or numpy.array): An `uint8` or `float` - Tensor of shape `[channel, height, width]` where `channel` is - 3. The image format should be RGB. The elements in img_tensor - can either have values in [0, 1] (float32) or [0, 255] (uint8). - The `img_tensor` will be visualized in tensorboard. - """ - self._vis_data.append((img_name, img_tensor, self._iter)) - - def put_scalar(self, name, value, smoothing_hint=True): - """ - Add a scalar `value` to the `HistoryBuffer` associated with `name`. - - Args: - smoothing_hint (bool): a 'hint' on whether this scalar is noisy and should be - smoothed when logged. The hint will be accessible through - :meth:`EventStorage.smoothing_hints`. A writer may ignore the hint - and apply custom smoothing rule. - - It defaults to True because most scalars we save need to be smoothed to - provide any useful signal. 
- """ - name = self._current_prefix + name - history = self._history[name] - value = float(value) - history.update(value, self._iter) - self._latest_scalars[name] = value - - existing_hint = self._smoothing_hints.get(name) - if existing_hint is not None: - assert ( - existing_hint == smoothing_hint - ), "Scalar {} was put with a different smoothing_hint!".format(name) - else: - self._smoothing_hints[name] = smoothing_hint - - def put_scalars(self, *, smoothing_hint=True, **kwargs): - """ - Put multiple scalars from keyword arguments. - - Examples: - - storage.put_scalars(loss=my_loss, accuracy=my_accuracy, smoothing_hint=True) - """ - for k, v in kwargs.items(): - self.put_scalar(k, v, smoothing_hint=smoothing_hint) - - def put_histogram(self, hist_name, hist_tensor, bins=1000): - """ - Create a histogram from a tensor. - - Args: - hist_name (str): The name of the histogram to put into tensorboard. - hist_tensor (torch.Tensor): A Tensor of arbitrary shape to be converted - into a histogram. - bins (int): Number of histogram bins. - """ - ht_min, ht_max = hist_tensor.min().item(), hist_tensor.max().item() - - # Create a histogram with PyTorch - hist_counts = torch.histc(hist_tensor, bins=bins) - hist_edges = torch.linspace(start=ht_min, end=ht_max, steps=bins + 1, dtype=torch.float32) - - # Parameter for the add_histogram_raw function of SummaryWriter - hist_params = dict( - tag=hist_name, - min=ht_min, - max=ht_max, - num=len(hist_tensor), - sum=float(hist_tensor.sum()), - sum_squares=float(torch.sum(hist_tensor ** 2)), - bucket_limits=hist_edges[1:].tolist(), - bucket_counts=hist_counts.tolist(), - global_step=self._iter, - ) - self._histograms.append(hist_params) - - def history(self, name): - """ - Returns: - HistoryBuffer: the scalar history for name - """ - ret = self._history.get(name, None) - if ret is None: - raise KeyError("No history metric available for {}!".format(name)) - return ret - - def histories(self): - """ - Returns: - dict[name -> HistoryBuffer]: the HistoryBuffer for all scalars - """ - return self._history - - def latest(self): - """ - Returns: - dict[name -> number]: the scalars that's added in the current iteration. - """ - return self._latest_scalars - - def latest_with_smoothing_hint(self, window_size=20): - """ - Similar to :meth:`latest`, but the returned values - are either the un-smoothed original latest value, - or a median of the given window_size, - depend on whether the smoothing_hint is True. - - This provides a default behavior that other writers can use. - """ - result = {} - for k, v in self._latest_scalars.items(): - result[k] = self._history[k].median(window_size) if self._smoothing_hints[k] else v - return result - - def smoothing_hints(self): - """ - Returns: - dict[name -> bool]: the user-provided hint on whether the scalar - is noisy and needs smoothing. - """ - return self._smoothing_hints - - def step(self): - """ - User should call this function at the beginning of each iteration, to - notify the storage of the start of a new iteration. - The storage will then be able to associate the new data with the - correct iteration number. 
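In a hand-written loop the storage is used as a context manager: scalars go in through `put_scalar`/`put_scalars`, writers read them back out, and `step()` (whose body follows just below) advances the iteration counter. A compressed sketch with made-up losses and an illustrative output path:

```python
from detectron2.utils.events import EventStorage, JSONWriter

writer = JSONWriter("metrics.json")
with EventStorage(start_iter=0) as storage:
    for iteration in range(100):
        # ... forward/backward pass would go here ...
        storage.put_scalars(total_loss=1.0 / (iteration + 1), lr=0.01)
        if (iteration + 1) % 20 == 0:
            # Writers must run while the storage context is active.
            writer.write()
        storage.step()
writer.close()
```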
- """ - self._iter += 1 - self._latest_scalars = {} - - @property - def iter(self): - return self._iter - - @property - def iteration(self): - # for backward compatibility - return self._iter - - def __enter__(self): - _CURRENT_STORAGE_STACK.append(self) - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - assert _CURRENT_STORAGE_STACK[-1] == self - _CURRENT_STORAGE_STACK.pop() - - @contextmanager - def name_scope(self, name): - """ - Yields: - A context within which all the events added to this storage - will be prefixed by the name scope. - """ - old_prefix = self._current_prefix - self._current_prefix = name.rstrip("/") + "/" - yield - self._current_prefix = old_prefix - - def clear_images(self): - """ - Delete all the stored images for visualization. This should be called - after images are written to tensorboard. - """ - self._vis_data = [] - - def clear_histograms(self): - """ - Delete all the stored histograms for visualization. - This should be called after histograms are written to tensorboard. - """ - self._histograms = [] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/logger.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/logger.py deleted file mode 100644 index b6496d9..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/logger.py +++ /dev/null @@ -1,221 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import functools -import logging -import os -import sys -import time -from collections import Counter -from fvcore.common.file_io import PathManager -from tabulate import tabulate -from termcolor import colored - - -class _ColorfulFormatter(logging.Formatter): - def __init__(self, *args, **kwargs): - self._root_name = kwargs.pop("root_name") + "." - self._abbrev_name = kwargs.pop("abbrev_name", "") - if len(self._abbrev_name): - self._abbrev_name = self._abbrev_name + "." - super(_ColorfulFormatter, self).__init__(*args, **kwargs) - - def formatMessage(self, record): - record.name = record.name.replace(self._root_name, self._abbrev_name) - log = super(_ColorfulFormatter, self).formatMessage(record) - if record.levelno == logging.WARNING: - prefix = colored("WARNING", "red", attrs=["blink"]) - elif record.levelno == logging.ERROR or record.levelno == logging.CRITICAL: - prefix = colored("ERROR", "red", attrs=["blink", "underline"]) - else: - return log - return prefix + " " + log - - -@functools.lru_cache() # so that calling setup_logger multiple times won't add many handlers -def setup_logger( - output=None, distributed_rank=0, *, color=True, name="detectron2", abbrev_name=None -): - """ - Initialize the detectron2 logger and set its verbosity level to "DEBUG". - - Args: - output (str): a file name or a directory to save log. If None, will not save log file. - If ends with ".txt" or ".log", assumed to be a file name. - Otherwise, logs will be saved to `output/log.txt`. - name (str): the root module name of this logger - abbrev_name (str): an abbreviation of the module, to avoid long names in logs. - Set to "" to not log the root module in logs. - By default, will abbreviate "detectron2" to "d2" and leave other - modules unchanged. 
- - Returns: - logging.Logger: a logger - """ - logger = logging.getLogger(name) - logger.setLevel(logging.DEBUG) - logger.propagate = False - - if abbrev_name is None: - abbrev_name = "d2" if name == "detectron2" else name - - plain_formatter = logging.Formatter( - "[%(asctime)s] %(name)s %(levelname)s: %(message)s", datefmt="%m/%d %H:%M:%S" - ) - # stdout logging: master only - if distributed_rank == 0: - ch = logging.StreamHandler(stream=sys.stdout) - ch.setLevel(logging.DEBUG) - if color: - formatter = _ColorfulFormatter( - colored("[%(asctime)s %(name)s]: ", "green") + "%(message)s", - datefmt="%m/%d %H:%M:%S", - root_name=name, - abbrev_name=str(abbrev_name), - ) - else: - formatter = plain_formatter - ch.setFormatter(formatter) - logger.addHandler(ch) - - # file logging: all workers - if output is not None: - if output.endswith(".txt") or output.endswith(".log"): - filename = output - else: - filename = os.path.join(output, "log.txt") - if distributed_rank > 0: - filename = filename + ".rank{}".format(distributed_rank) - PathManager.mkdirs(os.path.dirname(filename)) - - fh = logging.StreamHandler(_cached_log_stream(filename)) - fh.setLevel(logging.DEBUG) - fh.setFormatter(plain_formatter) - logger.addHandler(fh) - - return logger - - -# cache the opened file object, so that different calls to `setup_logger` -# with the same file name can safely write to the same file. -@functools.lru_cache(maxsize=None) -def _cached_log_stream(filename): - return PathManager.open(filename, "a") - - -""" -Below are some other convenient logging methods. -They are mainly adopted from -https://github.com/abseil/abseil-py/blob/master/absl/logging/__init__.py -""" - - -def _find_caller(): - """ - Returns: - str: module name of the caller - tuple: a hashable key to be used to identify different callers - """ - frame = sys._getframe(2) - while frame: - code = frame.f_code - if os.path.join("utils", "logger.") not in code.co_filename: - mod_name = frame.f_globals["__name__"] - if mod_name == "__main__": - mod_name = "detectron2" - return mod_name, (code.co_filename, frame.f_lineno, code.co_name) - frame = frame.f_back - - -_LOG_COUNTER = Counter() -_LOG_TIMER = {} - - -def log_first_n(lvl, msg, n=1, *, name=None, key="caller"): - """ - Log only for the first n times. - - Args: - lvl (int): the logging level - msg (str): - n (int): - name (str): name of the logger to use. Will use the caller's module by default. - key (str or tuple[str]): the string(s) can be one of "caller" or - "message", which defines how to identify duplicated logs. - For example, if called with `n=1, key="caller"`, this function - will only log the first call from the same caller, regardless of - the message content. - If called with `n=1, key="message"`, this function will log the - same content only once, even if they are called from different places. - If called with `n=1, key=("caller", "message")`, this function - will not log only if the same caller has logged the same message before. - """ - if isinstance(key, str): - key = (key,) - assert len(key) > 0 - - caller_module, caller_key = _find_caller() - hash_key = () - if "caller" in key: - hash_key = hash_key + caller_key - if "message" in key: - hash_key = hash_key + (msg,) - - _LOG_COUNTER[hash_key] += 1 - if _LOG_COUNTER[hash_key] <= n: - logging.getLogger(name or caller_module).log(lvl, msg) - - -def log_every_n(lvl, msg, n=1, *, name=None): - """ - Log once per n times. - - Args: - lvl (int): the logging level - msg (str): - n (int): - name (str): name of the logger to use. 
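The logger returned by `setup_logger` is an ordinary `logging.Logger`, and the `log_first_n`-style helpers deduplicate messages by caller and/or content. A small sketch of typical usage; the output directory is illustrative:

```python
import logging
from detectron2.utils.logger import setup_logger, log_first_n

logger = setup_logger(output="./output", name="detectron2")
logger.info("training started")

for batch in range(1000):
    # Emitted at most once, no matter how many batches hit this path,
    # because duplicates are keyed by message content here.
    log_first_n(logging.WARNING, "empty annotations in batch", n=1, key="message")
```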
Will use the caller's module by default. - """ - caller_module, key = _find_caller() - _LOG_COUNTER[key] += 1 - if n == 1 or _LOG_COUNTER[key] % n == 1: - logging.getLogger(name or caller_module).log(lvl, msg) - - -def log_every_n_seconds(lvl, msg, n=1, *, name=None): - """ - Log no more than once per n seconds. - - Args: - lvl (int): the logging level - msg (str): - n (int): - name (str): name of the logger to use. Will use the caller's module by default. - """ - caller_module, key = _find_caller() - last_logged = _LOG_TIMER.get(key, None) - current_time = time.time() - if last_logged is None or current_time - last_logged >= n: - logging.getLogger(name or caller_module).log(lvl, msg) - _LOG_TIMER[key] = current_time - - -def create_small_table(small_dict): - """ - Create a small table using the keys of small_dict as headers. This is only - suitable for small dictionaries. - - Args: - small_dict (dict): a result dictionary of only a few items. - - Returns: - str: the table as a string. - """ - keys, values = tuple(zip(*small_dict.items())) - table = tabulate( - [values], - headers=keys, - tablefmt="pipe", - floatfmt=".3f", - stralign="center", - numalign="center", - ) - return table diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/memory.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/memory.py deleted file mode 100644 index d495a16..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/memory.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -import logging -from contextlib import contextmanager -from functools import wraps -import torch - -__all__ = ["retry_if_cuda_oom"] - - -@contextmanager -def _ignore_torch_cuda_oom(): - """ - A context which ignores CUDA OOM exception from pytorch. - """ - try: - yield - except RuntimeError as e: - # NOTE: the string may change? - if "CUDA out of memory. " in str(e): - pass - else: - raise - - -def retry_if_cuda_oom(func): - """ - Makes a function retry itself after encountering - pytorch's CUDA OOM error. - It will first retry after calling `torch.cuda.empty_cache()`. - - If that still fails, it will then retry by trying to convert inputs to CPUs. - In this case, it expects the function to dispatch to CPU implementation. - The return values may become CPU tensors as well and it's user's - responsibility to convert it back to CUDA tensor if needed. - - Args: - func: a stateless callable that takes tensor-like objects as arguments - - Returns: - a callable which retries `func` if OOM is encountered. - - Examples: - - .. code-block:: python - - output = retry_if_cuda_oom(some_torch_function)(input1, input2) - # output may be on CPU even if inputs are on GPU - - Note: - 1. When converting inputs to CPU, it will only look at each argument and check - if it has `.device` and `.to` for conversion. Nested structures of tensors - are not supported. - - 2. Since the function might be called more than once, it has to be - stateless. - """ - - def maybe_to_cpu(x): - try: - like_gpu_tensor = x.device.type == "cuda" and hasattr(x, "to") - except AttributeError: - like_gpu_tensor = False - if like_gpu_tensor: - return x.to(device="cpu") - else: - return x - - @wraps(func) - def wrapped(*args, **kwargs): - with _ignore_torch_cuda_oom(): - return func(*args, **kwargs) - - # Clear cache and retry - torch.cuda.empty_cache() - with _ignore_torch_cuda_oom(): - return func(*args, **kwargs) - - # Try on CPU. 
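The rate-limited variant defined a little earlier in `logger.py`, `log_every_n_seconds`, is keyed by call site rather than by count. A minimal sketch of throttled progress logging (the loop body is a stand-in for real work):

```python
import logging
import time

from detectron2.utils.logger import log_every_n_seconds

logging.basicConfig(level=logging.INFO)

for i in range(10000):
    # Keyed by call site: at most one record every 5 seconds from this line.
    log_every_n_seconds(logging.INFO, "processed {} items".format(i), n=5)
    time.sleep(0.001)
```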
This slows down the code significantly, therefore print a notice. - logger = logging.getLogger(__name__) - logger.info("Attempting to copy inputs of {} to CPU due to CUDA OOM".format(str(func))) - new_args = (maybe_to_cpu(x) for x in args) - new_kwargs = {k: maybe_to_cpu(v) for k, v in kwargs.items()} - return func(*new_args, **new_kwargs) - - return wrapped diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/registry.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/registry.py deleted file mode 100644 index fea1de9..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/registry.py +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# Keep this module for backward compatibility. -from fvcore.common.registry import Registry # noqa - -__all__ = ["Registry"] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/serialize.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/serialize.py deleted file mode 100644 index 734a62c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/serialize.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import cloudpickle - - -class PicklableWrapper(object): - """ - Wrap an object to make it more picklable, note that it uses - heavy weight serialization libraries that are slower than pickle. - It's best to use it only on closures (which are usually not picklable). - - This is a simplified version of - https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py - """ - - def __init__(self, obj): - self._obj = obj - - def __reduce__(self): - s = cloudpickle.dumps(self._obj) - return cloudpickle.loads, (s,) - - def __call__(self, *args, **kwargs): - return self._obj(*args, **kwargs) - - def __getattr__(self, attr): - # Ensure that the wrapped object can be used seamlessly as the previous object. - if attr not in ["_obj"]: - return getattr(self._obj, attr) - return getattr(self, attr) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/video_visualizer.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/video_visualizer.py deleted file mode 100644 index 0144b67..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/video_visualizer.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import numpy as np -import pycocotools.mask as mask_util - -from detectron2.utils.visualizer import ( - ColorMode, - Visualizer, - _create_text_labels, - _PanopticPrediction, -) - -from .colormap import random_color - - -class _DetectedInstance: - """ - Used to store data about detected objects in video frame, - in order to transfer color to objects in the future frames. - - Attributes: - label (int): - bbox (tuple[float]): - mask_rle (dict): - color (tuple[float]): RGB colors in range (0, 1) - ttl (int): time-to-live for the instance. For example, if ttl=2, - the instance color can be transferred to objects in the next two frames. 
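`PicklableWrapper` exists mainly so closures and lambdas can cross process boundaries (for example into `DataLoader` workers) by delegating serialization to `cloudpickle`. A minimal round-trip sketch; the filter function is purely illustrative:

```python
import pickle

from detectron2.utils.serialize import PicklableWrapper

threshold = 0.5
score_filter = lambda s: s > threshold  # plain lambdas are not picklable

wrapped = PicklableWrapper(score_filter)
# __reduce__ routes through cloudpickle, so standard pickle now works;
# unpickling returns the original callable.
restored = pickle.loads(pickle.dumps(wrapped))
print(restored(0.7))  # True
```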
- """ - - __slots__ = ["label", "bbox", "mask_rle", "color", "ttl"] - - def __init__(self, label, bbox, mask_rle, color, ttl): - self.label = label - self.bbox = bbox - self.mask_rle = mask_rle - self.color = color - self.ttl = ttl - - -class VideoVisualizer: - def __init__(self, metadata, instance_mode=ColorMode.IMAGE): - """ - Args: - metadata (MetadataCatalog): image metadata. - """ - self.metadata = metadata - self._old_instances = [] - assert instance_mode in [ - ColorMode.IMAGE, - ColorMode.IMAGE_BW, - ], "Other mode not supported yet." - self._instance_mode = instance_mode - - def draw_instance_predictions(self, frame, predictions): - """ - Draw instance-level prediction results on an image. - - Args: - frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255]. - predictions (Instances): the output of an instance detection/segmentation - model. Following fields will be used to draw: - "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle"). - - Returns: - output (VisImage): image object with visualizations. - """ - frame_visualizer = Visualizer(frame, self.metadata) - num_instances = len(predictions) - if num_instances == 0: - return frame_visualizer.output - - boxes = predictions.pred_boxes.tensor.numpy() if predictions.has("pred_boxes") else None - scores = predictions.scores if predictions.has("scores") else None - classes = predictions.pred_classes.numpy() if predictions.has("pred_classes") else None - keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None - - if predictions.has("pred_masks"): - masks = predictions.pred_masks - # mask IOU is not yet enabled - # masks_rles = mask_util.encode(np.asarray(masks.permute(1, 2, 0), order="F")) - # assert len(masks_rles) == num_instances - else: - masks = None - - detected = [ - _DetectedInstance(classes[i], boxes[i], mask_rle=None, color=None, ttl=8) - for i in range(num_instances) - ] - colors = self._assign_colors(detected) - - labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) - - if self._instance_mode == ColorMode.IMAGE_BW: - # any() returns uint8 tensor - frame_visualizer.output.img = frame_visualizer._create_grayscale_image( - (masks.any(dim=0) > 0).numpy() if masks is not None else None - ) - alpha = 0.3 - else: - alpha = 0.5 - - frame_visualizer.overlay_instances( - boxes=None if masks is not None else boxes, # boxes are a bit distracting - masks=masks, - labels=labels, - keypoints=keypoints, - assigned_colors=colors, - alpha=alpha, - ) - - return frame_visualizer.output - - def draw_sem_seg(self, frame, sem_seg, area_threshold=None): - """ - Args: - sem_seg (ndarray or Tensor): semantic segmentation of shape (H, W), - each value is the integer label. - area_threshold (Optional[int]): only draw segmentations larger than the threshold - """ - # don't need to do anything special - frame_visualizer = Visualizer(frame, self.metadata) - frame_visualizer.draw_sem_seg(sem_seg, area_threshold=None) - return frame_visualizer.output - - def draw_panoptic_seg_predictions( - self, frame, panoptic_seg, segments_info, area_threshold=None, alpha=0.5 - ): - frame_visualizer = Visualizer(frame, self.metadata) - pred = _PanopticPrediction(panoptic_seg, segments_info) - - if self._instance_mode == ColorMode.IMAGE_BW: - frame_visualizer.output.img = frame_visualizer._create_grayscale_image( - pred.non_empty_mask() - ) - - # draw mask for all semantic segments first i.e. 
"stuff" - for mask, sinfo in pred.semantic_masks(): - category_idx = sinfo["category_id"] - try: - mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]] - except AttributeError: - mask_color = None - - frame_visualizer.draw_binary_mask( - mask, - color=mask_color, - text=self.metadata.stuff_classes[category_idx], - alpha=alpha, - area_threshold=area_threshold, - ) - - all_instances = list(pred.instance_masks()) - if len(all_instances) == 0: - return frame_visualizer.output - # draw mask for all instances second - masks, sinfo = list(zip(*all_instances)) - num_instances = len(masks) - masks_rles = mask_util.encode( - np.asarray(np.asarray(masks).transpose(1, 2, 0), dtype=np.uint8, order="F") - ) - assert len(masks_rles) == num_instances - - category_ids = [x["category_id"] for x in sinfo] - detected = [ - _DetectedInstance(category_ids[i], bbox=None, mask_rle=masks_rles[i], color=None, ttl=8) - for i in range(num_instances) - ] - colors = self._assign_colors(detected) - labels = [self.metadata.thing_classes[k] for k in category_ids] - - frame_visualizer.overlay_instances( - boxes=None, - masks=masks, - labels=labels, - keypoints=None, - assigned_colors=colors, - alpha=alpha, - ) - return frame_visualizer.output - - def _assign_colors(self, instances): - """ - Naive tracking heuristics to assign same color to the same instance, - will update the internal state of tracked instances. - - Returns: - list[tuple[float]]: list of colors. - """ - - # Compute iou with either boxes or masks: - is_crowd = np.zeros((len(instances),), dtype=np.bool) - if instances[0].bbox is None: - assert instances[0].mask_rle is not None - # use mask iou only when box iou is None - # because box seems good enough - rles_old = [x.mask_rle for x in self._old_instances] - rles_new = [x.mask_rle for x in instances] - ious = mask_util.iou(rles_old, rles_new, is_crowd) - threshold = 0.5 - else: - boxes_old = [x.bbox for x in self._old_instances] - boxes_new = [x.bbox for x in instances] - ious = mask_util.iou(boxes_old, boxes_new, is_crowd) - threshold = 0.6 - if len(ious) == 0: - ious = np.zeros((len(self._old_instances), len(instances)), dtype="float32") - - # Only allow matching instances of the same label: - for old_idx, old in enumerate(self._old_instances): - for new_idx, new in enumerate(instances): - if old.label != new.label: - ious[old_idx, new_idx] = 0 - - matched_new_per_old = np.asarray(ious).argmax(axis=1) - max_iou_per_old = np.asarray(ious).max(axis=1) - - # Try to find match for each old instance: - extra_instances = [] - for idx, inst in enumerate(self._old_instances): - if max_iou_per_old[idx] > threshold: - newidx = matched_new_per_old[idx] - if instances[newidx].color is None: - instances[newidx].color = inst.color - continue - # If an old instance does not match any new instances, - # keep it for the next frame in case it is just missed by the detector - inst.ttl -= 1 - if inst.ttl > 0: - extra_instances.append(inst) - - # Assign random color to newly-detected instances: - for inst in instances: - if inst.color is None: - inst.color = random_color(rgb=True, maximum=1) - self._old_instances = instances[:] + extra_instances - return [d.color for d in instances] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/visualizer.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/visualizer.py deleted file mode 100644 index 3ffcbdb..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/visualizer.py +++ /dev/null @@ 
-1,1143 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import colorsys -import logging -import math -import numpy as np -from enum import Enum, unique -import cv2 -import matplotlib as mpl -import matplotlib.colors as mplc -import matplotlib.figure as mplfigure -import pycocotools.mask as mask_util -import torch -from fvcore.common.file_io import PathManager -from matplotlib.backends.backend_agg import FigureCanvasAgg -from PIL import Image - -from detectron2.structures import BitMasks, Boxes, BoxMode, Keypoints, PolygonMasks, RotatedBoxes - -from .colormap import random_color - -logger = logging.getLogger(__name__) - -__all__ = ["ColorMode", "VisImage", "Visualizer"] - - -_SMALL_OBJECT_AREA_THRESH = 1000 -_LARGE_MASK_AREA_THRESH = 120000 -_OFF_WHITE = (1.0, 1.0, 240.0 / 255) -_BLACK = (0, 0, 0) -_RED = (1.0, 0, 0) - -_KEYPOINT_THRESHOLD = 0.05 - - -@unique -class ColorMode(Enum): - """ - Enum of different color modes to use for instance visualizations. - """ - - IMAGE = 0 - """ - Picks a random color for every instance and overlay segmentations with low opacity. - """ - SEGMENTATION = 1 - """ - Let instances of the same category have similar colors - (from metadata.thing_colors), and overlay them with - high opacity. This provides more attention on the quality of segmentation. - """ - IMAGE_BW = 2 - """ - Same as IMAGE, but convert all areas without masks to gray-scale. - Only available for drawing per-instance mask predictions. - """ - - -class GenericMask: - """ - Attribute: - polygons (list[ndarray]): list[ndarray]: polygons for this mask. - Each ndarray has format [x, y, x, y, ...] - mask (ndarray): a binary mask - """ - - def __init__(self, mask_or_polygons, height, width): - self._mask = self._polygons = self._has_holes = None - self.height = height - self.width = width - - m = mask_or_polygons - if isinstance(m, dict): - # RLEs - assert "counts" in m and "size" in m - if isinstance(m["counts"], list): # uncompressed RLEs - h, w = m["size"] - assert h == height and w == width - m = mask_util.frPyObjects(m, h, w) - self._mask = mask_util.decode(m)[:, :] - return - - if isinstance(m, list): # list[ndarray] - self._polygons = [np.asarray(x).reshape(-1) for x in m] - return - - if isinstance(m, np.ndarray): # assumed to be a binary mask - assert m.shape[1] != 2, m.shape - assert m.shape == (height, width), m.shape - self._mask = m.astype("uint8") - return - - raise ValueError("GenericMask cannot handle object {} of type '{}'".format(m, type(m))) - - @property - def mask(self): - if self._mask is None: - self._mask = self.polygons_to_mask(self._polygons) - return self._mask - - @property - def polygons(self): - if self._polygons is None: - self._polygons, self._has_holes = self.mask_to_polygons(self._mask) - return self._polygons - - @property - def has_holes(self): - if self._has_holes is None: - if self._mask is not None: - self._polygons, self._has_holes = self.mask_to_polygons(self._mask) - else: - self._has_holes = False # if original format is polygon, does not have holes - return self._has_holes - - def mask_to_polygons(self, mask): - # cv2.RETR_CCOMP flag retrieves all the contours and arranges them to a 2-level - # hierarchy. External contours (boundary) of the object are placed in hierarchy-1. - # Internal contours (holes) are placed in hierarchy-2. - # cv2.CHAIN_APPROX_NONE flag gets vertices of polygons from contours. 
- mask = np.ascontiguousarray(mask) # some versions of cv2 does not support incontiguous arr - res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE) - hierarchy = res[-1] - if hierarchy is None: # empty mask - return [], False - has_holes = (hierarchy.reshape(-1, 4)[:, 3] >= 0).sum() > 0 - res = res[-2] - res = [x.flatten() for x in res] - res = [x for x in res if len(x) >= 6] - return res, has_holes - - def polygons_to_mask(self, polygons): - rle = mask_util.frPyObjects(polygons, self.height, self.width) - rle = mask_util.merge(rle) - return mask_util.decode(rle)[:, :] - - def area(self): - return self.mask.sum() - - def bbox(self): - p = mask_util.frPyObjects(self.polygons, self.height, self.width) - p = mask_util.merge(p) - bbox = mask_util.toBbox(p) - bbox[2] += bbox[0] - bbox[3] += bbox[1] - return bbox - - -class _PanopticPrediction: - def __init__(self, panoptic_seg, segments_info): - self._seg = panoptic_seg - - self._sinfo = {s["id"]: s for s in segments_info} # seg id -> seg info - segment_ids, areas = torch.unique(panoptic_seg, sorted=True, return_counts=True) - areas = areas.numpy() - sorted_idxs = np.argsort(-areas) - self._seg_ids, self._seg_areas = segment_ids[sorted_idxs], areas[sorted_idxs] - self._seg_ids = self._seg_ids.tolist() - for sid, area in zip(self._seg_ids, self._seg_areas): - if sid in self._sinfo: - self._sinfo[sid]["area"] = float(area) - - def non_empty_mask(self): - """ - Returns: - (H, W) array, a mask for all pixels that have a prediction - """ - empty_ids = [] - for id in self._seg_ids: - if id not in self._sinfo: - empty_ids.append(id) - if len(empty_ids) == 0: - return np.zeros(self._seg.shape, dtype=np.uint8) - assert ( - len(empty_ids) == 1 - ), ">1 ids corresponds to no labels. This is currently not supported" - return (self._seg != empty_ids[0]).numpy().astype(np.bool) - - def semantic_masks(self): - for sid in self._seg_ids: - sinfo = self._sinfo.get(sid) - if sinfo is None or sinfo["isthing"]: - # Some pixels (e.g. id 0 in PanopticFPN) have no instance or semantic predictions. - continue - yield (self._seg == sid).numpy().astype(np.bool), sinfo - - def instance_masks(self): - for sid in self._seg_ids: - sinfo = self._sinfo.get(sid) - if sinfo is None or not sinfo["isthing"]: - continue - mask = (self._seg == sid).numpy().astype(np.bool) - if mask.sum() > 0: - yield mask, sinfo - - -def _create_text_labels(classes, scores, class_names): - """ - Args: - classes (list[int] or None): - scores (list[float] or None): - class_names (list[str] or None): - - Returns: - list[str] or None - """ - labels = None - if classes is not None and class_names is not None and len(class_names) > 1: - labels = [class_names[i] for i in classes] - if scores is not None: - if labels is None: - labels = ["{:.0f}%".format(s * 100) for s in scores] - else: - labels = ["{} {:.0f}%".format(l, s * 100) for l, s in zip(labels, scores)] - return labels - - -class VisImage: - def __init__(self, img, scale=1.0): - """ - Args: - img (ndarray): an RGB image of shape (H, W, 3). - scale (float): scale the input image - """ - self.img = img - self.scale = scale - self.width, self.height = img.shape[1], img.shape[0] - self._setup_figure(img) - - def _setup_figure(self, img): - """ - Args: - Same as in :meth:`__init__()`. - - Returns: - fig (matplotlib.pyplot.figure): top level container for all the image plot elements. - ax (matplotlib.pyplot.Axes): contains figure elements and sets the coordinate system. 
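`_create_text_labels`, defined just above, is the small formatter behind the "person 97%"-style captions; its behavior is easy to see in isolation (it is a private helper, but importable from `detectron2.utils.visualizer`):

```python
from detectron2.utils.visualizer import _create_text_labels

labels = _create_text_labels(
    classes=[0, 1], scores=[0.97, 0.62], class_names=["person", "bicycle"]
)
print(labels)  # ['person 97%', 'bicycle 62%']
```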
- """ - fig = mplfigure.Figure(frameon=False) - self.dpi = fig.get_dpi() - # add a small 1e-2 to avoid precision lost due to matplotlib's truncation - # (https://github.com/matplotlib/matplotlib/issues/15363) - fig.set_size_inches( - (self.width * self.scale + 1e-2) / self.dpi, - (self.height * self.scale + 1e-2) / self.dpi, - ) - self.canvas = FigureCanvasAgg(fig) - # self.canvas = mpl.backends.backend_cairo.FigureCanvasCairo(fig) - ax = fig.add_axes([0.0, 0.0, 1.0, 1.0]) - ax.axis("off") - ax.set_xlim(0.0, self.width) - ax.set_ylim(self.height) - - self.fig = fig - self.ax = ax - - def save(self, filepath): - """ - Args: - filepath (str): a string that contains the absolute path, including the file name, where - the visualized image will be saved. - """ - if filepath.lower().endswith(".jpg") or filepath.lower().endswith(".png"): - # faster than matplotlib's imshow - cv2.imwrite(filepath, self.get_image()[:, :, ::-1]) - else: - # support general formats (e.g. pdf) - self.ax.imshow(self.img, interpolation="nearest") - self.fig.savefig(filepath) - - def get_image(self): - """ - Returns: - ndarray: - the visualized image of shape (H, W, 3) (RGB) in uint8 type. - The shape is scaled w.r.t the input image using the given `scale` argument. - """ - canvas = self.canvas - s, (width, height) = canvas.print_to_buffer() - if (self.width, self.height) != (width, height): - img = cv2.resize(self.img, (width, height)) - else: - img = self.img - - # buf = io.BytesIO() # works for cairo backend - # canvas.print_rgba(buf) - # width, height = self.width, self.height - # s = buf.getvalue() - - buffer = np.frombuffer(s, dtype="uint8") - - # imshow is slow. blend manually (still quite slow) - img_rgba = buffer.reshape(height, width, 4) - rgb, alpha = np.split(img_rgba, [3], axis=2) - - try: - import numexpr as ne # fuse them with numexpr - - visualized_image = ne.evaluate("demo * (1 - alpha / 255.0) + rgb * (alpha / 255.0)") - except ImportError: - alpha = alpha.astype("float32") / 255.0 - visualized_image = img * (1 - alpha) + rgb * alpha - - visualized_image = visualized_image.astype("uint8") - - return visualized_image - - -class Visualizer: - def __init__(self, img_rgb, metadata, scale=1.0, instance_mode=ColorMode.IMAGE): - """ - Args: - img_rgb: a numpy array of shape (H, W, C), where H and W correspond to - the height and width of the image respectively. C is the number of - color channels. The image is required to be in RGB format since that - is a requirement of the Matplotlib library. The image is also expected - to be in the range [0, 255]. - metadata (MetadataCatalog): image metadata. - """ - self.img = np.asarray(img_rgb).clip(0, 255).astype(np.uint8) - self.metadata = metadata - self.output = VisImage(self.img, scale=scale) - self.cpu_device = torch.device("cpu") - - # too small texts are useless, therefore clamp to 9 - self._default_font_size = max( - np.sqrt(self.output.height * self.output.width) // 90, 10 // scale - ) - self._instance_mode = instance_mode - - def draw_instance_predictions(self, predictions): - """ - Draw instance-level prediction results on an image. - - Args: - predictions (Instances): the output of an instance detection/segmentation - model. Following fields will be used to draw: - "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle"). - - Returns: - output (VisImage): image object with visualizations. 
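`draw_instance_predictions` is the usual entry point for visualizing model outputs. A standard sketch, assuming `outputs` is a `DefaultPredictor`-style dict with an `instances` field and the input image is BGR as loaded by `cv2` (the dataset name and file paths are illustrative):

```python
import cv2
from detectron2.data import MetadataCatalog
from detectron2.utils.visualizer import Visualizer


def save_visualization(image_bgr, outputs, path="vis.jpg"):
    """outputs: dict with an 'instances' field, e.g. from a DefaultPredictor."""
    metadata = MetadataCatalog.get("coco_2017_val")
    # Visualizer expects RGB, so reverse the channel order on the way in and out.
    v = Visualizer(image_bgr[:, :, ::-1], metadata, scale=1.0)
    vis = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    cv2.imwrite(path, vis.get_image()[:, :, ::-1])
```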
- """ - boxes = predictions.pred_boxes if predictions.has("pred_boxes") else None - scores = predictions.scores if predictions.has("scores") else None - classes = predictions.pred_classes if predictions.has("pred_classes") else None - labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) - keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None - - if predictions.has("pred_masks"): - masks = np.asarray(predictions.pred_masks) - masks = [GenericMask(x, self.output.height, self.output.width) for x in masks] - else: - masks = None - - if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): - colors = [ - self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in classes - ] - alpha = 0.8 - else: - colors = None - alpha = 0.5 - - if self._instance_mode == ColorMode.IMAGE_BW: - self.output.img = self._create_grayscale_image( - (predictions.pred_masks.any(dim=0) > 0).numpy() - ) - alpha = 0.3 - - self.overlay_instances( - masks=masks, - boxes=boxes, - labels=labels, - keypoints=keypoints, - assigned_colors=colors, - alpha=alpha, - ) - return self.output - - def draw_sem_seg(self, sem_seg, area_threshold=None, alpha=0.8): - """ - Draw semantic segmentation predictions/labels. - - Args: - sem_seg (Tensor or ndarray): the segmentation of shape (H, W). - Each value is the integer label of the pixel. - area_threshold (int): segments with less than `area_threshold` are not drawn. - alpha (float): the larger it is, the more opaque the segmentations are. - - Returns: - output (VisImage): image object with visualizations. - """ - if isinstance(sem_seg, torch.Tensor): - sem_seg = sem_seg.numpy() - labels, areas = np.unique(sem_seg, return_counts=True) - sorted_idxs = np.argsort(-areas).tolist() - labels = labels[sorted_idxs] - for label in filter(lambda l: l < len(self.metadata.stuff_classes), labels): - try: - mask_color = [x / 255 for x in self.metadata.stuff_colors[label]] - except (AttributeError, IndexError): - mask_color = None - - binary_mask = (sem_seg == label).astype(np.uint8) - text = self.metadata.stuff_classes[label] - self.draw_binary_mask( - binary_mask, - color=mask_color, - edge_color=_OFF_WHITE, - text=text, - alpha=alpha, - area_threshold=area_threshold, - ) - return self.output - - def draw_panoptic_seg_predictions( - self, panoptic_seg, segments_info, area_threshold=None, alpha=0.7 - ): - """ - Draw panoptic prediction results on an image. - - Args: - panoptic_seg (Tensor): of shape (height, width) where the values are ids for each - segment. - segments_info (list[dict]): Describe each segment in `panoptic_seg`. - Each dict contains keys "id", "category_id", "isthing". - area_threshold (int): stuff segments with less than `area_threshold` are not drawn. - - Returns: - output (VisImage): image object with visualizations. - """ - pred = _PanopticPrediction(panoptic_seg, segments_info) - - if self._instance_mode == ColorMode.IMAGE_BW: - self.output.img = self._create_grayscale_image(pred.non_empty_mask()) - - # draw mask for all semantic segments first i.e. 
"stuff" - for mask, sinfo in pred.semantic_masks(): - category_idx = sinfo["category_id"] - try: - mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]] - except AttributeError: - mask_color = None - - text = self.metadata.stuff_classes[category_idx] - self.draw_binary_mask( - mask, - color=mask_color, - edge_color=_OFF_WHITE, - text=text, - alpha=alpha, - area_threshold=area_threshold, - ) - - # draw mask for all instances second - all_instances = list(pred.instance_masks()) - if len(all_instances) == 0: - return self.output - masks, sinfo = list(zip(*all_instances)) - category_ids = [x["category_id"] for x in sinfo] - - try: - scores = [x["score"] for x in sinfo] - except KeyError: - scores = None - labels = _create_text_labels(category_ids, scores, self.metadata.thing_classes) - - try: - colors = [random_color(rgb=True, maximum=1) for k in category_ids] - except AttributeError: - colors = None - self.overlay_instances(masks=masks, labels=labels, assigned_colors=colors, alpha=alpha) - - return self.output - - def draw_dataset_dict(self, dic): - """ - Draw annotations/segmentaions in Detectron2 Dataset format. - - Args: - dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format. - - Returns: - output (VisImage): image object with visualizations. - """ - annos = dic.get("annotations", None) - if annos: - if "segmentation" in annos[0]: - masks = [x["segmentation"] for x in annos] - else: - masks = None - if "keypoints" in annos[0]: - keypts = [x["keypoints"] for x in annos] - keypts = np.array(keypts).reshape(len(annos), -1, 3) - else: - keypts = None - - boxes = [BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS) for x in annos] - - labels = [x["category_id"] for x in annos] - colors = None - if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): - colors = [ - self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in labels - ] - names = self.metadata.get("thing_classes", None) - if names: - labels = [names[i] for i in labels] - labels = [ - "{}".format(i) + ("|crowd" if a.get("iscrowd", 0) else "") - for i, a in zip(labels, annos) - ] - self.overlay_instances( - labels=labels, boxes=boxes, masks=masks, keypoints=keypts, assigned_colors=colors - ) - - sem_seg = dic.get("sem_seg", None) - if sem_seg is None and "sem_seg_file_name" in dic: - with PathManager.open(dic["sem_seg_file_name"], "rb") as f: - sem_seg = Image.open(f) - sem_seg = np.asarray(sem_seg, dtype="uint8") - if sem_seg is not None: - self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5) - return self.output - - def overlay_instances( - self, - *, - boxes=None, - labels=None, - masks=None, - keypoints=None, - assigned_colors=None, - alpha=0.5 - ): - """ - Args: - boxes (Boxes, RotatedBoxes or ndarray): either a :class:`Boxes`, - or an Nx4 numpy array of XYXY_ABS format for the N objects in a single image, - or a :class:`RotatedBoxes`, - or an Nx5 numpy array of (x_center, y_center, width, height, angle_degrees) format - for the N objects in a single image, - labels (list[str]): the text to be displayed for each instance. - masks (masks-like object): Supported types are: - - * :class:`detectron2.structures.PolygonMasks`, - :class:`detectron2.structures.BitMasks`. - * list[list[ndarray]]: contains the segmentation masks for all objects in one image. - The first level of the list corresponds to individual instances. 
The second - level to all the polygon that compose the instance, and the third level - to the polygon coordinates. The third level should have the format of - [x0, y0, x1, y1, ..., xn, yn] (n >= 3). - * list[ndarray]: each ndarray is a binary mask of shape (H, W). - * list[dict]: each dict is a COCO-style RLE. - keypoints (Keypoint or array like): an array-like object of shape (N, K, 3), - where the N is the number of instances and K is the number of keypoints. - The last dimension corresponds to (x, y, visibility or score). - assigned_colors (list[matplotlib.colors]): a list of colors, where each color - corresponds to each mask or box in the image. Refer to 'matplotlib.colors' - for full list of formats that the colors are accepted in. - - Returns: - output (VisImage): image object with visualizations. - """ - num_instances = None - if boxes is not None: - boxes = self._convert_boxes(boxes) - num_instances = len(boxes) - if masks is not None: - masks = self._convert_masks(masks) - if num_instances: - assert len(masks) == num_instances - else: - num_instances = len(masks) - if keypoints is not None: - if num_instances: - assert len(keypoints) == num_instances - else: - num_instances = len(keypoints) - keypoints = self._convert_keypoints(keypoints) - if labels is not None: - assert len(labels) == num_instances - if assigned_colors is None: - assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] - if num_instances == 0: - return self.output - if boxes is not None and boxes.shape[1] == 5: - return self.overlay_rotated_instances( - boxes=boxes, labels=labels, assigned_colors=assigned_colors - ) - - # Display in largest to smallest order to reduce occlusion. - areas = None - if boxes is not None: - areas = np.prod(boxes[:, 2:] - boxes[:, :2], axis=1) - elif masks is not None: - areas = np.asarray([x.area() for x in masks]) - - if areas is not None: - sorted_idxs = np.argsort(-areas).tolist() - # Re-order overlapped instances in descending order. - boxes = boxes[sorted_idxs] if boxes is not None else None - labels = [labels[k] for k in sorted_idxs] if labels is not None else None - masks = [masks[idx] for idx in sorted_idxs] if masks is not None else None - assigned_colors = [assigned_colors[idx] for idx in sorted_idxs] - keypoints = keypoints[sorted_idxs] if keypoints is not None else None - - for i in range(num_instances): - color = assigned_colors[i] - if boxes is not None: - self.draw_box(boxes[i], edge_color=color) - - if masks is not None: - for segment in masks[i].polygons: - self.draw_polygon(segment.reshape(-1, 2), color, alpha=alpha) - - if labels is not None: - # first get a box - if boxes is not None: - x0, y0, x1, y1 = boxes[i] - text_pos = (x0, y0) # if drawing boxes, put text on the box corner. - horiz_align = "left" - elif masks is not None: - x0, y0, x1, y1 = masks[i].bbox() - - # draw text in the center (defined by median) when box is not drawn - # median is less sensitive to outliers. - text_pos = np.median(masks[i].mask.nonzero(), axis=1)[::-1] - horiz_align = "center" - else: - continue # drawing the box confidence for keypoints isn't very useful. 
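`overlay_instances` can also be called directly with plain numpy inputs, which is handy for visualizing ground truth rather than predictions. A small sketch with synthetic boxes (metadata is not consulted on this code path, so `None` is passed for brevity; labels are made up):

```python
import numpy as np
from detectron2.utils.visualizer import Visualizer

img = np.zeros((480, 640, 3), dtype=np.uint8)
boxes = np.array([[50, 60, 200, 220], [300, 100, 420, 300]], dtype=np.float32)  # XYXY_ABS
labels = ["cat 88%", "dog 72%"]

v = Visualizer(img, metadata=None)
out = v.overlay_instances(boxes=boxes, labels=labels)
out.save("gt_boxes.png")
```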
- # for small objects, draw text at the side to avoid occlusion - instance_area = (y1 - y0) * (x1 - x0) - if ( - instance_area < _SMALL_OBJECT_AREA_THRESH * self.output.scale - or y1 - y0 < 40 * self.output.scale - ): - if y1 >= self.output.height - 5: - text_pos = (x1, y0) - else: - text_pos = (x0, y1) - - height_ratio = (y1 - y0) / np.sqrt(self.output.height * self.output.width) - lighter_color = self._change_color_brightness(color, brightness_factor=0.7) - font_size = ( - np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) - * 0.5 - * self._default_font_size - ) - self.draw_text( - labels[i], - text_pos, - color=lighter_color, - horizontal_alignment=horiz_align, - font_size=font_size, - ) - - # draw keypoints - if keypoints is not None: - for keypoints_per_instance in keypoints: - self.draw_and_connect_keypoints(keypoints_per_instance) - - return self.output - - def overlay_rotated_instances(self, boxes=None, labels=None, assigned_colors=None): - """ - Args: - boxes (ndarray): an Nx5 numpy array of - (x_center, y_center, width, height, angle_degrees) format - for the N objects in a single image. - labels (list[str]): the text to be displayed for each instance. - assigned_colors (list[matplotlib.colors]): a list of colors, where each color - corresponds to each mask or box in the image. Refer to 'matplotlib.colors' - for full list of formats that the colors are accepted in. - - Returns: - output (VisImage): image object with visualizations. - """ - - num_instances = len(boxes) - - if assigned_colors is None: - assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] - if num_instances == 0: - return self.output - - # Display in largest to smallest order to reduce occlusion. - if boxes is not None: - areas = boxes[:, 2] * boxes[:, 3] - - sorted_idxs = np.argsort(-areas).tolist() - # Re-order overlapped instances in descending order. - boxes = boxes[sorted_idxs] - labels = [labels[k] for k in sorted_idxs] if labels is not None else None - colors = [assigned_colors[idx] for idx in sorted_idxs] - - for i in range(num_instances): - self.draw_rotated_box_with_label( - boxes[i], edge_color=colors[i], label=labels[i] if labels is not None else None - ) - - return self.output - - def draw_and_connect_keypoints(self, keypoints): - """ - Draws keypoints of an instance and follows the rules for keypoint connections - to draw lines between appropriate keypoints. This follows color heuristics for - line color. - - Args: - keypoints (Tensor): a tensor of shape (K, 3), where K is the number of keypoints - and the last dimension corresponds to (x, y, probability). - - Returns: - output (VisImage): image object with visualizations. - """ - visible = {} - keypoint_names = self.metadata.get("keypoint_names") - for idx, keypoint in enumerate(keypoints): - # draw keypoint - x, y, prob = keypoint - if prob > _KEYPOINT_THRESHOLD: - self.draw_circle((x, y), color=_RED) - if keypoint_names: - keypoint_name = keypoint_names[idx] - visible[keypoint_name] = (x, y) - - if self.metadata.get("keypoint_connection_rules"): - for kp0, kp1, color in self.metadata.keypoint_connection_rules: - if kp0 in visible and kp1 in visible: - x0, y0 = visible[kp0] - x1, y1 = visible[kp1] - color = tuple(x / 255.0 for x in color) - self.draw_line([x0, x1], [y0, y1], color=color) - - # draw lines from nose to mid-shoulder and mid-shoulder to mid-hip - # Note that this strategy is specific to person keypoints. 
- # For other keypoints, it should just do nothing - try: - ls_x, ls_y = visible["left_shoulder"] - rs_x, rs_y = visible["right_shoulder"] - mid_shoulder_x, mid_shoulder_y = (ls_x + rs_x) / 2, (ls_y + rs_y) / 2 - except KeyError: - pass - else: - # draw line from nose to mid-shoulder - nose_x, nose_y = visible.get("nose", (None, None)) - if nose_x is not None: - self.draw_line([nose_x, mid_shoulder_x], [nose_y, mid_shoulder_y], color=_RED) - - try: - # draw line from mid-shoulder to mid-hip - lh_x, lh_y = visible["left_hip"] - rh_x, rh_y = visible["right_hip"] - except KeyError: - pass - else: - mid_hip_x, mid_hip_y = (lh_x + rh_x) / 2, (lh_y + rh_y) / 2 - self.draw_line([mid_hip_x, mid_shoulder_x], [mid_hip_y, mid_shoulder_y], color=_RED) - return self.output - - """ - Primitive drawing functions: - """ - - def draw_text( - self, - text, - position, - *, - font_size=None, - color="g", - horizontal_alignment="center", - rotation=0 - ): - """ - Args: - text (str): class label - position (tuple): a tuple of the x and y coordinates to place text on image. - font_size (int, optional): font of the text. If not provided, a font size - proportional to the image width is calculated and used. - color: color of the text. Refer to `matplotlib.colors` for full list - of formats that are accepted. - horizontal_alignment (str): see `matplotlib.text.Text` - rotation: rotation angle in degrees CCW - - Returns: - output (VisImage): image object with text drawn. - """ - if not font_size: - font_size = self._default_font_size - - # since the text background is dark, we don't want the text to be dark - color = np.maximum(list(mplc.to_rgb(color)), 0.2) - color[np.argmax(color)] = max(0.8, np.max(color)) - - x, y = position - self.output.ax.text( - x, - y, - text, - size=font_size * self.output.scale, - family="sans-serif", - bbox={"facecolor": "black", "alpha": 0.8, "pad": 0.7, "edgecolor": "none"}, - verticalalignment="top", - horizontalalignment=horizontal_alignment, - color=color, - zorder=10, - rotation=rotation, - ) - return self.output - - def draw_box(self, box_coord, alpha=0.5, edge_color="g", line_style="-"): - """ - Args: - box_coord (tuple): a tuple containing x0, y0, x1, y1 coordinates, where x0 and y0 - are the coordinates of the image's top left corner. x1 and y1 are the - coordinates of the image's bottom right corner. - alpha (float): blending efficient. Smaller values lead to more transparent masks. - edge_color: color of the outline of the box. Refer to `matplotlib.colors` - for full list of formats that are accepted. - line_style (string): the string to use to create the outline of the boxes. - - Returns: - output (VisImage): image object with box drawn. - """ - x0, y0, x1, y1 = box_coord - width = x1 - x0 - height = y1 - y0 - - linewidth = max(self._default_font_size / 4, 1) - - self.output.ax.add_patch( - mpl.patches.Rectangle( - (x0, y0), - width, - height, - fill=False, - edgecolor=edge_color, - linewidth=linewidth * self.output.scale, - alpha=alpha, - linestyle=line_style, - ) - ) - return self.output - - def draw_rotated_box_with_label( - self, rotated_box, alpha=0.5, edge_color="g", line_style="-", label=None - ): - """ - Args: - rotated_box (tuple): a tuple containing (cnt_x, cnt_y, w, h, angle), - where cnt_x and cnt_y are the center coordinates of the box. - w and h are the width and height of the box. angle represents how - many degrees the box is rotated CCW with regard to the 0-degree box. - alpha (float): blending efficient. Smaller values lead to more transparent masks. 
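The primitive drawing functions compose the same way the higher-level methods use them internally. A tiny sketch drawing one labeled box on a blank canvas (again passing `None` for metadata, which these primitives do not consult):

```python
import numpy as np
from detectron2.utils.visualizer import Visualizer

canvas = np.full((240, 320, 3), 255, dtype=np.uint8)  # white RGB image
v = Visualizer(canvas, metadata=None)
v.draw_box((40, 40, 200, 180), edge_color="r")
v.draw_text("example", (40, 40), color="r", horizontal_alignment="left")
v.output.save("primitives.png")
```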
- edge_color: color of the outline of the box. Refer to `matplotlib.colors` - for full list of formats that are accepted. - line_style (string): the string to use to create the outline of the boxes. - label (string): label for rotated box. It will not be rendered when set to None. - - Returns: - output (VisImage): image object with box drawn. - """ - cnt_x, cnt_y, w, h, angle = rotated_box - area = w * h - # use thinner lines when the box is small - linewidth = self._default_font_size / ( - 6 if area < _SMALL_OBJECT_AREA_THRESH * self.output.scale else 3 - ) - - theta = angle * math.pi / 180.0 - c = math.cos(theta) - s = math.sin(theta) - rect = [(-w / 2, h / 2), (-w / 2, -h / 2), (w / 2, -h / 2), (w / 2, h / 2)] - # x: left->right ; y: top->down - rotated_rect = [(s * yy + c * xx + cnt_x, c * yy - s * xx + cnt_y) for (xx, yy) in rect] - for k in range(4): - j = (k + 1) % 4 - self.draw_line( - [rotated_rect[k][0], rotated_rect[j][0]], - [rotated_rect[k][1], rotated_rect[j][1]], - color=edge_color, - linestyle="--" if k == 1 else line_style, - linewidth=linewidth, - ) - - if label is not None: - text_pos = rotated_rect[1] # topleft corner - - height_ratio = h / np.sqrt(self.output.height * self.output.width) - label_color = self._change_color_brightness(edge_color, brightness_factor=0.7) - font_size = ( - np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) * 0.5 * self._default_font_size - ) - self.draw_text(label, text_pos, color=label_color, font_size=font_size, rotation=angle) - - return self.output - - def draw_circle(self, circle_coord, color, radius=3): - """ - Args: - circle_coord (list(int) or tuple(int)): contains the x and y coordinates - of the center of the circle. - color: color of the polygon. Refer to `matplotlib.colors` for a full list of - formats that are accepted. - radius (int): radius of the circle. - - Returns: - output (VisImage): image object with box drawn. - """ - x, y = circle_coord - self.output.ax.add_patch( - mpl.patches.Circle(circle_coord, radius=radius, fill=True, color=color) - ) - return self.output - - def draw_line(self, x_data, y_data, color, linestyle="-", linewidth=None): - """ - Args: - x_data (list[int]): a list containing x values of all the points being drawn. - Length of list should match the length of y_data. - y_data (list[int]): a list containing y values of all the points being drawn. - Length of list should match the length of x_data. - color: color of the line. Refer to `matplotlib.colors` for a full list of - formats that are accepted. - linestyle: style of the line. Refer to `matplotlib.lines.Line2D` - for a full list of formats that are accepted. - linewidth (float or None): width of the line. When it's None, - a default value will be computed and used. - - Returns: - output (VisImage): image object with line drawn. - """ - if linewidth is None: - linewidth = self._default_font_size / 3 - linewidth = max(linewidth, 1) - self.output.ax.add_line( - mpl.lines.Line2D( - x_data, - y_data, - linewidth=linewidth * self.output.scale, - color=color, - linestyle=linestyle, - ) - ) - return self.output - - def draw_binary_mask( - self, binary_mask, color=None, *, edge_color=None, text=None, alpha=0.5, area_threshold=4096 - ): - """ - Args: - binary_mask (ndarray): numpy array of shape (H, W), where H is the image height and - W is the image width. Each value in the array is either a 0 or 1 value of uint8 - type. - color: color of the mask. Refer to `matplotlib.colors` for a full list of - formats that are accepted. If None, will pick a random color. 
- edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a - full list of formats that are accepted. - text (str): if None, will be drawn in the object's center of mass. - alpha (float): blending efficient. Smaller values lead to more transparent masks. - area_threshold (float): a connected component small than this will not be shown. - - Returns: - output (VisImage): image object with mask drawn. - """ - if color is None: - color = random_color(rgb=True, maximum=1) - if area_threshold is None: - area_threshold = 4096 - - has_valid_segment = False - binary_mask = binary_mask.astype("uint8") # opencv needs uint8 - mask = GenericMask(binary_mask, self.output.height, self.output.width) - shape2d = (binary_mask.shape[0], binary_mask.shape[1]) - - if not mask.has_holes: - # draw polygons for regular masks - for segment in mask.polygons: - area = mask_util.area(mask_util.frPyObjects([segment], shape2d[0], shape2d[1])) - if area < area_threshold: - continue - has_valid_segment = True - segment = segment.reshape(-1, 2) - self.draw_polygon(segment, color=color, edge_color=edge_color, alpha=alpha) - else: - rgba = np.zeros(shape2d + (4,), dtype="float32") - rgba[:, :, :3] = color - rgba[:, :, 3] = (mask.mask == 1).astype("float32") * alpha - has_valid_segment = True - self.output.ax.imshow(rgba) - - if text is not None and has_valid_segment: - # TODO sometimes drawn on wrong objects. the heuristics here can improve. - lighter_color = self._change_color_brightness(color, brightness_factor=0.7) - _num_cc, cc_labels, stats, centroids = cv2.connectedComponentsWithStats(binary_mask, 8) - largest_component_id = np.argmax(stats[1:, -1]) + 1 - - # draw text on the largest component, as well as other very large components. - for cid in range(1, _num_cc): - if cid == largest_component_id or stats[cid, -1] > _LARGE_MASK_AREA_THRESH: - # median is more stable than centroid - # center = centroids[largest_component_id] - center = np.median((cc_labels == cid).nonzero(), axis=1)[::-1] - self.draw_text(text, center, color=lighter_color) - return self.output - - def draw_polygon(self, segment, color, edge_color=None, alpha=0.5): - """ - Args: - segment: numpy array of shape Nx2, containing all the points in the polygon. - color: color of the polygon. Refer to `matplotlib.colors` for a full list of - formats that are accepted. - edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a - full list of formats that are accepted. If not provided, a darker shade - of the polygon color will be used instead. - alpha (float): blending efficient. Smaller values lead to more transparent masks. - - Returns: - output (VisImage): image object with polygon drawn. - """ - if edge_color is None: - # make edge color darker than the polygon color - if alpha > 0.8: - edge_color = self._change_color_brightness(color, brightness_factor=-0.7) - else: - edge_color = color - edge_color = mplc.to_rgb(edge_color) + (1,) - - polygon = mpl.patches.Polygon( - segment, - fill=True, - facecolor=mplc.to_rgb(color) + (alpha,), - edgecolor=edge_color, - linewidth=max(self._default_font_size // 15 * self.output.scale, 1), - ) - self.output.ax.add_patch(polygon) - return self.output - - """ - Internal methods: - """ - - def _jitter(self, color): - """ - Randomly modifies given color to produce a slightly different color than the color given. - - Args: - color (tuple[double]): a tuple of 3 elements, containing the RGB values of the color - picked. The values in the list are in the [0.0, 1.0] range. 
- - Returns: - jittered_color (tuple[double]): a tuple of 3 elements, containing the RGB values of the - color after being jittered. The values in the list are in the [0.0, 1.0] range. - """ - color = mplc.to_rgb(color) - vec = np.random.rand(3) - # better to do it in another color space - vec = vec / np.linalg.norm(vec) * 0.5 - res = np.clip(vec + color, 0, 1) - return tuple(res) - - def _create_grayscale_image(self, mask=None): - """ - Create a grayscale version of the original image. - The colors in masked area, if given, will be kept. - """ - img_bw = self.img.astype("f4").mean(axis=2) - img_bw = np.stack([img_bw] * 3, axis=2) - if mask is not None: - img_bw[mask] = self.img[mask] - return img_bw - - def _change_color_brightness(self, color, brightness_factor): - """ - Depending on the brightness_factor, gives a lighter or darker color i.e. a color with - less or more saturation than the original color. - - Args: - color: color of the polygon. Refer to `matplotlib.colors` for a full list of - formats that are accepted. - brightness_factor (float): a value in [-1.0, 1.0] range. A lightness factor of - 0 will correspond to no change, a factor in [-1.0, 0) range will result in - a darker color and a factor in (0, 1.0] range will result in a lighter color. - - Returns: - modified_color (tuple[double]): a tuple containing the RGB values of the - modified color. Each value in the tuple is in the [0.0, 1.0] range. - """ - assert brightness_factor >= -1.0 and brightness_factor <= 1.0 - color = mplc.to_rgb(color) - polygon_color = colorsys.rgb_to_hls(*mplc.to_rgb(color)) - modified_lightness = polygon_color[1] + (brightness_factor * polygon_color[1]) - modified_lightness = 0.0 if modified_lightness < 0.0 else modified_lightness - modified_lightness = 1.0 if modified_lightness > 1.0 else modified_lightness - modified_color = colorsys.hls_to_rgb(polygon_color[0], modified_lightness, polygon_color[2]) - return modified_color - - def _convert_boxes(self, boxes): - """ - Convert different format of boxes to an NxB array, where B = 4 or 5 is the box dimension. - """ - if isinstance(boxes, Boxes) or isinstance(boxes, RotatedBoxes): - return boxes.tensor.numpy() - else: - return np.asarray(boxes) - - def _convert_masks(self, masks_or_polygons): - """ - Convert different format of masks or polygons to a tuple of masks and polygons. - - Returns: - list[GenericMask]: - """ - - m = masks_or_polygons - if isinstance(m, PolygonMasks): - m = m.polygons - if isinstance(m, BitMasks): - m = m.tensor.numpy() - if isinstance(m, torch.Tensor): - m = m.numpy() - ret = [] - for x in m: - if isinstance(x, GenericMask): - ret.append(x) - else: - ret.append(GenericMask(x, self.output.height, self.output.width)) - return ret - - def _convert_keypoints(self, keypoints): - if isinstance(keypoints, Keypoints): - keypoints = keypoints.tensor - keypoints = np.asarray(keypoints) - return keypoints - - def get_output(self): - """ - Returns: - output (VisImage): the image output containing the visualizations added - to the image. - """ - return self.output diff --git a/preprocess/humanparsing/mhp_extension/detectron2/dev/README.md b/preprocess/humanparsing/mhp_extension/detectron2/dev/README.md deleted file mode 100644 index cc0d329..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/dev/README.md +++ /dev/null @@ -1,7 +0,0 @@ - -## Some scripts for developers to use, include: - -- `linter.sh`: lint the codebase before commit -- `run_{inference,instant}_tests.sh`: run inference/training for a few iterations. 
- Note that these tests require 2 GPUs. -- `parse_results.sh`: parse results from a log file. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/dev/linter.sh b/preprocess/humanparsing/mhp_extension/detectron2/dev/linter.sh deleted file mode 100644 index fd7081d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/dev/linter.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# Run this script at project root by "./dev/linter.sh" before you commit - -vergte() { - [ "$2" = "$(echo -e "$1\\n$2" | sort -V | head -n1)" ] -} - -{ - black --version | grep -E "(19.3b0.*6733274)|(19.3b0\\+8)" > /dev/null -} || { - echo "Linter requires 'black @ git+https://github.com/psf/black@673327449f86fce558adde153bb6cbe54bfebad2' !" - exit 1 -} - -ISORT_TARGET_VERSION="4.3.21" -ISORT_VERSION=$(isort -v | grep VERSION | awk '{print $2}') -vergte "$ISORT_VERSION" "$ISORT_TARGET_VERSION" || { - echo "Linter requires isort>=${ISORT_TARGET_VERSION} !" - exit 1 -} - -set -v - -echo "Running isort ..." -isort -y -sp . --atomic - -echo "Running black ..." -black -l 100 . - -echo "Running flake8 ..." -if [ -x "$(command -v flake8-3)" ]; then - flake8-3 . -else - python3 -m flake8 . -fi - -# echo "Running mypy ..." -# Pytorch does not have enough type annotations -# mypy detectron2/solver detectron2/structures detectron2/config - -echo "Running clang-format ..." -find . -regex ".*\.\(cpp\|c\|cc\|cu\|cxx\|h\|hh\|hpp\|hxx\|tcc\|mm\|m\)" -print0 | xargs -0 clang-format -i - -command -v arc > /dev/null && arc lint diff --git a/preprocess/humanparsing/mhp_extension/detectron2/dev/packaging/README.md b/preprocess/humanparsing/mhp_extension/detectron2/dev/packaging/README.md deleted file mode 100644 index 095684f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/dev/packaging/README.md +++ /dev/null @@ -1,17 +0,0 @@ - -## To build a cu101 wheel for release: - -``` -$ nvidia-docker run -it --storage-opt "size=20GB" --name pt pytorch/manylinux-cuda101 -# inside the container: -# git clone https://github.com/facebookresearch/detectron2/ -# cd detectron2 -# export CU_VERSION=cu101 D2_VERSION_SUFFIX= PYTHON_VERSION=3.7 PYTORCH_VERSION=1.4 -# ./dev/packaging/build_wheel.sh -``` - -## To build all wheels for `CUDA {9.2,10.0,10.1}` x `Python {3.6,3.7,3.8}`: -``` -./dev/packaging/build_all_wheels.sh -./dev/packaging/gen_wheel_index.sh /path/to/wheels -``` diff --git a/preprocess/humanparsing/mhp_extension/detectron2/dev/packaging/build_all_wheels.sh b/preprocess/humanparsing/mhp_extension/detectron2/dev/packaging/build_all_wheels.sh deleted file mode 100644 index eb64dea..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/dev/packaging/build_all_wheels.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -PYTORCH_VERSION=1.5 - -build_for_one_cuda() { - cu=$1 - - case "$cu" in - cu*) - container_name=manylinux-cuda${cu/cu/} - ;; - cpu) - container_name=manylinux-cuda101 - ;; - *) - echo "Unrecognized cu=$cu" - exit 1 - ;; - esac - - echo "Launching container $container_name ..." - - for py in 3.6 3.7 3.8; do - docker run -itd \ - --name $container_name \ - --mount type=bind,source="$(pwd)",target=/detectron2 \ - pytorch/$container_name - - cat </dev/null 2>&1 && pwd )" -. "$script_dir/pkg_helpers.bash" - -echo "Build Settings:" -echo "CU_VERSION: $CU_VERSION" # e.g. cu101 -echo "D2_VERSION_SUFFIX: $D2_VERSION_SUFFIX" # e.g. 
+cu101 or "" -echo "PYTHON_VERSION: $PYTHON_VERSION" # e.g. 3.6 -echo "PYTORCH_VERSION: $PYTORCH_VERSION" # e.g. 1.4 - -setup_cuda -setup_wheel_python -yum install ninja-build -y && ln -sv /usr/bin/ninja-build /usr/bin/ninja - -export TORCH_VERSION_SUFFIX="+$CU_VERSION" -if [[ "$CU_VERSION" == "cu102" ]]; then - export TORCH_VERSION_SUFFIX="" -fi -pip_install pip numpy -U -pip_install "torch==$PYTORCH_VERSION$TORCH_VERSION_SUFFIX" \ - -f https://download.pytorch.org/whl/$CU_VERSION/torch_stable.html - -# use separate directories to allow parallel build -BASE_BUILD_DIR=build/$CU_VERSION/$PYTHON_VERSION -python setup.py \ - build -b $BASE_BUILD_DIR \ - bdist_wheel -b $BASE_BUILD_DIR/build_dist -d wheels/$CU_VERSION diff --git a/preprocess/humanparsing/mhp_extension/detectron2/dev/packaging/gen_wheel_index.sh b/preprocess/humanparsing/mhp_extension/detectron2/dev/packaging/gen_wheel_index.sh deleted file mode 100644 index 44d6041..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/dev/packaging/gen_wheel_index.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - - -root=$1 -if [[ -z "$root" ]]; then - echo "Usage: ./gen_wheel_index.sh /path/to/wheels" - exit -fi - -index=$root/index.html - -cd "$root" -for cu in cpu cu92 cu100 cu101 cu102; do - cd $cu - echo "Creating $PWD/index.html ..." - for whl in *.whl; do - echo "$whl
" - done > index.html - cd "$root" -done - -echo "Creating $index ..." -for whl in $(find . -type f -name '*.whl' -printf '%P\n' | sort); do - echo "$whl
" -done > "$index" - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/dev/packaging/pkg_helpers.bash b/preprocess/humanparsing/mhp_extension/detectron2/dev/packaging/pkg_helpers.bash deleted file mode 100644 index 51e6185..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/dev/packaging/pkg_helpers.bash +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# Function to retry functions that sometimes timeout or have flaky failures -retry () { - $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) -} -# Install with pip a bit more robustly than the default -pip_install() { - retry pip install --progress-bar off "$@" -} - - -setup_cuda() { - # Now work out the CUDA settings - # Like other torch domain libraries, we choose common GPU architectures only. - export FORCE_CUDA=1 - case "$CU_VERSION" in - cu102) - export CUDA_HOME=/usr/local/cuda-10.2/ - export TORCH_CUDA_ARCH_LIST="3.5;3.7;5.0;5.2;6.0+PTX;6.1+PTX;7.0+PTX;7.5+PTX" - ;; - cu101) - export CUDA_HOME=/usr/local/cuda-10.1/ - export TORCH_CUDA_ARCH_LIST="3.5;3.7;5.0;5.2;6.0+PTX;6.1+PTX;7.0+PTX;7.5+PTX" - ;; - cu100) - export CUDA_HOME=/usr/local/cuda-10.0/ - export TORCH_CUDA_ARCH_LIST="3.5;3.7;5.0;5.2;6.0+PTX;6.1+PTX;7.0+PTX;7.5+PTX" - ;; - cu92) - export CUDA_HOME=/usr/local/cuda-9.2/ - export TORCH_CUDA_ARCH_LIST="3.5;3.7;5.0;5.2;6.0+PTX;6.1+PTX;7.0+PTX" - ;; - cpu) - unset FORCE_CUDA - export CUDA_VISIBLE_DEVICES= - ;; - *) - echo "Unrecognized CU_VERSION=$CU_VERSION" - exit 1 - ;; - esac -} - -setup_wheel_python() { - case "$PYTHON_VERSION" in - 3.6) python_abi=cp36-cp36m ;; - 3.7) python_abi=cp37-cp37m ;; - 3.8) python_abi=cp38-cp38 ;; - *) - echo "Unrecognized PYTHON_VERSION=$PYTHON_VERSION" - exit 1 - ;; - esac - export PATH="/opt/python/$python_abi/bin:$PATH" -} diff --git a/preprocess/humanparsing/mhp_extension/detectron2/dev/parse_results.sh b/preprocess/humanparsing/mhp_extension/detectron2/dev/parse_results.sh deleted file mode 100644 index 874b688..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/dev/parse_results.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# A shell script that parses metrics from the log file. -# Make it easier for developers to track performance of models. - -LOG="$1" - -if [[ -z "$LOG" ]]; then - echo "Usage: $0 /path/to/log/file" - exit 1 -fi - -# [12/15 11:47:32] trainer INFO: Total training time: 12:15:04.446477 (0.4900 s / it) -# [12/15 11:49:03] inference INFO: Total inference time: 0:01:25.326167 (0.13652186737060548 s / demo per device, on 8 devices) -# [12/15 11:49:03] inference INFO: Total inference pure compute time: ..... 
- -# training time -trainspeed=$(grep -o 'Overall training.*' "$LOG" | grep -Eo '\(.*\)' | grep -o '[0-9\.]*') -echo "Training speed: $trainspeed s/it" - -# inference time: there could be multiple inference during training -inferencespeed=$(grep -o 'Total inference pure.*' "$LOG" | tail -n1 | grep -Eo '\(.*\)' | grep -o '[0-9\.]*' | head -n1) -echo "Inference speed: $inferencespeed s/it" - -# [12/15 11:47:18] trainer INFO: eta: 0:00:00 iter: 90000 loss: 0.5407 (0.7256) loss_classifier: 0.1744 (0.2446) loss_box_reg: 0.0838 (0.1160) loss_mask: 0.2159 (0.2722) loss_objectness: 0.0244 (0.0429) loss_rpn_box_reg: 0.0279 (0.0500) time: 0.4487 (0.4899) data: 0.0076 (0.0975) lr: 0.000200 max mem: 4161 -memory=$(grep -o 'max[_ ]mem: [0-9]*' "$LOG" | tail -n1 | grep -o '[0-9]*') -echo "Training memory: $memory MB" - -echo "Easy to copypaste:" -echo "$trainspeed","$inferencespeed","$memory" - -echo "------------------------------" - -# [12/26 17:26:32] engine.coco_evaluation: copypaste: Task: bbox -# [12/26 17:26:32] engine.coco_evaluation: copypaste: AP,AP50,AP75,APs,APm,APl -# [12/26 17:26:32] engine.coco_evaluation: copypaste: 0.0017,0.0024,0.0017,0.0005,0.0019,0.0011 -# [12/26 17:26:32] engine.coco_evaluation: copypaste: Task: segm -# [12/26 17:26:32] engine.coco_evaluation: copypaste: AP,AP50,AP75,APs,APm,APl -# [12/26 17:26:32] engine.coco_evaluation: copypaste: 0.0014,0.0021,0.0016,0.0005,0.0016,0.0011 - -echo "COCO Results:" -num_tasks=$(grep -o 'copypaste:.*Task.*' "$LOG" | sort -u | wc -l) -# each task has 3 lines -grep -o 'copypaste:.*' "$LOG" | cut -d ' ' -f 2- | tail -n $((num_tasks * 3)) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/dev/run_inference_tests.sh b/preprocess/humanparsing/mhp_extension/detectron2/dev/run_inference_tests.sh deleted file mode 100644 index 17e422d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/dev/run_inference_tests.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -BIN="python tools/train_net.py" -OUTPUT="inference_test_output" -NUM_GPUS=2 - -CFG_LIST=( "${@:1}" ) - -if [ ${#CFG_LIST[@]} -eq 0 ]; then - CFG_LIST=( ./configs/quick_schedules/*inference_acc_test.yaml ) -fi - -echo "========================================================================" -echo "Configs to run:" -echo "${CFG_LIST[@]}" -echo "========================================================================" - - -for cfg in "${CFG_LIST[@]}"; do - echo "========================================================================" - echo "Running $cfg ..." - echo "========================================================================" - $BIN \ - --eval-only \ - --num-gpus $NUM_GPUS \ - --config-file "$cfg" \ - OUTPUT_DIR $OUTPUT - rm -rf $OUTPUT -done - - -echo "========================================================================" -echo "Running demo.py ..." 
-echo "========================================================================" -DEMO_BIN="python demo/demo.py" -COCO_DIR=datasets/coco/val2014 -mkdir -pv $OUTPUT - -set -v - -$DEMO_BIN --config-file ./configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml \ - --input $COCO_DIR/COCO_val2014_0000001933* --output $OUTPUT -rm -rf $OUTPUT diff --git a/preprocess/humanparsing/mhp_extension/detectron2/dev/run_instant_tests.sh b/preprocess/humanparsing/mhp_extension/detectron2/dev/run_instant_tests.sh deleted file mode 100644 index 2c51de6..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/dev/run_instant_tests.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -BIN="python tools/train_net.py" -OUTPUT="instant_test_output" -NUM_GPUS=2 - -CFG_LIST=( "${@:1}" ) -if [ ${#CFG_LIST[@]} -eq 0 ]; then - CFG_LIST=( ./configs/quick_schedules/*instant_test.yaml ) -fi - -echo "========================================================================" -echo "Configs to run:" -echo "${CFG_LIST[@]}" -echo "========================================================================" - -for cfg in "${CFG_LIST[@]}"; do - echo "========================================================================" - echo "Running $cfg ..." - echo "========================================================================" - $BIN --num-gpus $NUM_GPUS --config-file "$cfg" \ - SOLVER.IMS_PER_BATCH $(($NUM_GPUS * 2)) \ - OUTPUT_DIR "$OUTPUT" - rm -rf "$OUTPUT" -done - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docker/Dockerfile b/preprocess/humanparsing/mhp_extension/detectron2/docker/Dockerfile deleted file mode 100644 index 2a86039..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docker/Dockerfile +++ /dev/null @@ -1,49 +0,0 @@ -FROM nvidia/cuda:10.1-cudnn7-devel - -ENV DEBIAN_FRONTEND noninteractive -RUN apt-get update && apt-get install -y \ - python3-opencv ca-certificates python3-dev git wget sudo \ - cmake ninja-build protobuf-compiler libprotobuf-dev && \ - rm -rf /var/lib/apt/lists/* -RUN ln -sv /usr/bin/python3 /usr/bin/python - -# create a non-root user -ARG USER_ID=1000 -RUN useradd -m --no-log-init --system --uid ${USER_ID} appuser -g sudo -RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers -USER appuser -WORKDIR /home/appuser - -ENV PATH="/home/appuser/.local/bin:${PATH}" -RUN wget https://bootstrap.pypa.io/get-pip.py && \ - python3 get-pip.py --user && \ - rm get-pip.py - -# install dependencies -# See https://pytorch.org/ for other options if you use a different version of CUDA -RUN pip install --user tensorboard cython -RUN pip install --user torch==1.5+cu101 torchvision==0.6+cu101 -f https://download.pytorch.org/whl/torch_stable.html -RUN pip install --user 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI' - -RUN pip install --user 'git+https://github.com/facebookresearch/fvcore' -# install detectron2 -RUN git clone https://github.com/facebookresearch/detectron2 detectron2_repo -# set FORCE_CUDA because during `docker build` cuda is not accessible -ENV FORCE_CUDA="1" -# This will by default build detectron2 for all common cuda architectures and take a lot more time, -# because inside `docker build`, there is no way to tell which architecture will be used. 
-ARG TORCH_CUDA_ARCH_LIST="Kepler;Kepler+Tesla;Maxwell;Maxwell+Tegra;Pascal;Volta;Turing" -ENV TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}" - -RUN pip install --user -e detectron2_repo - -# Set a fixed model cache directory. -ENV FVCORE_CACHE="/tmp" -WORKDIR /home/appuser/detectron2_repo - -# run detectron2 under user "appuser": -# wget http://images.cocodataset.org/val2017/000000439715.jpg -O input.jpg -# python3 demo/demo.py \ - #--config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \ - #--input input.jpg --output outputs/ \ - #--opts MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docker/Dockerfile-circleci b/preprocess/humanparsing/mhp_extension/detectron2/docker/Dockerfile-circleci deleted file mode 100644 index bc0be84..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docker/Dockerfile-circleci +++ /dev/null @@ -1,17 +0,0 @@ -FROM nvidia/cuda:10.1-cudnn7-devel -# This dockerfile only aims to provide an environment for unittest on CircleCI - -ENV DEBIAN_FRONTEND noninteractive -RUN apt-get update && apt-get install -y \ - python3-opencv ca-certificates python3-dev git wget sudo ninja-build && \ - rm -rf /var/lib/apt/lists/* - -RUN wget -q https://bootstrap.pypa.io/get-pip.py && \ - python3 get-pip.py && \ - rm get-pip.py - -# install dependencies -# See https://pytorch.org/ for other options if you use a different version of CUDA -RUN pip install tensorboard cython -RUN pip install torch==1.5+cu101 torchvision==0.6+cu101 -f https://download.pytorch.org/whl/torch_stable.html -RUN pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI' diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docker/README.md b/preprocess/humanparsing/mhp_extension/detectron2/docker/README.md deleted file mode 100644 index 760c405..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docker/README.md +++ /dev/null @@ -1,36 +0,0 @@ - -## Use the container (with docker ≥ 19.03) - -``` -cd docker/ -# Build: -docker build --build-arg USER_ID=$UID -t detectron2:v0 . -# Run: -docker run --gpus all -it \ - --shm-size=8gb --env="DISPLAY" --volume="/tmp/.X11-unix:/tmp/.X11-unix:rw" \ - --name=detectron2 detectron2:v0 - -# Grant docker access to host X server to show images -xhost +local:`docker inspect --format='{{ .Config.Hostname }}' detectron2` -``` - -## Use the container (with docker < 19.03) - -Install docker-compose and nvidia-docker2, then run: -``` -cd docker && USER_ID=$UID docker-compose run detectron2 -``` - -#### Using a persistent cache directory - -You can prevent models from being re-downloaded on every run, -by storing them in a cache directory. - -To do this, add `--volume=$HOME/.torch/fvcore_cache:/tmp:rw` in the run command. - -## Install new dependencies -Add the following to `Dockerfile` to make persistent changes. -``` -RUN sudo apt-get update && sudo apt-get install -y vim -``` -Or run them in the container to make temporary changes. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docker/docker-compose.yml b/preprocess/humanparsing/mhp_extension/detectron2/docker/docker-compose.yml deleted file mode 100644 index e660f44..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docker/docker-compose.yml +++ /dev/null @@ -1,18 +0,0 @@ -version: "2.3" -services: - detectron2: - build: - context: . 
- dockerfile: Dockerfile - args: - USER_ID: ${USER_ID:-1000} - runtime: nvidia # TODO: Exchange with "gpu: all" in the future (see https://github.com/facebookresearch/detectron2/pull/197/commits/00545e1f376918db4a8ce264d427a07c1e896c5a). - shm_size: "8gb" - ulimits: - memlock: -1 - stack: 67108864 - volumes: - - /tmp/.X11-unix:/tmp/.X11-unix:ro - environment: - - DISPLAY=$DISPLAY - - NVIDIA_VISIBLE_DEVICES=all diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/.gitignore b/preprocess/humanparsing/mhp_extension/detectron2/docs/.gitignore deleted file mode 100644 index e35d885..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/.gitignore +++ /dev/null @@ -1 +0,0 @@ -_build diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/Makefile b/preprocess/humanparsing/mhp_extension/detectron2/docs/Makefile deleted file mode 100644 index d537643..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -# Minimal makefile for Sphinx documentation -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = sphinx-build -SOURCEDIR = . -BUILDDIR = _build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/README.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/README.md deleted file mode 100644 index 2c65c36..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# Read the docs: - -The latest documentation built from this directory is available at [detectron2.readthedocs.io](https://detectron2.readthedocs.io/). -Documents in this directory are not meant to be read on github. - -# Build the docs: - -1. Install detectron2 according to [INSTALL.md](INSTALL.md). -2. Install additional libraries required to build docs: - - docutils==0.16 - - Sphinx==3.0.0 - - recommonmark==0.6.0 - - sphinx_rtd_theme - - mock - -3. Run `make html` from this directory. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/conf.py b/preprocess/humanparsing/mhp_extension/detectron2/docs/conf.py deleted file mode 100644 index 44e9f2b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/conf.py +++ /dev/null @@ -1,335 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# flake8: noqa - -# Configuration file for the Sphinx documentation builder. -# -# This file does only contain a selection of the most common options. For a -# full list see the documentation: -# http://www.sphinx-doc.org/en/master/config - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. 
-# -import os -import sys -import mock -from sphinx.domains import Domain -from typing import Dict, List, Tuple - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -import sphinx_rtd_theme - - -class GithubURLDomain(Domain): - """ - Resolve certain links in markdown files to github source. - """ - - name = "githuburl" - ROOT = "https://github.com/facebookresearch/detectron2/blob/master/" - LINKED_DOC = ["tutorials/install", "tutorials/getting_started"] - - def resolve_any_xref(self, env, fromdocname, builder, target, node, contnode): - github_url = None - if not target.endswith("html") and target.startswith("../../"): - url = target.replace("../", "") - github_url = url - if fromdocname in self.LINKED_DOC: - # unresolved links in these docs are all github links - github_url = target - - if github_url is not None: - if github_url.endswith("MODEL_ZOO") or github_url.endswith("README"): - # bug of recommonmark. - # https://github.com/readthedocs/recommonmark/blob/ddd56e7717e9745f11300059e4268e204138a6b1/recommonmark/parser.py#L152-L155 - github_url += ".md" - print("Ref {} resolved to github:{}".format(target, github_url)) - contnode["refuri"] = self.ROOT + github_url - return [("githuburl:any", contnode)] - else: - return [] - - -# to support markdown -from recommonmark.parser import CommonMarkParser - -sys.path.insert(0, os.path.abspath("../")) -os.environ["DOC_BUILDING"] = "True" -DEPLOY = os.environ.get("READTHEDOCS") == "True" - - -# -- Project information ----------------------------------------------------- - -# fmt: off -try: - import torch # noqa -except ImportError: - for m in [ - "torch", "torchvision", "torch.nn", "torch.nn.parallel", "torch.distributed", "torch.multiprocessing", "torch.autograd", - "torch.autograd.function", "torch.nn.modules", "torch.nn.modules.utils", "torch.utils", "torch.utils.data", "torch.onnx", - "torchvision", "torchvision.ops", - ]: - sys.modules[m] = mock.Mock(name=m) - sys.modules['torch'].__version__ = "1.5" # fake version - -for m in [ - "cv2", "scipy", "portalocker", "detectron2._C", - "pycocotools", "pycocotools.mask", "pycocotools.coco", "pycocotools.cocoeval", - "google", "google.protobuf", "google.protobuf.internal", "onnx", - "caffe2", "caffe2.proto", "caffe2.python", "caffe2.python.utils", "caffe2.python.onnx", "caffe2.python.onnx.backend", -]: - sys.modules[m] = mock.Mock(name=m) -# fmt: on -sys.modules["cv2"].__version__ = "3.4" - -import detectron2 # isort: skip - - -project = "detectron2" -copyright = "2019-2020, detectron2 contributors" -author = "detectron2 contributors" - -# The short X.Y version -version = detectron2.__version__ -# The full version, including alpha/beta/rc tags -release = version - - -# -- General configuration --------------------------------------------------- - -# If your documentation needs a minimal Sphinx version, state it here. -# -needs_sphinx = "3.0" - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. 
-extensions = [ - "recommonmark", - "sphinx.ext.autodoc", - "sphinx.ext.napoleon", - "sphinx.ext.intersphinx", - "sphinx.ext.todo", - "sphinx.ext.coverage", - "sphinx.ext.mathjax", - "sphinx.ext.viewcode", - "sphinx.ext.githubpages", -] - -# -- Configurations for plugins ------------ -napoleon_google_docstring = True -napoleon_include_init_with_doc = True -napoleon_include_special_with_doc = True -napoleon_numpy_docstring = False -napoleon_use_rtype = False -autodoc_inherit_docstrings = False -autodoc_member_order = "bysource" - -if DEPLOY: - intersphinx_timeout = 10 -else: - # skip this when building locally - intersphinx_timeout = 0.1 -intersphinx_mapping = { - "python": ("https://docs.python.org/3.6", None), - "numpy": ("https://docs.scipy.org/doc/numpy/", None), - "torch": ("https://pytorch.org/docs/master/", None), -} -# ------------------------- - - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -source_suffix = [".rst", ".md"] - -# The master toctree document. -master_doc = "index" - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = None - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "build", "README.md", "tutorials/README.md"] - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = "sphinx" - - -# -- Options for HTML output ------------------------------------------------- - -html_theme = "sphinx_rtd_theme" -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# -# html_theme_options = {} - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["_static"] - -# Custom sidebar templates, must be a dictionary that maps document names -# to template names. -# -# The default sidebars (for documents that don't match any pattern) are -# defined by theme itself. Builtin themes are using these templates by -# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', -# 'searchbox.html']``. -# -# html_sidebars = {} - - -# -- Options for HTMLHelp output --------------------------------------------- - -# Output file base name for HTML help builder. -htmlhelp_basename = "detectron2doc" - - -# -- Options for LaTeX output ------------------------------------------------ - -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # - # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). - # - # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. - # - # 'preamble': '', - # Latex figure (float) alignment - # - # 'figure_align': 'htbp', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). 
-latex_documents = [ - (master_doc, "detectron2.tex", "detectron2 Documentation", "detectron2 contributors", "manual") -] - - -# -- Options for manual page output ------------------------------------------ - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [(master_doc, "detectron2", "detectron2 Documentation", [author], 1)] - - -# -- Options for Texinfo output ---------------------------------------------- - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - ( - master_doc, - "detectron2", - "detectron2 Documentation", - author, - "detectron2", - "One line description of project.", - "Miscellaneous", - ) -] - - -# -- Options for todo extension ---------------------------------------------- - -# If true, `todo` and `todoList` produce output, else they produce nothing. -todo_include_todos = True - - -_DEPRECATED_NAMES = set() - - -def autodoc_skip_member(app, what, name, obj, skip, options): - # we hide something deliberately - if getattr(obj, "__HIDE_SPHINX_DOC__", False): - return True - # Hide some names that are deprecated or not intended to be used - if name in _DEPRECATED_NAMES: - return True - return None - - -_PAPER_DATA = { - "resnet": ("1512.03385", "Deep Residual Learning for Image Recognition"), - "fpn": ("1612.03144", "Feature Pyramid Networks for Object Detection"), - "mask r-cnn": ("1703.06870", "Mask R-CNN"), - "faster r-cnn": ( - "1506.01497", - "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks", - ), - "deformconv": ("1703.06211", "Deformable Convolutional Networks"), - "deformconv2": ("1811.11168", "Deformable ConvNets v2: More Deformable, Better Results"), - "panopticfpn": ("1901.02446", "Panoptic Feature Pyramid Networks"), - "retinanet": ("1708.02002", "Focal Loss for Dense Object Detection"), - "cascade r-cnn": ("1712.00726", "Cascade R-CNN: Delving into High Quality Object Detection"), - "lvis": ("1908.03195", "LVIS: A Dataset for Large Vocabulary Instance Segmentation"), - "rrpn": ("1703.01086", "Arbitrary-Oriented Scene Text Detection via Rotation Proposals"), - "in1k1h": ("1706.02677", "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour"), -} - - -def paper_ref_role( - typ: str, - rawtext: str, - text: str, - lineno: int, - inliner, - options: Dict = {}, - content: List[str] = [], -): - """ - Parse :paper:`xxx`. Similar to the "extlinks" sphinx extension. 
- """ - from docutils import nodes, utils - from sphinx.util.nodes import split_explicit_title - - text = utils.unescape(text) - has_explicit_title, title, link = split_explicit_title(text) - link = link.lower() - if link not in _PAPER_DATA: - inliner.reporter.warning("Cannot find paper " + link) - paper_url, paper_title = "#", link - else: - paper_url, paper_title = _PAPER_DATA[link] - if "/" not in paper_url: - paper_url = "https://arxiv.org/abs/" + paper_url - if not has_explicit_title: - title = paper_title - pnode = nodes.reference(title, title, internal=False, refuri=paper_url) - return [pnode], [] - - -def setup(app): - from recommonmark.transform import AutoStructify - - app.add_domain(GithubURLDomain) - app.connect("autodoc-skip-member", autodoc_skip_member) - app.add_role("paper", paper_ref_role) - app.add_config_value( - "recommonmark_config", - {"enable_math": True, "enable_inline_math": True, "enable_eval_rst": True}, - True, - ) - app.add_transform(AutoStructify) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/index.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/index.rst deleted file mode 100644 index 8634b7b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/index.rst +++ /dev/null @@ -1,14 +0,0 @@ -.. detectron2 documentation master file, created by - sphinx-quickstart on Sat Sep 21 13:46:45 2019. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Welcome to detectron2's documentation! -====================================== - -.. toctree:: - :maxdepth: 2 - - tutorials/index - notes/index - modules/index diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/checkpoint.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/checkpoint.rst deleted file mode 100644 index 616cb18..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/checkpoint.rst +++ /dev/null @@ -1,7 +0,0 @@ -detectron2.checkpoint package -============================= - -.. automodule:: detectron2.checkpoint - :members: - :undoc-members: - :show-inheritance: diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/config.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/config.rst deleted file mode 100644 index 034bd5f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/config.rst +++ /dev/null @@ -1,17 +0,0 @@ -detectron2.config package -========================= - -.. automodule:: detectron2.config - :members: - :undoc-members: - :show-inheritance: - :inherited-members: - - -Config References ------------------ - -.. literalinclude:: ../../detectron2/config/defaults.py - :language: python - :linenos: - :lines: 4- diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/data.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/data.rst deleted file mode 100644 index 3697f0e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/data.rst +++ /dev/null @@ -1,40 +0,0 @@ -detectron2.data package -======================= - -.. automodule:: detectron2.data - :members: - :undoc-members: - :show-inheritance: - -detectron2.data.detection\_utils module ---------------------------------------- - -.. automodule:: detectron2.data.detection_utils - :members: - :undoc-members: - :show-inheritance: - -detectron2.data.datasets module ---------------------------------------- - -.. 
automodule:: detectron2.data.datasets - :members: - :undoc-members: - :show-inheritance: - -detectron2.data.samplers module ---------------------------------------- - -.. automodule:: detectron2.data.samplers - :members: - :undoc-members: - :show-inheritance: - - -detectron2.data.transforms module ---------------------------------------- - -.. automodule:: detectron2.data.transforms - :members: - :undoc-members: - :show-inheritance: diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/engine.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/engine.rst deleted file mode 100644 index bb8b533..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/engine.rst +++ /dev/null @@ -1,25 +0,0 @@ -detectron2.engine package -========================= - - -.. automodule:: detectron2.engine - :members: - :undoc-members: - :show-inheritance: - - -detectron2.engine.defaults module ---------------------------------- - -.. automodule:: detectron2.engine.defaults - :members: - :undoc-members: - :show-inheritance: - -detectron2.engine.hooks module ---------------------------------- - -.. automodule:: detectron2.engine.hooks - :members: - :undoc-members: - :show-inheritance: diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/evaluation.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/evaluation.rst deleted file mode 100644 index d9d34ff..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/evaluation.rst +++ /dev/null @@ -1,7 +0,0 @@ -detectron2.evaluation package -============================= - -.. automodule:: detectron2.evaluation - :members: - :undoc-members: - :show-inheritance: diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/export.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/export.rst deleted file mode 100644 index bb7c3c9..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/export.rst +++ /dev/null @@ -1,7 +0,0 @@ -detectron2.export package -========================= - -.. automodule:: detectron2.export - :members: - :undoc-members: - :show-inheritance: diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/index.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/index.rst deleted file mode 100644 index 1b246f5..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/index.rst +++ /dev/null @@ -1,17 +0,0 @@ -API Documentation -================== - -.. toctree:: - - checkpoint - config - data - engine - evaluation - layers - model_zoo - modeling - solver - structures - utils - export diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/layers.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/layers.rst deleted file mode 100644 index 6aeb521..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/layers.rst +++ /dev/null @@ -1,7 +0,0 @@ -detectron2.layers package -========================= - -.. automodule:: detectron2.layers - :members: - :undoc-members: - :show-inheritance: diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/model_zoo.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/model_zoo.rst deleted file mode 100644 index 8b1c7d5..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/model_zoo.rst +++ /dev/null @@ -1,7 +0,0 @@ -detectron2.model_zoo package -============================ - -.. 
automodule:: detectron2.model_zoo - :members: - :undoc-members: - :show-inheritance: diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/modeling.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/modeling.rst deleted file mode 100644 index 58ccd2c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/modeling.rst +++ /dev/null @@ -1,58 +0,0 @@ -detectron2.modeling package -=========================== - -.. automodule:: detectron2.modeling - :members: - :undoc-members: - :show-inheritance: - - -detectron2.modeling.poolers module ---------------------------------------- - -.. automodule:: detectron2.modeling.poolers - :members: - :undoc-members: - :show-inheritance: - - -detectron2.modeling.sampling module ------------------------------------- - -.. automodule:: detectron2.modeling.sampling - :members: - :undoc-members: - :show-inheritance: - - -detectron2.modeling.box_regression module ------------------------------------------- - -.. automodule:: detectron2.modeling.box_regression - :members: - :undoc-members: - :show-inheritance: - - -Model Registries ------------------ - -These are different registries provided in modeling. -Each registry provide you the ability to replace it with your customized component, -without having to modify detectron2's code. - -Note that it is impossible to allow users to customize any line of code directly. -Even just to add one line at some place, -you'll likely need to find out the smallest registry which contains that line, -and register your component to that registry. - - -.. autodata:: detectron2.modeling.META_ARCH_REGISTRY -.. autodata:: detectron2.modeling.BACKBONE_REGISTRY -.. autodata:: detectron2.modeling.PROPOSAL_GENERATOR_REGISTRY -.. autodata:: detectron2.modeling.RPN_HEAD_REGISTRY -.. autodata:: detectron2.modeling.ANCHOR_GENERATOR_REGISTRY -.. autodata:: detectron2.modeling.ROI_HEADS_REGISTRY -.. autodata:: detectron2.modeling.ROI_BOX_HEAD_REGISTRY -.. autodata:: detectron2.modeling.ROI_MASK_HEAD_REGISTRY -.. autodata:: detectron2.modeling.ROI_KEYPOINT_HEAD_REGISTRY diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/solver.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/solver.rst deleted file mode 100644 index 7f4a49f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/solver.rst +++ /dev/null @@ -1,7 +0,0 @@ -detectron2.solver package -========================= - -.. automodule:: detectron2.solver - :members: - :undoc-members: - :show-inheritance: diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/structures.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/structures.rst deleted file mode 100644 index 5701c61..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/structures.rst +++ /dev/null @@ -1,7 +0,0 @@ -detectron2.structures package -============================= - -.. automodule:: detectron2.structures - :members: - :undoc-members: - :show-inheritance: diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/utils.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/utils.rst deleted file mode 100644 index 8b57292..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/utils.rst +++ /dev/null @@ -1,80 +0,0 @@ -detectron2.utils package -======================== - -detectron2.utils.colormap module --------------------------------- - -.. 
automodule:: detectron2.utils.colormap - :members: - :undoc-members: - :show-inheritance: - -detectron2.utils.comm module ----------------------------- - -.. automodule:: detectron2.utils.comm - :members: - :undoc-members: - :show-inheritance: - - -detectron2.utils.events module ------------------------------- - -.. automodule:: detectron2.utils.events - :members: - :undoc-members: - :show-inheritance: - - -detectron2.utils.logger module ------------------------------- - -.. automodule:: detectron2.utils.logger - :members: - :undoc-members: - :show-inheritance: - - -detectron2.utils.registry module --------------------------------- - -.. automodule:: detectron2.utils.registry - :members: - :undoc-members: - :show-inheritance: - -detectron2.utils.memory module ----------------------------------- - -.. automodule:: detectron2.utils.memory - :members: - :undoc-members: - :show-inheritance: - - -detectron2.utils.analysis module ----------------------------------- - -.. automodule:: detectron2.utils.analysis - :members: - :undoc-members: - :show-inheritance: - - -detectron2.utils.visualizer module ----------------------------------- - -.. automodule:: detectron2.utils.visualizer - :members: - :undoc-members: - :show-inheritance: - -detectron2.utils.video\_visualizer module ------------------------------------------ - -.. automodule:: detectron2.utils.video_visualizer - :members: - :undoc-members: - :show-inheritance: - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/benchmarks.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/benchmarks.md deleted file mode 100644 index 963f921..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/benchmarks.md +++ /dev/null @@ -1,196 +0,0 @@ - -# Benchmarks - -Here we benchmark the training speed of a Mask R-CNN in detectron2, -with some other popular open source Mask R-CNN implementations. - - -### Settings - -* Hardware: 8 NVIDIA V100s with NVLink. -* Software: Python 3.7, CUDA 10.1, cuDNN 7.6.5, PyTorch 1.5, - TensorFlow 1.15.0rc2, Keras 2.2.5, MxNet 1.6.0b20190820. -* Model: an end-to-end R-50-FPN Mask-RCNN model, using the same hyperparameter as the - [Detectron baseline config](https://github.com/facebookresearch/Detectron/blob/master/configs/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml) - (it does no have scale augmentation). -* Metrics: We use the average throughput in iterations 100-500 to skip GPU warmup time. - Note that for R-CNN-style models, the throughput of a model typically changes during training, because - it depends on the predictions of the model. Therefore this metric is not directly comparable with - "train speed" in model zoo, which is the average speed of the entire training run. - - -### Main Results - -```eval_rst -+-------------------------------+--------------------+ -| Implementation | Throughput (img/s) | -+===============================+====================+ -| |D2| |PT| | 62 | -+-------------------------------+--------------------+ -| mmdetection_ |PT| | 53 | -+-------------------------------+--------------------+ -| maskrcnn-benchmark_ |PT| | 53 | -+-------------------------------+--------------------+ -| tensorpack_ |TF| | 50 | -+-------------------------------+--------------------+ -| simpledet_ |mxnet| | 39 | -+-------------------------------+--------------------+ -| Detectron_ |C2| | 19 | -+-------------------------------+--------------------+ -| `matterport/Mask_RCNN`__ |TF| | 14 | -+-------------------------------+--------------------+ - -.. 
_maskrcnn-benchmark: https://github.com/facebookresearch/maskrcnn-benchmark/ -.. _tensorpack: https://github.com/tensorpack/tensorpack/tree/master/examples/FasterRCNN -.. _mmdetection: https://github.com/open-mmlab/mmdetection/ -.. _simpledet: https://github.com/TuSimple/simpledet/ -.. _Detectron: https://github.com/facebookresearch/Detectron -__ https://github.com/matterport/Mask_RCNN/ - -.. |D2| image:: https://github.com/facebookresearch/detectron2/raw/master/.github/Detectron2-Logo-Horz.svg?sanitize=true - :height: 15pt - :target: https://github.com/facebookresearch/detectron2/ -.. |PT| image:: https://pytorch.org/assets/images/logo-icon.svg - :width: 15pt - :height: 15pt - :target: https://pytorch.org -.. |TF| image:: https://static.nvidiagrid.net/ngc/containers/tensorflow.png - :width: 15pt - :height: 15pt - :target: https://tensorflow.org -.. |mxnet| image:: https://github.com/dmlc/web-data/raw/master/mxnet/image/mxnet_favicon.png - :width: 15pt - :height: 15pt - :target: https://mxnet.apache.org/ -.. |C2| image:: https://caffe2.ai/static/logo.svg - :width: 15pt - :height: 15pt - :target: https://caffe2.ai -``` - - -Details for each implementation: - -* __Detectron2__: with release v0.1.2, run: - ``` - python tools/train_net.py --config-file configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml --num-gpus 8 - ``` - -* __mmdetection__: at commit `b0d845f`, run - ``` - ./tools/dist_train.sh configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py 8 - ``` - -* __maskrcnn-benchmark__: use commit `0ce8f6f` with `sed -i ‘s/torch.uint8/torch.bool/g’ **/*.py; sed -i 's/AT_CHECK/TORCH_CHECK/g' **/*.cu` - to make it compatible with PyTorch 1.5. Then, run training with - ``` - python -m torch.distributed.launch --nproc_per_node=8 tools/train_net.py --config-file configs/e2e_mask_rcnn_R_50_FPN_1x.yaml - ``` - The speed we observed is faster than its model zoo, likely due to different software versions. - -* __tensorpack__: at commit `caafda`, `export TF_CUDNN_USE_AUTOTUNE=0`, then run - ``` - mpirun -np 8 ./train.py --config DATA.BASEDIR=/data/coco TRAINER=horovod BACKBONE.STRIDE_1X1=True TRAIN.STEPS_PER_EPOCH=50 --load ImageNet-R50-AlignPadding.npz - ``` - -* __SimpleDet__: at commit `9187a1`, run - ``` - python detection_train.py --config config/mask_r50v1_fpn_1x.py - ``` - -* __Detectron__: run - ``` - python tools/train_net.py --cfg configs/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml - ``` - Note that many of its ops run on CPUs, therefore the performance is limited. - -* __matterport/Mask_RCNN__: at commit `3deaec`, apply the following diff, `export TF_CUDNN_USE_AUTOTUNE=0`, then run - ``` - python coco.py train --dataset=/data/coco/ --model=imagenet - ``` - Note that many small details in this implementation might be different - from Detectron's standards. - -
- - (diff to make it use the same hyperparameters - click to expand) - - - ```diff - diff --git i/mrcnn/model.py w/mrcnn/model.py - index 62cb2b0..61d7779 100644 - --- i/mrcnn/model.py - +++ w/mrcnn/model.py - @@ -2367,8 +2367,8 @@ class MaskRCNN(): - epochs=epochs, - steps_per_epoch=self.config.STEPS_PER_EPOCH, - callbacks=callbacks, - - validation_data=val_generator, - - validation_steps=self.config.VALIDATION_STEPS, - + #validation_data=val_generator, - + #validation_steps=self.config.VALIDATION_STEPS, - max_queue_size=100, - workers=workers, - use_multiprocessing=True, - diff --git i/mrcnn/parallel_model.py w/mrcnn/parallel_model.py - index d2bf53b..060172a 100644 - --- i/mrcnn/parallel_model.py - +++ w/mrcnn/parallel_model.py - @@ -32,6 +32,7 @@ class ParallelModel(KM.Model): - keras_model: The Keras model to parallelize - gpu_count: Number of GPUs. Must be > 1 - """ - + super().__init__() - self.inner_model = keras_model - self.gpu_count = gpu_count - merged_outputs = self.make_parallel() - diff --git i/samples/coco/coco.py w/samples/coco/coco.py - index 5d172b5..239ed75 100644 - --- i/samples/coco/coco.py - +++ w/samples/coco/coco.py - @@ -81,7 +81,10 @@ class CocoConfig(Config): - IMAGES_PER_GPU = 2 - - # Uncomment to train on 8 GPUs (default is 1) - - # GPU_COUNT = 8 - + GPU_COUNT = 8 - + BACKBONE = "resnet50" - + STEPS_PER_EPOCH = 50 - + TRAIN_ROIS_PER_IMAGE = 512 - - # Number of classes (including background) - NUM_CLASSES = 1 + 80 # COCO has 80 classes - @@ -496,29 +499,10 @@ if __name__ == '__main__': - # *** This training schedule is an example. Update to your needs *** - - # Training - Stage 1 - - print("Training network heads") - model.train(dataset_train, dataset_val, - learning_rate=config.LEARNING_RATE, - epochs=40, - - layers='heads', - - augmentation=augmentation) - - - - # Training - Stage 2 - - # Finetune layers from ResNet stage 4 and up - - print("Fine tune Resnet stage 4 and up") - - model.train(dataset_train, dataset_val, - - learning_rate=config.LEARNING_RATE, - - epochs=120, - - layers='4+', - - augmentation=augmentation) - - - - # Training - Stage 3 - - # Fine tune all layers - - print("Fine tune all layers") - - model.train(dataset_train, dataset_val, - - learning_rate=config.LEARNING_RATE / 10, - - epochs=160, - - layers='all', - + layers='3+', - augmentation=augmentation) - - elif args.command == "evaluate": - ``` - -
diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/changelog.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/changelog.md deleted file mode 100644 index c0d4f59..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/changelog.md +++ /dev/null @@ -1,26 +0,0 @@ -# Change Log - -### Releases -See release log at -[https://github.com/facebookresearch/detectron2/releases](https://github.com/facebookresearch/detectron2/releases). - -### Notable Backward Incompatible Changes: - -* 03/30/2020: Custom box head's `output_size` changed to `output_shape`. -* 02/14/2020,02/18/2020: Mask head and keypoint head now include logic for losses & inference. Custom heads - should overwrite the feature computation by `layers()` method. -* 11/11/2019: `detectron2.data.detection_utils.read_image` transposes images with exif information. - -### Config Version Change Log - -* v1: Rename `RPN_HEAD.NAME` to `RPN.HEAD_NAME`. -* v2: A batch of rename of many configurations before release. - -### Silent Regression in Historical Versions: - -We list a few silent regressions since they may silently produce incorrect results and will be hard to debug. - -* 04/01/2020 - 05/11/2020: Bad accuracy if `TRAIN_ON_PRED_BOXES` is set to True. -* 03/30/2020 - 04/01/2020: ResNets are not correctly built. -* 12/19/2019 - 12/26/2019: Using aspect ratio grouping causes a drop in accuracy. -* release - 11/9/2019: Test time augmentation does not predict the last category. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/compatibility.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/compatibility.md deleted file mode 100644 index f7b66c2..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/compatibility.md +++ /dev/null @@ -1,83 +0,0 @@ -# Compatibility with Other Libraries - -## Compatibility with Detectron (and maskrcnn-benchmark) - -Detectron2 addresses some legacy issues left in Detectron. As a result, their models -are not compatible: -running inference with the same model weights will produce different results in the two code bases. - -The major differences regarding inference are: - -- The height and width of a box with corners (x1, y1) and (x2, y2) is now computed more naturally as - width = x2 - x1 and height = y2 - y1; - In Detectron, a "+ 1" was added both height and width. - - Note that the relevant ops in Caffe2 have [adopted this change of convention](https://github.com/pytorch/pytorch/pull/20550) - with an extra option. - So it is still possible to run inference with a Detectron2-trained model in Caffe2. - - The change in height/width calculations most notably changes: - - encoding/decoding in bounding box regression. - - non-maximum suppression. The effect here is very negligible, though. - -- RPN now uses simpler anchors with fewer quantization artifacts. - - In Detectron, the anchors were quantized and - [do not have accurate areas](https://github.com/facebookresearch/Detectron/issues/227). - In Detectron2, the anchors are center-aligned to feature grid points and not quantized. - -- Classification layers have a different ordering of class labels. - - This involves any trainable parameter with shape (..., num_categories + 1, ...). - In Detectron2, integer labels [0, K-1] correspond to the K = num_categories object categories - and the label "K" corresponds to the special "background" category. - In Detectron, label "0" means background, and labels [1, K] correspond to the K categories. 
- -- ROIAlign is implemented differently. The new implementation is [available in Caffe2](https://github.com/pytorch/pytorch/pull/23706). - - 1. All the ROIs are shifted by half a pixel compared to Detectron in order to create better image-feature-map alignment. - See `layers/roi_align.py` for details. - To enable the old behavior, use `ROIAlign(aligned=False)`, or `POOLER_TYPE=ROIAlign` instead of - `ROIAlignV2` (the default). - - 1. The ROIs are not required to have a minimum size of 1. - This will lead to tiny differences in the output, but should be negligible. - -- Mask inference function is different. - - In Detectron2, the "paste_mask" function is different and should be more accurate than in Detectron. This change - can improve mask AP on COCO by ~0.5% absolute. - -There are some other differences in training as well, but they won't affect -model-level compatibility. The major ones are: - -- We fixed a [bug](https://github.com/facebookresearch/Detectron/issues/459) in - Detectron, by making `RPN.POST_NMS_TOPK_TRAIN` per-image, rather than per-batch. - The fix may lead to a small accuracy drop for a few models (e.g. keypoint - detection) and will require some parameter tuning to match the Detectron results. -- For simplicity, we change the default loss in bounding box regression to L1 loss, instead of smooth L1 loss. - We have observed that this tends to slightly decrease box AP50 while improving box AP for higher - overlap thresholds (and leading to a slight overall improvement in box AP). -- We interpret the coordinates in COCO bounding box and segmentation annotations - as coordinates in range `[0, width]` or `[0, height]`. The coordinates in - COCO keypoint annotations are interpreted as pixel indices in range `[0, width - 1]` or `[0, height - 1]`. - Note that this affects how flip augmentation is implemented. - - -We will later share more details and rationale behind the above mentioned issues -about pixels, coordinates, and "+1"s. - - -## Compatibility with Caffe2 - -As mentioned above, despite the incompatibilities with Detectron, the relevant -ops have been implemented in Caffe2. -Therefore, models trained with detectron2 can be converted in Caffe2. -See [Deployment](../tutorials/deployment.md) for the tutorial. - -## Compatibility with TensorFlow - -Most ops are available in TensorFlow, although some tiny differences in -the implementation of resize / ROIAlign / padding need to be addressed. -A working conversion script is provided by [tensorpack FasterRCNN](https://github.com/tensorpack/tensorpack/tree/master/examples/FasterRCNN/convert_d2) -to run a standard detectron2 model in TensorFlow. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/contributing.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/contributing.md deleted file mode 100644 index 81936df..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/contributing.md +++ /dev/null @@ -1,49 +0,0 @@ -# Contributing to detectron2 - -## Issues -We use GitHub issues to track public bugs and questions. -Please make sure to follow one of the -[issue templates](https://github.com/facebookresearch/detectron2/issues/new/choose) -when reporting any issues. - -Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe -disclosure of security bugs. In those cases, please go through the process -outlined on that page and do not file a public issue. - -## Pull Requests -We actively welcome your pull requests. 
- -However, if you're adding any significant features (e.g. > 50 lines), please -make sure to have a corresponding issue to discuss your motivation and proposals, -before sending a PR. We do not always accept new features, and we take the following -factors into consideration: - -1. Whether the same feature can be achieved without modifying detectron2. -Detectron2 is designed so that you can implement many extensions from the outside, e.g. -those in [projects](https://github.com/facebookresearch/detectron2/tree/master/projects). -If some part is not as extensible, you can also bring up the issue to make it more extensible. -2. Whether the feature is potentially useful to a large audience, or only to a small portion of users. -3. Whether the proposed solution has a good design / interface. -4. Whether the proposed solution adds extra mental/practical overhead to users who don't - need such feature. -5. Whether the proposed solution breaks existing APIs. - -When sending a PR, please do: - -1. If a PR contains multiple orthogonal changes, split it to several PRs. -2. If you've added code that should be tested, add tests. -3. For PRs that need experiments (e.g. adding a new model or new methods), - you don't need to update model zoo, but do provide experiment results in the description of the PR. -4. If APIs are changed, update the documentation. -5. Make sure your code lints with `./dev/linter.sh`. - - -## Contributor License Agreement ("CLA") -In order to accept your pull request, we need you to submit a CLA. You only need -to do this once to work on any of Facebook's open source projects. - -Complete your CLA here: - -## License -By contributing to detectron2, you agree that your contributions will be licensed -under the LICENSE file in the root directory of this source tree. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/index.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/index.rst deleted file mode 100644 index 63cf907..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/index.rst +++ /dev/null @@ -1,10 +0,0 @@ -Notes -====================================== - -.. toctree:: - :maxdepth: 2 - - benchmarks - compatibility - contributing - changelog diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/README.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/README.md deleted file mode 100644 index 1ca9c94..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# Read the docs: - -The latest documentation built from this directory is available at [detectron2.readthedocs.io](https://detectron2.readthedocs.io/). -Documents in this directory are not meant to be read on github. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/builtin_datasets.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/builtin_datasets.md deleted file mode 100644 index 1a2633f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/builtin_datasets.md +++ /dev/null @@ -1,99 +0,0 @@ -# Setup Builtin Datasets - -Detectron2 has builtin support for a few datasets. -The datasets are assumed to exist in a directory specified by the environment variable -`DETECTRON2_DATASETS`. -Under this directory, detectron2 expects to find datasets in the structure described below. - -You can set the location for builtin datasets by `export DETECTRON2_DATASETS=/path/to/datasets`. 
-If left unset, the default is `./datasets` relative to your current working directory. - -The [model zoo](https://github.com/facebookresearch/detectron2/blob/master/MODEL_ZOO.md) -contains configs and models that use these builtin datasets. - -## Expected dataset structure for COCO instance/keypoint detection: - -``` -coco/ - annotations/ - instances_{train,val}2017.json - person_keypoints_{train,val}2017.json - {train,val}2017/ - # image files that are mentioned in the corresponding json -``` - -You can use the 2014 version of the dataset as well. - -Some of the builtin tests (`dev/run_*_tests.sh`) uses a tiny version of the COCO dataset, -which you can download with `./prepare_for_tests.sh`. - -## Expected dataset structure for PanopticFPN: - -``` -coco/ - annotations/ - panoptic_{train,val}2017.json - panoptic_{train,val}2017/ # png annotations - panoptic_stuff_{train,val}2017/ # generated by the script mentioned below -``` - -Install panopticapi by: -``` -pip install git+https://github.com/cocodataset/panopticapi.git -``` -Then, run `python prepare_panoptic_fpn.py`, to extract semantic annotations from panoptic annotations. - -## Expected dataset structure for LVIS instance segmentation: -``` -coco/ - {train,val,test}2017/ -lvis/ - lvis_v0.5_{train,val}.json - lvis_v0.5_image_info_test.json -``` - -Install lvis-api by: -``` -pip install git+https://github.com/lvis-dataset/lvis-api.git -``` - -Run `python prepare_cocofied_lvis.py` to prepare "cocofied" LVIS annotations for evaluation of models trained on the COCO dataset. - -## Expected dataset structure for cityscapes: -``` -cityscapes/ - gtFine/ - train/ - aachen/ - color.png, instanceIds.png, labelIds.png, polygons.json, - labelTrainIds.png - ... - val/ - test/ - leftImg8bit/ - train/ - val/ - test/ -``` -Install cityscapes scripts by: -``` -pip install git+https://github.com/mcordts/cityscapesScripts.git -``` - -Note: labelTrainIds.png are created using cityscapesescript with: -``` -CITYSCAPES_DATASET=$DETECTRON2_DATASETS/cityscapes python cityscapesscripts/preparation/createTrainIdLabelImgs.py -``` -They are not needed for instance segmentation. - -## Expected dataset structure for Pascal VOC: -``` -VOC20{07,12}/ - Annotations/ - ImageSets/ - Main/ - trainval.txt - test.txt - # train.txt or val.txt, if you use these splits - JPEGImages/ -``` diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/configs.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/configs.md deleted file mode 100644 index ea82583..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/configs.md +++ /dev/null @@ -1,58 +0,0 @@ -# Configs - -Detectron2 provides a key-value based config system that can be -used to obtain standard, common behaviors. - -Detectron2's config system uses YAML and [yacs](https://github.com/rbgirshick/yacs). -In addition to the [basic operations](../modules/config.html#detectron2.config.CfgNode) -that access and update a config, we provide the following extra functionalities: - -1. The config can have `_BASE_: base.yaml` field, which will load a base config first. - Values in the base config will be overwritten in sub-configs, if there are any conflicts. - We provided several base configs for standard model architectures. -2. We provide config versioning, for backward compatibility. - If your config file is versioned with a config line like `VERSION: 2`, - detectron2 will still recognize it even if we change some keys in the future. 
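As a quick, hypothetical illustration of the `_BASE_` mechanism above: the sketch below writes two tiny config files and loads the child through `CfgNode.merge_from_file`, which resolves `_BASE_` relative to the child file. The file names and values are made up for the example; only keys that already exist in detectron2's default config can be overridden this way.

```python
# Illustration only: file names and values are hypothetical.
from pathlib import Path
from detectron2.config import get_cfg

Path("base.yaml").write_text("MODEL:\n  MASK_ON: True\nSOLVER:\n  BASE_LR: 0.02\n")
Path("child.yaml").write_text("_BASE_: base.yaml\nSOLVER:\n  BASE_LR: 0.0025\n")

cfg = get_cfg()                    # start from detectron2's default config
cfg.merge_from_file("child.yaml")  # loads base.yaml first, then applies the child's overrides
print(cfg.MODEL.MASK_ON, cfg.SOLVER.BASE_LR)  # True 0.0025 -- inherited value kept, conflict overridden
```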
- -"Config" is a very limited abstraction. -We do not expect all features in detectron2 to be available through configs. -If you need something that's not available in the config space, -please write code using detectron2's API. - -### Basic Usage - -Some basic usage of the `CfgNode` object is shown here. See more in [documentation](../modules/config.html#detectron2.config.CfgNode). -```python -from detectron2.config import get_cfg -cfg = get_cfg() # obtain detectron2's default config -cfg.xxx = yyy # add new configs for your own custom components -cfg.merge_from_file("my_cfg.yaml") # load values from a file - -cfg.merge_from_list(["MODEL.WEIGHTS", "weights.pth"]) # can also load values from a list of str -print(cfg.dump()) # print formatted configs -``` - -Many builtin tools in detectron2 accepts command line config overwrite: -Key-value pairs provided in the command line will overwrite the existing values in the config file. -For example, [demo.py](../../demo/demo.py) can be used with -``` -./demo.py --config-file config.yaml [--other-options] \ - --opts MODEL.WEIGHTS /path/to/weights INPUT.MIN_SIZE_TEST 1000 -``` - -To see a list of available configs in detectron2 and what they mean, -check [Config References](../modules/config.html#config-references) - - -### Best Practice with Configs - -1. Treat the configs you write as "code": avoid copying them or duplicating them; use `_BASE_` - to share common parts between configs. - -2. Keep the configs you write simple: don't include keys that do not affect the experimental setting. - -3. Keep a version number in your configs (or the base config), e.g., `VERSION: 2`, - for backward compatibility. - We print a warning when reading a config without version number. - The official configs do not include version number because they are meant to - be always up-to-date. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/data_loading.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/data_loading.md deleted file mode 100644 index bb037ca..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/data_loading.md +++ /dev/null @@ -1,77 +0,0 @@ - -# Use Custom Dataloaders - -## How the Existing Dataloader Works - -Detectron2 contains a builtin data loading pipeline. -It's good to understand how it works, in case you need to write a custom one. - -Detectron2 provides two functions -[build_detection_{train,test}_loader](../modules/data.html#detectron2.data.build_detection_train_loader) -that create a default data loader from a given config. -Here is how `build_detection_{train,test}_loader` work: - -1. It takes the name of a registered dataset (e.g., "coco_2017_train") and loads a `list[dict]` representing the dataset items - in a lightweight, canonical format. These dataset items are not yet ready to be used by the model (e.g., images are - not loaded into memory, random augmentations have not been applied, etc.). - Details about the dataset format and dataset registration can be found in - [datasets](./datasets.md). -2. Each dict in this list is mapped by a function ("mapper"): - * Users can customize this mapping function by specifying the "mapper" argument in - `build_detection_{train,test}_loader`. The default mapper is [DatasetMapper](../modules/data.html#detectron2.data.DatasetMapper). - * The output format of such function can be arbitrary, as long as it is accepted by the consumer of this data loader (usually the model). 
- The outputs of the default mapper, after batching, follow the default model input format documented in - [Use Models](./models.html#model-input-format). - * The role of the mapper is to transform the lightweight, canonical representation of a dataset item into a format - that is ready for the model to consume (including, e.g., read images, perform random data augmentation and convert to torch Tensors). - If you would like to perform custom transformations to data, you often want a custom mapper. -3. The outputs of the mapper are batched (simply into a list). -4. This batched data is the output of the data loader. Typically, it's also the input of - `model.forward()`. - - -## Write a Custom Dataloader - -Using a different "mapper" with `build_detection_{train,test}_loader(mapper=)` works for most use cases -of custom data loading. -For example, if you want to resize all images to a fixed size for Mask R-CNN training, write this: - -```python -from detectron2.data import build_detection_train_loader -from detectron2.data import transforms as T -from detectron2.data import detection_utils as utils - -def mapper(dataset_dict): - # Implement a mapper, similar to the default DatasetMapper, but with your own customizations - dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below - image = utils.read_image(dataset_dict["file_name"], format="BGR") - image, transforms = T.apply_transform_gens([T.Resize((800, 800))], image) - dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32")) - - annos = [ - utils.transform_instance_annotations(obj, transforms, image.shape[:2]) - for obj in dataset_dict.pop("annotations") - if obj.get("iscrowd", 0) == 0 - ] - instances = utils.annotations_to_instances(annos, image.shape[:2]) - dataset_dict["instances"] = utils.filter_empty_instances(instances) - return dataset_dict - -data_loader = build_detection_train_loader(cfg, mapper=mapper) -# use this dataloader instead of the default -``` -Refer to [API documentation of detectron2.data](../modules/data) for details. - -If you want to change not only the mapper (e.g., to write different sampling or batching logic), -you can write your own data loader. The data loader is simply a -python iterator that produces [the format](./models.md) your model accepts. -You can implement it using any tools you like. - -## Use a Custom Dataloader - -If you use [DefaultTrainer](../modules/engine.html#detectron2.engine.defaults.DefaultTrainer), -you can overwrite its `build_{train,test}_loader` method to use your own dataloader. -See the [densepose dataloader](../../projects/DensePose/train_net.py) -for an example. - -If you write your own training loop, you can plug in your data loader easily. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/datasets.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/datasets.md deleted file mode 100644 index 8dc1c0c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/datasets.md +++ /dev/null @@ -1,221 +0,0 @@ -# Use Custom Datasets - -Datasets that have builtin support in detectron2 are listed in [datasets](../../datasets). -If you want to use a custom dataset while also reusing detectron2's data loaders, -you will need to - -1. __Register__ your dataset (i.e., tell detectron2 how to obtain your dataset). -2. Optionally, __register metadata__ for your dataset. - -Next, we explain the above two concepts in detail. 
- -The [Colab tutorial](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) -has a live example of how to register and train on a dataset of custom formats. - -### Register a Dataset - -To let detectron2 know how to obtain a dataset named "my_dataset", you will implement -a function that returns the items in your dataset and then tell detectron2 about this -function: -```python -def my_dataset_function(): - ... - return list[dict] in the following format - -from detectron2.data import DatasetCatalog -DatasetCatalog.register("my_dataset", my_dataset_function) -``` - -Here, the snippet associates a dataset "my_dataset" with a function that returns the data. -The registration stays effective until the process exists. - -The function can processes data from its original format into either one of the following: -1. Detectron2's standard dataset dict, described below. This will work with many other builtin - features in detectron2, so it's recommended to use it when it's sufficient for your task. -2. Your custom dataset dict. You can also return arbitrary dicts in your own format, - such as adding extra keys for new tasks. - Then you will need to handle them properly downstream as well. - See below for more details. - -#### Standard Dataset Dicts - -For standard tasks -(instance detection, instance/semantic/panoptic segmentation, keypoint detection), -we load the original dataset into `list[dict]` with a specification similar to COCO's json annotations. -This is our standard representation for a dataset. - -Each dict contains information about one image. -The dict may have the following fields, -and the required fields vary based on what the dataloader or the task needs (see more below). - -+ `file_name`: the full path to the image file. Will apply rotation and flipping if the image has such exif information. -+ `height`, `width`: integer. The shape of image. -+ `image_id` (str or int): a unique id that identifies this image. Used - during evaluation to identify the images, but a dataset may use it for different purposes. -+ `annotations` (list[dict]): each dict corresponds to annotations of one instance - in this image. Required by instance detection/segmentation or keypoint detection tasks. - - Images with empty `annotations` will by default be removed from training, - but can be included using `DATALOADER.FILTER_EMPTY_ANNOTATIONS`. - - Each dict contains the following keys, of which `bbox`,`bbox_mode` and `category_id` are required: - + `bbox` (list[float]): list of 4 numbers representing the bounding box of the instance. - + `bbox_mode` (int): the format of bbox. - It must be a member of - [structures.BoxMode](../modules/structures.html#detectron2.structures.BoxMode). - Currently supports: `BoxMode.XYXY_ABS`, `BoxMode.XYWH_ABS`. - + `category_id` (int): an integer in the range [0, num_categories) representing the category label. - The value num_categories is reserved to represent the "background" category, if applicable. - + `segmentation` (list[list[float]] or dict): the segmentation mask of the instance. - + If `list[list[float]]`, it represents a list of polygons, one for each connected component - of the object. Each `list[float]` is one simple polygon in the format of `[x1, y1, ..., xn, yn]`. - The Xs and Ys are either relative coordinates in [0, 1], or absolute coordinates, - depend on whether "bbox_mode" is relative. - + If `dict`, it represents the per-pixel segmentation mask in COCO's RLE format. The dict should have - keys "size" and "counts". 
You can convert a uint8 segmentation mask of 0s and 1s into - RLE format by `pycocotools.mask.encode(np.asarray(mask, order="F"))`. - + `keypoints` (list[float]): in the format of [x1, y1, v1,..., xn, yn, vn]. - v[i] means the [visibility](http://cocodataset.org/#format-data) of this keypoint. - `n` must be equal to the number of keypoint categories. - The Xs and Ys are either relative coordinates in [0, 1], or absolute coordinates, - depend on whether "bbox_mode" is relative. - - Note that the coordinate annotations in COCO format are integers in range [0, H-1 or W-1]. - By default, detectron2 adds 0.5 to absolute keypoint coordinates to convert them from discrete - pixel indices to floating point coordinates. - + `iscrowd`: 0 (default) or 1. Whether this instance is labeled as COCO's "crowd - region". Don't include this field if you don't know what it means. -+ `sem_seg_file_name`: the full path to the ground truth semantic segmentation file. - Required by semantic segmentation task. - It should be an image whose pixel values are integer labels. - - -Fast R-CNN (with precomputed proposals) is rarely used today. -To train a Fast R-CNN, the following extra keys are needed: - -+ `proposal_boxes` (array): 2D numpy array with shape (K, 4) representing K precomputed proposal boxes for this image. -+ `proposal_objectness_logits` (array): numpy array with shape (K, ), which corresponds to the objectness - logits of proposals in 'proposal_boxes'. -+ `proposal_bbox_mode` (int): the format of the precomputed proposal bbox. - It must be a member of - [structures.BoxMode](../modules/structures.html#detectron2.structures.BoxMode). - Default is `BoxMode.XYXY_ABS`. - -#### Custom Dataset Dicts for New Tasks - -In the `list[dict]` that your dataset function returns, the dictionary can also have arbitrary custom data. -This will be useful for a new task that needs extra information not supported -by the standard dataset dicts. In this case, you need to make sure the downstream code can handle your data -correctly. Usually this requires writing a new `mapper` for the dataloader (see [Use Custom Dataloaders](./data_loading.md)). - -When designing a custom format, note that all dicts are stored in memory -(sometimes serialized and with multiple copies). -To save memory, each dict is meant to contain small but sufficient information -about each sample, such as file names and annotations. -Loading full samples typically happens in the data loader. - -For attributes shared among the entire dataset, use `Metadata` (see below). -To avoid extra memory, do not save such information repeatly for each sample. - -### "Metadata" for Datasets - -Each dataset is associated with some metadata, accessible through -`MetadataCatalog.get(dataset_name).some_metadata`. -Metadata is a key-value mapping that contains information that's shared among -the entire dataset, and usually is used to interpret what's in the dataset, e.g., -names of classes, colors of classes, root of files, etc. -This information will be useful for augmentation, evaluation, visualization, logging, etc. -The structure of metadata depends on the what is needed from the corresponding downstream code. - -If you register a new dataset through `DatasetCatalog.register`, -you may also want to add its corresponding metadata through -`MetadataCatalog.get(dataset_name).some_key = some_value`, to enable any features that need the metadata. 
-You can do it like this (using the metadata key "thing_classes" as an example): - -```python -from detectron2.data import MetadataCatalog -MetadataCatalog.get("my_dataset").thing_classes = ["person", "dog"] -``` - -Here is a list of metadata keys that are used by builtin features in detectron2. -If you add your own dataset without these metadata, some features may be -unavailable to you: - -* `thing_classes` (list[str]): Used by all instance detection/segmentation tasks. - A list of names for each instance/thing category. - If you load a COCO format dataset, it will be automatically set by the function `load_coco_json`. - -* `thing_colors` (list[tuple(r, g, b)]): Pre-defined color (in [0, 255]) for each thing category. - Used for visualization. If not given, random colors are used. - -* `stuff_classes` (list[str]): Used by semantic and panoptic segmentation tasks. - A list of names for each stuff category. - -* `stuff_colors` (list[tuple(r, g, b)]): Pre-defined color (in [0, 255]) for each stuff category. - Used for visualization. If not given, random colors are used. - -* `keypoint_names` (list[str]): Used by keypoint localization. A list of names for each keypoint. - -* `keypoint_flip_map` (list[tuple[str]]): Used by the keypoint localization task. A list of pairs of names, - where each pair are the two keypoints that should be flipped if the image is - flipped horizontally during augmentation. -* `keypoint_connection_rules`: list[tuple(str, str, (r, g, b))]. Each tuple specifies a pair of keypoints - that are connected and the color to use for the line between them when visualized. - -Some additional metadata that are specific to the evaluation of certain datasets (e.g. COCO): - -* `thing_dataset_id_to_contiguous_id` (dict[int->int]): Used by all instance detection/segmentation tasks in the COCO format. - A mapping from instance class ids in the dataset to contiguous ids in range [0, #class). - Will be automatically set by the function `load_coco_json`. - -* `stuff_dataset_id_to_contiguous_id` (dict[int->int]): Used when generating prediction json files for - semantic/panoptic segmentation. - A mapping from semantic segmentation class ids in the dataset - to contiguous ids in [0, num_categories). It is useful for evaluation only. - -* `json_file`: The COCO annotation json file. Used by COCO evaluation for COCO-format datasets. -* `panoptic_root`, `panoptic_json`: Used by panoptic evaluation. -* `evaluator_type`: Used by the builtin main training script to select - evaluator. Don't use it in a new training script. - You can just provide the [DatasetEvaluator](../modules/evaluation.html#detectron2.evaluation.DatasetEvaluator) - for your dataset directly in your main script. - -NOTE: For background on the concept of "thing" and "stuff", see -[On Seeing Stuff: The Perception of Materials by Humans and Machines](http://persci.mit.edu/pub_pdfs/adelson_spie_01.pdf). -In detectron2, the term "thing" is used for instance-level tasks, -and "stuff" is used for semantic segmentation tasks. -Both are used in panoptic segmentation. 
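Putting the registration and metadata pieces above together, a minimal sketch could look like the following. The dataset name, file path, and class names are hypothetical, and a real dataset function would parse your annotation files rather than return a hard-coded record.

```python
# Hypothetical end-to-end registration; the name, path and classes are made up.
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode

def get_my_dataset_dicts():
    # In practice, parse your annotation files here.
    return [{
        "file_name": "images/0001.jpg",
        "image_id": 0,
        "height": 480,
        "width": 640,
        "annotations": [{
            "bbox": [100.0, 120.0, 200.0, 250.0],
            "bbox_mode": BoxMode.XYXY_ABS,
            "category_id": 0,  # index into thing_classes below
        }],
    }]

DatasetCatalog.register("my_dataset_train", get_my_dataset_dicts)
meta = MetadataCatalog.get("my_dataset_train")
meta.thing_classes = ["balloon"]
meta.thing_colors = [(0, 200, 0)]  # optional; used for visualization
```

Once registered, "my_dataset_train" can be referred to by name in `cfg.DATASETS.TRAIN`, as discussed in "Update the Config for New Datasets" below.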
- -### Register a COCO Format Dataset - -If your dataset is already a json file in the COCO format, -the dataset and its associated metadata can be registered easily with: -```python -from detectron2.data.datasets import register_coco_instances -register_coco_instances("my_dataset", {}, "json_annotation.json", "path/to/image/dir") -``` - -If your dataset is in COCO format but with extra custom per-instance annotations, -the [load_coco_json](../modules/data.html#detectron2.data.datasets.load_coco_json) -function might be useful. - -### Update the Config for New Datasets - -Once you've registered the dataset, you can use the name of the dataset (e.g., "my_dataset" in -example above) in `cfg.DATASETS.{TRAIN,TEST}`. -There are other configs you might want to change to train or evaluate on new datasets: - -* `MODEL.ROI_HEADS.NUM_CLASSES` and `MODEL.RETINANET.NUM_CLASSES` are the number of thing classes - for R-CNN and RetinaNet models, respectively. -* `MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS` sets the number of keypoints for Keypoint R-CNN. - You'll also need to set [Keypoint OKS](http://cocodataset.org/#keypoints-eval) - with `TEST.KEYPOINT_OKS_SIGMAS` for evaluation. -* `MODEL.SEM_SEG_HEAD.NUM_CLASSES` sets the number of stuff classes for Semantic FPN & Panoptic FPN. -* If you're training Fast R-CNN (with precomputed proposals), `DATASETS.PROPOSAL_FILES_{TRAIN,TEST}` - need to match the datasets. The format of proposal files are documented - [here](../modules/data.html#detectron2.data.load_proposals_into_dataset). - -New models -(e.g. [TensorMask](../../projects/TensorMask), -[PointRend](../../projects/PointRend)) -often have similar configs of their own that need to be changed as well. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/deployment.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/deployment.md deleted file mode 100644 index a473247..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/deployment.md +++ /dev/null @@ -1,92 +0,0 @@ -# Deployment - -## Caffe2 Deployment -We currently support converting a detectron2 model to Caffe2 format through ONNX. -The converted Caffe2 model is able to run without detectron2 dependency in either Python or C++. -It has a runtime optimized for CPU & mobile inference, but not for GPU inference. - -Caffe2 conversion requires PyTorch ≥ 1.4 and ONNX ≥ 1.6. - -### Coverage - -It supports 3 most common meta architectures: `GeneralizedRCNN`, `RetinaNet`, `PanopticFPN`, -and most official models under these 3 meta architectures. - -Users' custom extensions under these architectures (added through registration) are supported -as long as they do not contain control flow or operators not available in Caffe2 (e.g. deformable convolution). -For example, custom backbones and heads are often supported out of the box. - -### Usage - -The conversion APIs are documented at [the API documentation](../modules/export). -We provide a tool, `caffe2_converter.py` as an example that uses -these APIs to convert a standard model. 
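If you prefer to drive the conversion from Python rather than through `caffe2_converter.py`, a rough outline is sketched below. It assumes the `add_export_config` and `export_caffe2_model` helpers documented in the export API linked above, plus a prepared COCO dataset to supply a sample input for tracing; treat it as a sketch, not a drop-in script. The command-line usage of the tool, which wraps essentially the same steps, follows.

```python
# Rough sketch only; assumes add_export_config/export_caffe2_model from
# detectron2.export and a prepared COCO dataset for the sample input.
from detectron2.config import get_cfg
from detectron2.modeling import build_model
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.data import build_detection_test_loader
from detectron2.export import add_export_config, export_caffe2_model

cfg = get_cfg()
cfg.merge_from_file("configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.WEIGHTS = "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
cfg.MODEL.DEVICE = "cpu"
cfg = add_export_config(cfg)

model = build_model(cfg)
DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)
model.eval()

# Tracing needs one real sample input, which is why a dataset is required.
data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0])
first_batch = next(iter(data_loader))

caffe2_model = export_caffe2_model(cfg, model, first_batch)
caffe2_model.save_protobuf("./caffe2_model")  # writes model.pb and model_init.pb
```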
- -To convert an official Mask R-CNN trained on COCO, first -[prepare the COCO dataset](../../datasets/), then pick the model from [Model Zoo](../../MODEL_ZOO.md), and run: -``` -cd tools/deploy/ && ./caffe2_converter.py --config-file ../../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \ - --output ./caffe2_model --run-eval \ - MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl \ - MODEL.DEVICE cpu -``` - -Note that: -1. The conversion needs valid sample inputs & weights to trace the model. That's why the script requires the dataset. - You can modify the script to obtain sample inputs in other ways. -2. With the `--run-eval` flag, it will evaluate the converted models to verify its accuracy. - The accuracy is typically slightly different (within 0.1 AP) from PyTorch due to - numerical precisions between different implementations. - It's recommended to always verify the accuracy in case your custom model is not supported by the - conversion. - -The converted model is available at the specified `caffe2_model/` directory. Two files `model.pb` -and `model_init.pb` that contain network structure and network parameters are necessary for deployment. -These files can then be loaded in C++ or Python using Caffe2's APIs. - -The script generates `model.svg` file which contains a visualization of the network. -You can also load `model.pb` to tools such as [netron](https://github.com/lutzroeder/netron) to visualize it. - -### Use the model in C++/Python - -The model can be loaded in C++. An example [caffe2_mask_rcnn.cpp](../../tools/deploy/) is given, -which performs CPU/GPU inference using `COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x`. - -The C++ example needs to be built with: -* PyTorch with caffe2 inside -* gflags, glog, opencv -* protobuf headers that match the version of your caffe2 -* MKL headers if caffe2 is built with MKL - -The following can compile the example inside [official detectron2 docker](../../docker/): -``` -sudo apt update && sudo apt install libgflags-dev libgoogle-glog-dev libopencv-dev -pip install mkl-include -wget https://github.com/protocolbuffers/protobuf/releases/download/v3.6.1/protobuf-cpp-3.6.1.tar.gz -tar xf protobuf-cpp-3.6.1.tar.gz -export CPATH=$(readlink -f ./protobuf-3.6.1/src/):$HOME/.local/include -export CMAKE_PREFIX_PATH=$HOME/.local/lib/python3.6/site-packages/torch/ -mkdir build && cd build -cmake -DTORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST .. && make - -# To run: -./caffe2_mask_rcnn --predict_net=./model.pb --init_net=./model_init.pb --input=input.jpg -``` - -Note that: - -* All converted models (the .pb files) take two input tensors: - "data" is an NCHW image, and "im_info" is an Nx3 tensor consisting of (height, width, 1.0) for - each image (the shape of "data" might be larger than that in "im_info" due to padding). - -* The converted models do not contain post-processing operations that - transform raw layer outputs into formatted predictions. - The example only produces raw outputs (28x28 masks) from the final - layers that are not post-processed, because in actual deployment, an application often needs - its custom lightweight post-processing (e.g. full-image masks for every detected object is often not necessary). - -We also provide a python wrapper around the converted model, in the -[Caffe2Model.\_\_call\_\_](../modules/export.html#detectron2.export.Caffe2Model.__call__) method. 
-This method has an interface that's identical to the [pytorch versions of models](./models.md), -and it internally applies pre/post-processing code to match the formats. -They can serve as a reference for pre/post-processing in actual deployment. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/evaluation.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/evaluation.md deleted file mode 100644 index c71adb7..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/evaluation.md +++ /dev/null @@ -1,43 +0,0 @@ - -# Evaluation - -Evaluation is a process that takes a number of inputs/outputs pairs and aggregate them. -You can always [use the model](./models.md) directly and just parse its inputs/outputs manually to perform -evaluation. -Alternatively, evaluation is implemented in detectron2 using the [DatasetEvaluator](../modules/evaluation.html#detectron2.evaluation.DatasetEvaluator) -interface. - -Detectron2 includes a few `DatasetEvaluator` that computes metrics using standard dataset-specific -APIs (e.g., COCO, LVIS). -You can also implement your own `DatasetEvaluator` that performs some other jobs -using the inputs/outputs pairs. -For example, to count how many instances are detected on the validation set: - -``` -class Counter(DatasetEvaluator): - def reset(self): - self.count = 0 - def process(self, inputs, outputs): - for output in outputs: - self.count += len(output["instances"]) - def evaluate(self): - # save self.count somewhere, or print it, or return it. - return {"count": self.count} -``` - -Once you have some `DatasetEvaluator`, you can run it with -[inference_on_dataset](../modules/evaluation.html#detectron2.evaluation.inference_on_dataset). -For example, - -```python -val_results = inference_on_dataset( - model, - val_data_loader, - DatasetEvaluators([COCOEvaluator(...), Counter()])) -``` -Compared to running the evaluation manually using the model, the benefit of this function is that -you can merge evaluators together using [DatasetEvaluators](../modules/evaluation.html#detectron2.evaluation.DatasetEvaluators). -In this way you can run all evaluations without having to go through the dataset multiple times. - -The `inference_on_dataset` function also provides accurate speed benchmarks for the -given model and dataset. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/extend.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/extend.md deleted file mode 100644 index 4232185..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/extend.md +++ /dev/null @@ -1,53 +0,0 @@ -# Extend Detectron2's Defaults - -__Research is about doing things in new ways__. -This brings a tension in how to create abstractions in code, -which is a challenge for any research engineering project of a significant size: - -1. On one hand, it needs to have very thin abstractions to allow for the possibility of doing - everything in new ways. It should be reasonably easy to break existing - abstractions and replace them with new ones. - -2. On the other hand, such a project also needs reasonably high-level - abstractions, so that users can easily do things in standard ways, - without worrying too much about the details that only certain researchers care about. - -In detectron2, there are two types of interfaces that address this tension together: - -1. Functions and classes that take a config (`cfg`) argument - (sometimes with only a few extra arguments). 
- - Such functions and classes implement - the "standard default" behavior: it will read what it needs from the - config and do the "standard" thing. - Users only need to load a given config and pass it around, without having to worry about - which arguments are used and what they all mean. - -2. Functions and classes that have well-defined explicit arguments. - - Each of these is a small building block of the entire system. - They require users' expertise to understand what each argument should be, - and require more effort to stitch together to a larger system. - But they can be stitched together in more flexible ways. - - When you need to implement something not supported by the "standard defaults" - included in detectron2, these well-defined components can be reused. - -3. (experimental) A few classes are implemented with the - [@configurable](../../modules/config.html#detectron2.config.configurable) - decorator - they can be called with either a config, or with explicit arguments. - Their explicit argument interfaces are currently __experimental__ and subject to change. - - -If you only need the standard behavior, the [Beginner's Tutorial](./getting_started.md) -should suffice. If you need to extend detectron2 to your own needs, -see the following tutorials for more details: - -* Detectron2 includes a few standard datasets. To use custom ones, see - [Use Custom Datasets](./datasets.md). -* Detectron2 contains the standard logic that creates a data loader for training/testing from a - dataset, but you can write your own as well. See [Use Custom Data Loaders](./data_loading.md). -* Detectron2 implements many standard detection models, and provide ways for you - to overwrite their behaviors. See [Use Models](./models.md) and [Write Models](./write-models.md). -* Detectron2 provides a default training loop that is good for common training tasks. - You can customize it with hooks, or write your own loop instead. See [training](./training.md). diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/getting_started.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/getting_started.md deleted file mode 100644 index acaf13f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/getting_started.md +++ /dev/null @@ -1,79 +0,0 @@ -## Getting Started with Detectron2 - -This document provides a brief intro of the usage of builtin command-line tools in detectron2. - -For a tutorial that involves actual coding with the API, -see our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) -which covers how to run inference with an -existing model, and how to train a builtin model on a custom dataset. - -For more advanced tutorials, refer to our [documentation](https://detectron2.readthedocs.io/tutorials/extend.html). - - -### Inference Demo with Pre-trained Models - -1. Pick a model and its config file from - [model zoo](MODEL_ZOO.md), - for example, `mask_rcnn_R_50_FPN_3x.yaml`. -2. We provide `demo.py` that is able to run builtin standard models. Run it with: -``` -cd demo/ -python demo.py --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \ - --input input1.jpg input2.jpg \ - [--other-options] - --opts MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl -``` -The configs are made for training, therefore we need to specify `MODEL.WEIGHTS` to a model from model zoo for evaluation. 
-This command will run the inference and show visualizations in an OpenCV window. - -For details of the command line arguments, see `demo.py -h` or look at its source code -to understand its behavior. Some common arguments are: -* To run __on your webcam__, replace `--input files` with `--webcam`. -* To run __on a video__, replace `--input files` with `--video-input video.mp4`. -* To run __on cpu__, add `MODEL.DEVICE cpu` after `--opts`. -* To save outputs to a directory (for images) or a file (for webcam or video), use `--output`. - - -### Training & Evaluation in Command Line - -We provide a script in "tools/{,plain_}train_net.py", that is made to train -all the configs provided in detectron2. -You may want to use it as a reference to write your own training script. - -To train a model with "train_net.py", first -setup the corresponding datasets following -[datasets/README.md](./datasets/README.md), -then run: -``` -cd tools/ -./train_net.py --num-gpus 8 \ - --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml -``` - -The configs are made for 8-GPU training. -To train on 1 GPU, you may need to [change some parameters](https://arxiv.org/abs/1706.02677), e.g.: -``` -./train_net.py \ - --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \ - --num-gpus 1 SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025 -``` - -For most models, CPU training is not supported. - -To evaluate a model's performance, use -``` -./train_net.py \ - --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \ - --eval-only MODEL.WEIGHTS /path/to/checkpoint_file -``` -For more options, see `./train_net.py -h`. - -### Use Detectron2 APIs in Your Code - -See our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) -to learn how to use detectron2 APIs to: -1. run inference with an existing model -2. train a builtin model on a custom dataset - -See [detectron2/projects](https://github.com/facebookresearch/detectron2/tree/master/projects) -for more ways to build your project on detectron2. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/index.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/index.rst deleted file mode 100644 index 896e71e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/index.rst +++ /dev/null @@ -1,18 +0,0 @@ -Tutorials -====================================== - -.. toctree:: - :maxdepth: 2 - - install - getting_started - builtin_datasets - extend - datasets - data_loading - models - write-models - training - evaluation - configs - deployment diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/install.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/install.md deleted file mode 100644 index 3985f8a..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/install.md +++ /dev/null @@ -1,184 +0,0 @@ -## Installation - -Our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) -has step-by-step instructions that install detectron2. -The [Dockerfile](docker) -also installs detectron2 with a few simple commands. - -### Requirements -- Linux or macOS with Python ≥ 3.6 -- PyTorch ≥ 1.4 -- [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation. - You can install them together at [pytorch.org](https://pytorch.org) to make sure of this. 
-- OpenCV, optional, needed by demo and visualization -- pycocotools: `pip install cython; pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'` - - -### Build Detectron2 from Source - -gcc & g++ ≥ 5 are required. [ninja](https://ninja-build.org/) is recommended for faster build. -After having them, run: -``` -python -m pip install 'git+https://github.com/facebookresearch/detectron2.git' -# (add --user if you don't have permission) - -# Or, to install it from a local clone: -git clone https://github.com/facebookresearch/detectron2.git -python -m pip install -e detectron2 - -# Or if you are on macOS -# CC=clang CXX=clang++ python -m pip install -e . -``` - -To __rebuild__ detectron2 that's built from a local clone, use `rm -rf build/ **/*.so` to clean the -old build first. You often need to rebuild detectron2 after reinstalling PyTorch. - -### Install Pre-Built Detectron2 (Linux only) -``` -# for CUDA 10.1: -python -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/index.html -``` -You can replace cu101 with "cu{100,92}" or "cpu". - -Note that: -1. Such installation has to be used with certain version of official PyTorch release. - See [releases](https://github.com/facebookresearch/detectron2/releases) for requirements. - It will not work with a different version of PyTorch or a non-official build of PyTorch. -2. Such installation is out-of-date w.r.t. master branch of detectron2. It may not be - compatible with the master branch of a research project that uses detectron2 (e.g. those in - [projects](projects) or [meshrcnn](https://github.com/facebookresearch/meshrcnn/)). - -### Common Installation Issues - -If you met issues using the pre-built detectron2, please uninstall it and try building it from source. - -Click each issue for its solutions: - -
- -Undefined torch/aten/caffe2 symbols, or segmentation fault immediately when running the library. - -
- -This usually happens when detectron2 or torchvision is not -compiled with the version of PyTorch you're running. - -Pre-built torchvision or detectron2 has to work with the corresponding official release of pytorch. -If the error comes from a pre-built torchvision, uninstall torchvision and pytorch and reinstall them -following [pytorch.org](http://pytorch.org) so that the versions match. - -If the error comes from a pre-built detectron2, check [release notes](https://github.com/facebookresearch/detectron2/releases) -to see the corresponding pytorch version required for each pre-built detectron2. - -If the error comes from detectron2 or torchvision that you built manually from source, -remove the files you built (`build/`, `**/*.so`) and rebuild, so that the build picks up the version of pytorch currently in your environment. - -If you cannot resolve this problem, please include the output of `gdb -ex "r" -ex "bt" -ex "quit" --args python -m detectron2.utils.collect_env` -in your issue. -
- -
- -Undefined C++ symbols (e.g. `GLIBCXX`) or C++ symbols not found. - -
-Usually it's because the library is compiled with a newer C++ compiler but run with an old C++ runtime. - -This often happens with old anaconda. -Try `conda update libgcc`. Then rebuild detectron2. - -The fundamental solution is to run the code with a proper C++ runtime. -One way is to use `LD_PRELOAD=/path/to/libstdc++.so`. - -
- -
- -"Not compiled with GPU support" or "Detectron2 CUDA Compiler: not available". - -
-CUDA is not found when building detectron2. -You should make sure that - -``` -python -c 'import torch; from torch.utils.cpp_extension import CUDA_HOME; print(torch.cuda.is_available(), CUDA_HOME)' -``` - -prints valid outputs at the time you build detectron2. - -Most models can run inference (but not training) without GPU support. To use CPUs, set `MODEL.DEVICE='cpu'` in the config. -
- -
- -"invalid device function" or "no kernel image is available for execution". - -
-Two possibilities: - -* You build detectron2 with one version of CUDA but run it with a different version. - - To check whether it is the case, - use `python -m detectron2.utils.collect_env` to find out inconsistent CUDA versions. - In the output of this command, you should expect "Detectron2 CUDA Compiler", "CUDA_HOME", "PyTorch built with - CUDA" - to contain cuda libraries of the same version. - - When they are inconsistent, - you need to either install a different build of PyTorch (or build by yourself) - to match your local CUDA installation, or install a different version of CUDA to match PyTorch. - -* Detectron2 or PyTorch/torchvision is not built for the correct GPU architecture (compute compatibility). - - The GPU architecture for PyTorch/detectron2/torchvision is available in the "architecture flags" in - `python -m detectron2.utils.collect_env`. - - The GPU architecture flags of detectron2/torchvision by default matches the GPU model detected - during compilation. This means the compiled code may not work on a different GPU model. - To overwrite the GPU architecture for detectron2/torchvision, use `TORCH_CUDA_ARCH_LIST` environment variable during compilation. - - For example, `export TORCH_CUDA_ARCH_LIST=6.0,7.0` makes it compile for both P100s and V100s. - Visit [developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus) to find out - the correct compute compatibility number for your device. - -
- -
- -Undefined CUDA symbols; cannot open libcudart.so; other nvcc failures. - -
-The version of NVCC you use to build detectron2 or torchvision does -not match the version of CUDA you are running with. -This often happens when using anaconda's CUDA runtime. - -Use `python -m detectron2.utils.collect_env` to find out inconsistent CUDA versions. -In the output of this command, you should expect "Detectron2 CUDA Compiler", "CUDA_HOME", "PyTorch built with - CUDA" -to contain cuda libraries of the same version. - -When they are inconsistent, -you need to either install a different build of PyTorch (or build by yourself) -to match your local CUDA installation, or install a different version of CUDA to match PyTorch. -
- - -
- -"ImportError: cannot import name '_C'". - -
-Please build and install detectron2 following the instructions above. - -If you are running code from detectron2's root directory, `cd` to a different one. -Otherwise you may not import the code that you installed. -
- -
- -ONNX conversion segfault after some "TraceWarning". - -
-The ONNX package was compiled with a compiler that is too old.
-
-Please build and install ONNX from its source code using a compiler
-whose version is closer to the one used by PyTorch (shown in `torch.__config__.show()`).
-
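-For reference, the compiler and build flags used for your PyTorch install can be printed directly (a minimal sketch):
-
-```python
-import torch
-
-# The output includes the GCC/Clang version PyTorch was built with,
-# which is the version your ONNX build should be close to.
-print(torch.__config__.show())
-```
-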
diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/models.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/models.md deleted file mode 100644 index 456f36d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/models.md +++ /dev/null @@ -1,151 +0,0 @@ -# Use Models - -Models (and their sub-models) in detectron2 are built by -functions such as `build_model`, `build_backbone`, `build_roi_heads`: -```python -from detectron2.modeling import build_model -model = build_model(cfg) # returns a torch.nn.Module -``` - -`build_model` only builds the model structure, and fill it with random parameters. -See below for how to load an existing checkpoint to the model, -and how to use the `model` object. - -### Load/Save a Checkpoint -```python -from detectron2.checkpoint import DetectionCheckpointer -DetectionCheckpointer(model).load(file_path) # load a file to model - -checkpointer = DetectionCheckpointer(model, save_dir="output") -checkpointer.save("model_999") # save to output/model_999.pth -``` - -Detectron2's checkpointer recognizes models in pytorch's `.pth` format, as well as the `.pkl` files -in our model zoo. -See [API doc](../modules/checkpoint.html#detectron2.checkpoint.DetectionCheckpointer) -for more details about its usage. - -The model files can be arbitrarily manipulated using `torch.{load,save}` for `.pth` files or -`pickle.{dump,load}` for `.pkl` files. - -### Use a Model - -A model can be called by `outputs = model(inputs)`, where `inputs` is a `list[dict]`. -Each dict corresponds to one image and the required keys -depend on the type of model, and whether the model is in training or evaluation mode. -For example, in order to do inference, -all existing models expect the "image" key, and optionally "height" and "width". -The detailed format of inputs and outputs of existing models are explained below. - -When in training mode, all models are required to be used under an `EventStorage`. -The training statistics will be put into the storage: -```python -from detectron2.utils.events import EventStorage -with EventStorage() as storage: - losses = model(inputs) -``` - -If you only want to do simple inference using an existing model, -[DefaultPredictor](../modules/engine.html#detectron2.engine.defaults.DefaultPredictor) -is a wrapper around model that provides such basic functionality. -It includes default behavior including model loading, preprocessing, -and operates on single image rather than batches. - -### Model Input Format - -Users can implement custom models that support any arbitrary input format. -Here we describe the standard input format that all builtin models support in detectron2. -They all take a `list[dict]` as the inputs. Each dict -corresponds to information about one image. - -The dict may contain the following keys: - -* "image": `Tensor` in (C, H, W) format. The meaning of channels are defined by `cfg.INPUT.FORMAT`. - Image normalization, if any, will be performed inside the model using - `cfg.MODEL.PIXEL_{MEAN,STD}`. -* "instances": an [Instances](../modules/structures.html#detectron2.structures.Instances) - object, with the following fields: - + "gt_boxes": a [Boxes](../modules/structures.html#detectron2.structures.Boxes) object storing N boxes, one for each instance. - + "gt_classes": `Tensor` of long type, a vector of N labels, in range [0, num_categories). 
- + "gt_masks": a [PolygonMasks](../modules/structures.html#detectron2.structures.PolygonMasks) - or [BitMasks](../modules/structures.html#detectron2.structures.BitMasks) object storing N masks, one for each instance. - + "gt_keypoints": a [Keypoints](../modules/structures.html#detectron2.structures.Keypoints) - object storing N keypoint sets, one for each instance. -* "proposals": an [Instances](../modules/structures.html#detectron2.structures.Instances) - object used only in Fast R-CNN style models, with the following fields: - + "proposal_boxes": a [Boxes](../modules/structures.html#detectron2.structures.Boxes) object storing P proposal boxes. - + "objectness_logits": `Tensor`, a vector of P scores, one for each proposal. -* "height", "width": the **desired** output height and width, which is not necessarily the same - as the height or width of the `image` input field. - For example, the `image` input field might be a resized image, - but you may want the outputs to be in **original** resolution. - - If provided, the model will produce output in this resolution, - rather than in the resolution of the `image` as input into the model. This is more efficient and accurate. -* "sem_seg": `Tensor[int]` in (H, W) format. The semantic segmentation ground truth. - Values represent category labels starting from 0. - - -#### How it connects to data loader: - -The output of the default [DatasetMapper]( ../modules/data.html#detectron2.data.DatasetMapper) is a dict -that follows the above format. -After the data loader performs batching, it becomes `list[dict]` which the builtin models support. - - -### Model Output Format - -When in training mode, the builtin models output a `dict[str->ScalarTensor]` with all the losses. - -When in inference mode, the builtin models output a `list[dict]`, one dict for each image. -Based on the tasks the model is doing, each dict may contain the following fields: - -* "instances": [Instances](../modules/structures.html#detectron2.structures.Instances) - object with the following fields: - * "pred_boxes": [Boxes](../modules/structures.html#detectron2.structures.Boxes) object storing N boxes, one for each detected instance. - * "scores": `Tensor`, a vector of N scores. - * "pred_classes": `Tensor`, a vector of N labels in range [0, num_categories). - + "pred_masks": a `Tensor` of shape (N, H, W), masks for each detected instance. - + "pred_keypoints": a `Tensor` of shape (N, num_keypoint, 3). - Each row in the last dimension is (x, y, score). Scores are larger than 0. -* "sem_seg": `Tensor` of (num_categories, H, W), the semantic segmentation prediction. -* "proposals": [Instances](../modules/structures.html#detectron2.structures.Instances) - object with the following fields: - * "proposal_boxes": [Boxes](../modules/structures.html#detectron2.structures.Boxes) - object storing N boxes. - * "objectness_logits": a torch vector of N scores. -* "panoptic_seg": A tuple of `(Tensor, list[dict])`. The tensor has shape (H, W), where each element - represent the segment id of the pixel. Each dict describes one segment id and has the following fields: - * "id": the segment id - * "isthing": whether the segment is a thing or stuff - * "category_id": the category id of this segment. It represents the thing - class id when `isthing==True`, and the stuff class id otherwise. - - -### Partially execute a model: - -Sometimes you may want to obtain an intermediate tensor inside a model. 
-Since there are typically hundreds of intermediate tensors, there isn't an API that provides you -the intermediate result you need. -You have the following options: - -1. Write a (sub)model. Following the [tutorial](./write-models.md), you can - rewrite a model component (e.g. a head of a model), such that it - does the same thing as the existing component, but returns the output - you need. -2. Partially execute a model. You can create the model as usual, - but use custom code to execute it instead of its `forward()`. For example, - the following code obtains mask features before mask head. - -```python -images = ImageList.from_tensors(...) # preprocessed input tensor -model = build_model(cfg) -features = model.backbone(images.tensor) -proposals, _ = model.proposal_generator(images, features) -instances = model.roi_heads._forward_box(features, proposals) -mask_features = [features[f] for f in model.roi_heads.in_features] -mask_features = model.roi_heads.mask_pooler(mask_features, [x.pred_boxes for x in instances]) -``` - -Note that both options require you to read the existing forward code to understand -how to write code to obtain the outputs you need. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/training.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/training.md deleted file mode 100644 index dc7d537..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/training.md +++ /dev/null @@ -1,50 +0,0 @@ -# Training - -From the previous tutorials, you may now have a custom model and data loader. - -You are free to create your own optimizer, and write the training logic: it's -usually easy with PyTorch, and allow researchers to see the entire training -logic more clearly and have full control. -One such example is provided in [tools/plain_train_net.py](../../tools/plain_train_net.py). - -We also provide a standarized "trainer" abstraction with a -[minimal hook system](../modules/engine.html#detectron2.engine.HookBase) -that helps simplify the standard types of training. - -You can use -[SimpleTrainer().train()](../modules/engine.html#detectron2.engine.SimpleTrainer) -which provides minimal abstraction for single-cost single-optimizer single-data-source training. -The builtin `train_net.py` script uses -[DefaultTrainer().train()](../modules/engine.html#detectron2.engine.defaults.DefaultTrainer), -which includes more standard default behavior that one might want to opt in, -including default configurations for learning rate schedule, -logging, evaluation, checkpointing etc. -This also means that it's less likely to support some non-standard behavior -you might want during research. - -To customize the training loops, you can: - -1. If your customization is similar to what `DefaultTrainer` is already doing, -you can change behavior of `DefaultTrainer` by overwriting [its methods](../modules/engine.html#detectron2.engine.defaults.DefaultTrainer) -in a subclass, like what [tools/train_net.py](../../tools/train_net.py) does. -2. If you need something very novel, you can start from [tools/plain_train_net.py](../../tools/plain_train_net.py) to implement them yourself. - -### Logging of Metrics - -During training, metrics are saved to a centralized [EventStorage](../modules/utils.html#detectron2.utils.events.EventStorage). 
-You can use the following code to access it and log metrics to it: -``` -from detectron2.utils.events import get_event_storage - -# inside the model: -if self.training: - value = # compute the value from inputs - storage = get_event_storage() - storage.put_scalar("some_accuracy", value) -``` - -Refer to its documentation for more details. - -Metrics are then saved to various destinations with [EventWriter](../modules/utils.html#module-detectron2.utils.events). -DefaultTrainer enables a few `EventWriter` with default configurations. -See above for how to customize them. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/write-models.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/write-models.md deleted file mode 100644 index bb87d58..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/write-models.md +++ /dev/null @@ -1,39 +0,0 @@ -# Write Models - -If you are trying to do something completely new, you may wish to implement -a model entirely from scratch within detectron2. However, in many situations you may -be interested in modifying or extending some components of an existing model. -Therefore, we also provide a registration mechanism that lets you override the -behavior of certain internal components of standard models. - -For example, to add a new backbone, import this code in your code: -```python -from detectron2.modeling import BACKBONE_REGISTRY, Backbone, ShapeSpec - -@BACKBONE_REGISTRY.register() -class ToyBackBone(Backbone): - def __init__(self, cfg, input_shape): - # create your own backbone - self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=16, padding=3) - - def forward(self, image): - return {"conv1": self.conv1(image)} - - def output_shape(self): - return {"conv1": ShapeSpec(channels=64, stride=16)} -``` -Then, you can use `cfg.MODEL.BACKBONE.NAME = 'ToyBackBone'` in your config object. -`build_model(cfg)` will then call your `ToyBackBone` instead. - -As another example, to add new abilities to the ROI heads in the Generalized R-CNN meta-architecture, -you can implement a new -[ROIHeads](../modules/modeling.html#detectron2.modeling.ROIHeads) subclass and put it in the `ROI_HEADS_REGISTRY`. -See [densepose in detectron2](../../projects/DensePose) -and [meshrcnn](https://github.com/facebookresearch/meshrcnn) -for examples that implement new ROIHeads to perform new tasks. -And [projects/](../../projects/) -contains more examples that implement different architectures. - -A complete list of registries can be found in [API documentation](../modules/modeling.html#model-registries). -You can register components in these registries to customize different parts of a model, or the -entire model. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/README.md b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/README.md deleted file mode 100644 index fd2f1ee..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/README.md +++ /dev/null @@ -1,54 +0,0 @@ -# DensePose in Detectron2 -**Dense Human Pose Estimation In The Wild** - -_Rıza Alp Güler, Natalia Neverova, Iasonas Kokkinos_ - -[[`densepose.org`](https://densepose.org)] [[`arXiv`](https://arxiv.org/abs/1802.00434)] [[`BibTeX`](#CitingDensePose)] - -Dense human pose estimation aims at mapping all human pixels of an RGB image to the 3D surface of the human body. - -
- -
- -In this repository, we provide the code to train and evaluate DensePose-RCNN. We also provide tools to visualize -DensePose annotation and results. - -# Quick Start - -See [ Getting Started ](doc/GETTING_STARTED.md) - -# Model Zoo and Baselines - -We provide a number of baseline results and trained models available for download. See [Model Zoo](doc/MODEL_ZOO.md) for details. - -# License - -Detectron2 is released under the [Apache 2.0 license](../../LICENSE) - -## Citing DensePose - -If you use DensePose, please take the references from the following BibTeX entries: - -For DensePose with estimated confidences: - -``` -@InProceedings{Neverova2019DensePoseConfidences, - title = {Correlated Uncertainty for Learning Dense Correspondences from Noisy Labels}, - author = {Neverova, Natalia and Novotny, David and Vedaldi, Andrea}, - journal = {Advances in Neural Information Processing Systems}, - year = {2019}, -} -``` - -For the original DensePose: - -``` -@InProceedings{Guler2018DensePose, - title={DensePose: Dense Human Pose Estimation In The Wild}, - author={R\{i}za Alp G\"uler, Natalia Neverova, Iasonas Kokkinos}, - journal={The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, - year={2018} -} -``` - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/apply_net.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/apply_net.py deleted file mode 100644 index 7262f7c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/apply_net.py +++ /dev/null @@ -1,318 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import argparse -import glob -import logging -import os -import pickle -import sys -from typing import Any, ClassVar, Dict, List -import torch - -from detectron2.config import get_cfg -from detectron2.data.detection_utils import read_image -from detectron2.engine.defaults import DefaultPredictor -from detectron2.structures.boxes import BoxMode -from detectron2.structures.instances import Instances -from detectron2.utils.logger import setup_logger - -from densepose import add_densepose_config -from densepose.utils.logger import verbosity_to_level -from densepose.vis.base import CompoundVisualizer -from densepose.vis.bounding_box import ScoredBoundingBoxVisualizer -from densepose.vis.densepose import ( - DensePoseResultsContourVisualizer, - DensePoseResultsFineSegmentationVisualizer, - DensePoseResultsUVisualizer, - DensePoseResultsVVisualizer, -) -from densepose.vis.extractor import CompoundExtractor, create_extractor - -DOC = """Apply Net - a tool to print / visualize DensePose results -""" - -LOGGER_NAME = "apply_net" -logger = logging.getLogger(LOGGER_NAME) - -_ACTION_REGISTRY: Dict[str, "Action"] = {} - - -class Action(object): - @classmethod - def add_arguments(cls: type, parser: argparse.ArgumentParser): - parser.add_argument( - "-v", - "--verbosity", - action="count", - help="Verbose mode. 
Multiple -v options increase the verbosity.", - ) - - -def register_action(cls: type): - """ - Decorator for action classes to automate action registration - """ - global _ACTION_REGISTRY - _ACTION_REGISTRY[cls.COMMAND] = cls - return cls - - -class InferenceAction(Action): - @classmethod - def add_arguments(cls: type, parser: argparse.ArgumentParser): - super(InferenceAction, cls).add_arguments(parser) - parser.add_argument("cfg", metavar="", help="Config file") - parser.add_argument("model", metavar="", help="Model file") - parser.add_argument("input", metavar="", help="Input data") - parser.add_argument( - "--opts", - help="Modify config options using the command-line 'KEY VALUE' pairs", - default=[], - nargs=argparse.REMAINDER, - ) - - @classmethod - def execute(cls: type, args: argparse.Namespace): - logger.info(f"Loading config from {args.cfg}") - opts = [] - cfg = cls.setup_config(args.cfg, args.model, args, opts) - logger.info(f"Loading model from {args.model}") - predictor = DefaultPredictor(cfg) - logger.info(f"Loading data from {args.input}") - file_list = cls._get_input_file_list(args.input) - if len(file_list) == 0: - logger.warning(f"No input images for {args.input}") - return - context = cls.create_context(args) - for file_name in file_list: - img = read_image(file_name, format="BGR") # predictor expects BGR image. - with torch.no_grad(): - outputs = predictor(img)["instances"] - cls.execute_on_outputs(context, {"file_name": file_name, "image": img}, outputs) - cls.postexecute(context) - - @classmethod - def setup_config( - cls: type, config_fpath: str, model_fpath: str, args: argparse.Namespace, opts: List[str] - ): - cfg = get_cfg() - add_densepose_config(cfg) - cfg.merge_from_file(config_fpath) - cfg.merge_from_list(args.opts) - if opts: - cfg.merge_from_list(opts) - cfg.MODEL.WEIGHTS = model_fpath - cfg.freeze() - return cfg - - @classmethod - def _get_input_file_list(cls: type, input_spec: str): - if os.path.isdir(input_spec): - file_list = [ - os.path.join(input_spec, fname) - for fname in os.listdir(input_spec) - if os.path.isfile(os.path.join(input_spec, fname)) - ] - elif os.path.isfile(input_spec): - file_list = [input_spec] - else: - file_list = glob.glob(input_spec) - return file_list - - -@register_action -class DumpAction(InferenceAction): - """ - Dump action that outputs results to a pickle file - """ - - COMMAND: ClassVar[str] = "dump" - - @classmethod - def add_parser(cls: type, subparsers: argparse._SubParsersAction): - parser = subparsers.add_parser(cls.COMMAND, help="Dump model outputs to a file.") - cls.add_arguments(parser) - parser.set_defaults(func=cls.execute) - - @classmethod - def add_arguments(cls: type, parser: argparse.ArgumentParser): - super(DumpAction, cls).add_arguments(parser) - parser.add_argument( - "--output", - metavar="", - default="results.pkl", - help="File name to save dump to", - ) - - @classmethod - def execute_on_outputs( - cls: type, context: Dict[str, Any], entry: Dict[str, Any], outputs: Instances - ): - image_fpath = entry["file_name"] - logger.info(f"Processing {image_fpath}") - result = {"file_name": image_fpath} - if outputs.has("scores"): - result["scores"] = outputs.get("scores").cpu() - if outputs.has("pred_boxes"): - result["pred_boxes_XYXY"] = outputs.get("pred_boxes").tensor.cpu() - if outputs.has("pred_densepose"): - boxes_XYWH = BoxMode.convert( - result["pred_boxes_XYXY"], BoxMode.XYXY_ABS, BoxMode.XYWH_ABS - ) - result["pred_densepose"] = outputs.get("pred_densepose").to_result(boxes_XYWH) - 
context["results"].append(result) - - @classmethod - def create_context(cls: type, args: argparse.Namespace): - context = {"results": [], "out_fname": args.output} - return context - - @classmethod - def postexecute(cls: type, context: Dict[str, Any]): - out_fname = context["out_fname"] - out_dir = os.path.dirname(out_fname) - if len(out_dir) > 0 and not os.path.exists(out_dir): - os.makedirs(out_dir) - with open(out_fname, "wb") as hFile: - pickle.dump(context["results"], hFile) - logger.info(f"Output saved to {out_fname}") - - -@register_action -class ShowAction(InferenceAction): - """ - Show action that visualizes selected entries on an image - """ - - COMMAND: ClassVar[str] = "show" - VISUALIZERS: ClassVar[Dict[str, object]] = { - "dp_contour": DensePoseResultsContourVisualizer, - "dp_segm": DensePoseResultsFineSegmentationVisualizer, - "dp_u": DensePoseResultsUVisualizer, - "dp_v": DensePoseResultsVVisualizer, - "bbox": ScoredBoundingBoxVisualizer, - } - - @classmethod - def add_parser(cls: type, subparsers: argparse._SubParsersAction): - parser = subparsers.add_parser(cls.COMMAND, help="Visualize selected entries") - cls.add_arguments(parser) - parser.set_defaults(func=cls.execute) - - @classmethod - def add_arguments(cls: type, parser: argparse.ArgumentParser): - super(ShowAction, cls).add_arguments(parser) - parser.add_argument( - "visualizations", - metavar="", - help="Comma separated list of visualizations, possible values: " - "[{}]".format(",".join(sorted(cls.VISUALIZERS.keys()))), - ) - parser.add_argument( - "--min_score", - metavar="", - default=0.8, - type=float, - help="Minimum detection score to visualize", - ) - parser.add_argument( - "--nms_thresh", metavar="", default=None, type=float, help="NMS threshold" - ) - parser.add_argument( - "--output", - metavar="", - default="outputres.png", - help="File name to save output to", - ) - - @classmethod - def setup_config( - cls: type, config_fpath: str, model_fpath: str, args: argparse.Namespace, opts: List[str] - ): - opts.append("MODEL.ROI_HEADS.SCORE_THRESH_TEST") - opts.append(str(args.min_score)) - if args.nms_thresh is not None: - opts.append("MODEL.ROI_HEADS.NMS_THRESH_TEST") - opts.append(str(args.nms_thresh)) - cfg = super(ShowAction, cls).setup_config(config_fpath, model_fpath, args, opts) - return cfg - - @classmethod - def execute_on_outputs( - cls: type, context: Dict[str, Any], entry: Dict[str, Any], outputs: Instances - ): - import cv2 - import numpy as np - - visualizer = context["visualizer"] - extractor = context["extractor"] - image_fpath = entry["file_name"] - logger.info(f"Processing {image_fpath}") - image = cv2.cvtColor(entry["image"], cv2.COLOR_BGR2GRAY) - image = np.tile(image[:, :, np.newaxis], [1, 1, 3]) - data = extractor(outputs) - image_vis = visualizer.visualize(image, data) - entry_idx = context["entry_idx"] + 1 - out_fname = cls._get_out_fname(entry_idx, context["out_fname"]) - out_dir = os.path.dirname(out_fname) - if len(out_dir) > 0 and not os.path.exists(out_dir): - os.makedirs(out_dir) - cv2.imwrite(out_fname, image_vis) - logger.info(f"Output saved to {out_fname}") - context["entry_idx"] += 1 - - @classmethod - def postexecute(cls: type, context: Dict[str, Any]): - pass - - @classmethod - def _get_out_fname(cls: type, entry_idx: int, fname_base: str): - base, ext = os.path.splitext(fname_base) - return base + ".{0:04d}".format(entry_idx) + ext - - @classmethod - def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]: - vis_specs = args.visualizations.split(",") - 
visualizers = [] - extractors = [] - for vis_spec in vis_specs: - vis = cls.VISUALIZERS[vis_spec]() - visualizers.append(vis) - extractor = create_extractor(vis) - extractors.append(extractor) - visualizer = CompoundVisualizer(visualizers) - extractor = CompoundExtractor(extractors) - context = { - "extractor": extractor, - "visualizer": visualizer, - "out_fname": args.output, - "entry_idx": 0, - } - return context - - -def create_argument_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser( - description=DOC, - formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=120), - ) - parser.set_defaults(func=lambda _: parser.print_help(sys.stdout)) - subparsers = parser.add_subparsers(title="Actions") - for _, action in _ACTION_REGISTRY.items(): - action.add_parser(subparsers) - return parser - - -def main(): - parser = create_argument_parser() - args = parser.parse_args() - verbosity = args.verbosity if hasattr(args, "verbosity") else None - global logger - logger = setup_logger(name=LOGGER_NAME) - logger.setLevel(verbosity_to_level(verbosity)) - args.func(args) - - -if __name__ == "__main__": - main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/Base-DensePose-RCNN-FPN.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/Base-DensePose-RCNN-FPN.yaml deleted file mode 100644 index 3ed1bcd..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/Base-DensePose-RCNN-FPN.yaml +++ /dev/null @@ -1,47 +0,0 @@ -MODEL: - META_ARCHITECTURE: "GeneralizedRCNN" - BACKBONE: - NAME: "build_resnet_fpn_backbone" - RESNETS: - OUT_FEATURES: ["res2", "res3", "res4", "res5"] - FPN: - IN_FEATURES: ["res2", "res3", "res4", "res5"] - ANCHOR_GENERATOR: - SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map - ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) - RPN: - IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] - PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level - PRE_NMS_TOPK_TEST: 1000 # Per FPN level - # Detectron1 uses 2000 proposals per-batch, - # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) - # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 
- POST_NMS_TOPK_TRAIN: 1000 - POST_NMS_TOPK_TEST: 1000 - - DENSEPOSE_ON: True - ROI_HEADS: - NAME: "DensePoseROIHeads" - IN_FEATURES: ["p2", "p3", "p4", "p5"] - NUM_CLASSES: 1 - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_FC: 2 - POOLER_RESOLUTION: 7 - POOLER_SAMPLING_RATIO: 2 - POOLER_TYPE: "ROIAlign" - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseV1ConvXHead" - POOLER_TYPE: "ROIAlign" - NUM_COARSE_SEGM_CHANNELS: 2 -DATASETS: - TRAIN: ("densepose_coco_2014_train", "densepose_coco_2014_valminusminival") - TEST: ("densepose_coco_2014_minival",) -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.01 - STEPS: (60000, 80000) - MAX_ITER: 90000 - WARMUP_FACTOR: 0.1 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml deleted file mode 100644 index 15475b1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" - UV_CONFIDENCE: - ENABLED: True - TYPE: "iid_iso" - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml deleted file mode 100644 index 7546b96..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" - UV_CONFIDENCE: - ENABLED: True - TYPE: "indep_aniso" - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml deleted file mode 100644 index 045f7f0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml +++ /dev/null @@ -1,10 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" -SOLVER: - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1_s1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1_s1x.yaml deleted file mode 100644 index ace6209..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1_s1x.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: 
"detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 - ROI_DENSEPOSE_HEAD: - UV_CONFIDENCE: - ENABLED: True - TYPE: "iid_iso" - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) - WARMUP_FACTOR: 0.025 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2_s1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2_s1x.yaml deleted file mode 100644 index 766c098..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2_s1x.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 - ROI_DENSEPOSE_HEAD: - UV_CONFIDENCE: - ENABLED: True - TYPE: "indep_aniso" - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) - WARMUP_FACTOR: 0.025 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x.yaml deleted file mode 100644 index af44fb7..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 -SOLVER: - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml deleted file mode 100644 index 8e79a1b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml +++ /dev/null @@ -1,17 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 - ROI_DENSEPOSE_HEAD: - NUM_COARSE_SEGM_CHANNELS: 15 - POOLER_RESOLUTION: 14 - HEATMAP_SIZE: 56 - INDEX_WEIGHTS: 2.0 - PART_WEIGHTS: 0.3 - POINT_REGRESSION_WEIGHTS: 0.1 - DECODER_ON: False -SOLVER: - BASE_LR: 0.002 - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml deleted file mode 100644 index f3720ef..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" - UV_CONFIDENCE: - ENABLED: True - TYPE: "iid_iso" - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml 
b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml deleted file mode 100644 index 5a47cc0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" - UV_CONFIDENCE: - ENABLED: True - TYPE: "indep_aniso" - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml deleted file mode 100644 index 52a170b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml +++ /dev/null @@ -1,10 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" -SOLVER: - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml deleted file mode 100644 index d36e542..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - UV_CONFIDENCE: - ENABLED: True - TYPE: "iid_iso" - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) - WARMUP_FACTOR: 0.025 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2_s1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2_s1x.yaml deleted file mode 100644 index e880d46..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2_s1x.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - UV_CONFIDENCE: - ENABLED: True - TYPE: "indep_aniso" - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) - WARMUP_FACTOR: 0.025 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x.yaml deleted file mode 100644 index d2dd14c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 -SOLVER: - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git 
a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml deleted file mode 100644 index 6c5391f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml +++ /dev/null @@ -1,17 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - NUM_COARSE_SEGM_CHANNELS: 15 - POOLER_RESOLUTION: 14 - HEATMAP_SIZE: 56 - INDEX_WEIGHTS: 2.0 - PART_WEIGHTS: 0.3 - POINT_REGRESSION_WEIGHTS: 0.1 - DECODER_ON: False -SOLVER: - BASE_LR: 0.002 - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/evolution/Base-RCNN-FPN-MC.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/evolution/Base-RCNN-FPN-MC.yaml deleted file mode 100644 index 5a20882..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/evolution/Base-RCNN-FPN-MC.yaml +++ /dev/null @@ -1,91 +0,0 @@ -MODEL: - META_ARCHITECTURE: "GeneralizedRCNN" - BACKBONE: - NAME: "build_resnet_fpn_backbone" - RESNETS: - OUT_FEATURES: ["res2", "res3", "res4", "res5"] - FPN: - IN_FEATURES: ["res2", "res3", "res4", "res5"] - ANCHOR_GENERATOR: - SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map - ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) - RPN: - IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] - PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level - PRE_NMS_TOPK_TEST: 1000 # Per FPN level - # Detectron1 uses 2000 proposals per-batch, - # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) - # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 
- POST_NMS_TOPK_TRAIN: 1000 - POST_NMS_TOPK_TEST: 1000 - ROI_HEADS: - NAME: "StandardROIHeads" - IN_FEATURES: ["p2", "p3", "p4", "p5"] - NUM_CLASSES: 1 - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_FC: 2 - POOLER_RESOLUTION: 7 - ROI_MASK_HEAD: - NAME: "MaskRCNNConvUpsampleHead" - NUM_CONV: 4 - POOLER_RESOLUTION: 14 -DATASETS: - TRAIN: ("base_coco_2017_train",) - TEST: ("base_coco_2017_val", "densepose_chimps") - CATEGORY_MAPS: - "base_coco_2017_train": - "16": 1 # bird -> person - "17": 1 # cat -> person - "18": 1 # dog -> person - "19": 1 # horse -> person - "20": 1 # sheep -> person - "21": 1 # cow -> person - "22": 1 # elephant -> person - "23": 1 # bear -> person - "24": 1 # zebra -> person - "25": 1 # girafe -> person - "base_coco_2017_val": - "16": 1 # bird -> person - "17": 1 # cat -> person - "18": 1 # dog -> person - "19": 1 # horse -> person - "20": 1 # sheep -> person - "21": 1 # cow -> person - "22": 1 # elephant -> person - "23": 1 # bear -> person - "24": 1 # zebra -> person - "25": 1 # girafe -> person - WHITELISTED_CATEGORIES: - "base_coco_2017_train": - - 1 # person - - 16 # bird - - 17 # cat - - 18 # dog - - 19 # horse - - 20 # sheep - - 21 # cow - - 22 # elephant - - 23 # bear - - 24 # zebra - - 25 # girafe - "base_coco_2017_val": - - 1 # person - - 16 # bird - - 17 # cat - - 18 # dog - - 19 # horse - - 20 # sheep - - 21 # cow - - 22 # elephant - - 23 # bear - - 24 # zebra - - 25 # girafe -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.02 - STEPS: (60000, 80000) - MAX_ITER: 90000 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -VERSION: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/evolution/faster_rcnn_R_50_FPN_1x_MC.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/evolution/faster_rcnn_R_50_FPN_1x_MC.yaml deleted file mode 100644 index 80139ad..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/evolution/faster_rcnn_R_50_FPN_1x_MC.yaml +++ /dev/null @@ -1,7 +0,0 @@ -_BASE_: "Base-RCNN-FPN-MC.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - DENSEPOSE_ON: False - RESNETS: - DEPTH: 50 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_DL_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_DL_instant_test.yaml deleted file mode 100644 index b90989e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_DL_instant_test.yaml +++ /dev/null @@ -1,11 +0,0 @@ -_BASE_: "../Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" -DATASETS: - TRAIN: ("densepose_coco_2014_minival_100",) - TEST: ("densepose_coco_2014_minival_100",) -SOLVER: - MAX_ITER: 40 - STEPS: (30,) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_TTA_inference_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_TTA_inference_acc_test.yaml deleted file mode 100644 index 7d41274..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_TTA_inference_acc_test.yaml +++ /dev/null @@ -1,13 +0,0 @@ 
-_BASE_: "../densepose_rcnn_R_50_FPN_s1x.yaml" -MODEL: - WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl" -DATASETS: - TRAIN: () - TEST: ("densepose_coco_2014_minival_100",) -TEST: - AUG: - ENABLED: True - MIN_SIZES: (400, 500, 600, 700, 800, 900, 1000, 1100, 1200) - MAX_SIZE: 4000 - FLIP: True - EXPECTED_RESULTS: [["bbox_TTA", "AP", 61.74, 0.03], ["densepose_gps_TTA", "AP", 60.22, 0.03], ["densepose_gpsm_TTA", "AP", 63.85, 0.03]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC1_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC1_instant_test.yaml deleted file mode 100644 index f0fe611..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC1_instant_test.yaml +++ /dev/null @@ -1,19 +0,0 @@ -_BASE_: "../Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - UV_CONFIDENCE: - ENABLED: True - TYPE: "iid_iso" - POINT_REGRESSION_WEIGHTS: 0.0005 -DATASETS: - TRAIN: ("densepose_coco_2014_minival_100",) - TEST: ("densepose_coco_2014_minival_100",) -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 40 - STEPS: (30,) - WARMUP_FACTOR: 0.025 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC2_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC2_instant_test.yaml deleted file mode 100644 index f0d9358..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC2_instant_test.yaml +++ /dev/null @@ -1,19 +0,0 @@ -_BASE_: "../Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - UV_CONFIDENCE: - ENABLED: True - TYPE: "indep_aniso" - POINT_REGRESSION_WEIGHTS: 0.0005 -DATASETS: - TRAIN: ("densepose_coco_2014_minival_100",) - TEST: ("densepose_coco_2014_minival_100",) -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 40 - STEPS: (30,) - WARMUP_FACTOR: 0.025 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_inference_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_inference_acc_test.yaml deleted file mode 100644 index 3c5a7d2..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_inference_acc_test.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "../densepose_rcnn_R_50_FPN_s1x.yaml" -MODEL: - WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl" -DATASETS: - TRAIN: () - TEST: ("densepose_coco_2014_minival_100",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 59.27, 0.025], ["densepose_gps", "AP", 60.11, 0.02], ["densepose_gpsm", "AP", 64.20, 0.02]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_instant_test.yaml 
b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_instant_test.yaml deleted file mode 100644 index 057c876..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_instant_test.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" -DATASETS: - TRAIN: ("densepose_coco_2014_minival_100",) - TEST: ("densepose_coco_2014_minival_100",) -SOLVER: - MAX_ITER: 40 - STEPS: (30,) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_training_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_training_acc_test.yaml deleted file mode 100644 index b991160..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_training_acc_test.yaml +++ /dev/null @@ -1,14 +0,0 @@ -_BASE_: "../Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - ROI_HEADS: - NUM_CLASSES: 1 -DATASETS: - TRAIN: ("densepose_coco_2014_minival",) - TEST: ("densepose_coco_2014_minival",) -SOLVER: - MAX_ITER: 6000 - STEPS: (5500, 5800) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 58.27, 1.0], ["densepose_gps", "AP", 42.47, 1.5], ["densepose_gpsm", "AP", 49.20, 1.5]] - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/__init__.py deleted file mode 100644 index aea5a1a..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .data.datasets import builtin # just to register data -from .config import add_densepose_config, add_dataset_category_config -from .densepose_head import ROI_DENSEPOSE_HEAD_REGISTRY -from .evaluator import DensePoseCOCOEvaluator -from .roi_head import DensePoseROIHeads -from .data.structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData -from .modeling.test_time_augmentation import DensePoseGeneralizedRCNNWithTTA -from .utils.transform import load_from_cfg diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/config.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/config.py deleted file mode 100644 index 2d76056..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/config.py +++ /dev/null @@ -1,68 +0,0 @@ -# -*- coding = utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -from detectron2.config import CfgNode as CN - - -def add_dataset_category_config(cfg: CN): - """ - Add config for additional category-related dataset options - - category whitelisting - - category mapping - """ - _C = cfg - _C.DATASETS.CATEGORY_MAPS = CN(new_allowed=True) - _C.DATASETS.WHITELISTED_CATEGORIES = CN(new_allowed=True) - - -def add_densepose_config(cfg: CN): - """ - Add config for densepose head. 
- """ - _C = cfg - - _C.MODEL.DENSEPOSE_ON = True - - _C.MODEL.ROI_DENSEPOSE_HEAD = CN() - _C.MODEL.ROI_DENSEPOSE_HEAD.NAME = "" - _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS = 8 - # Number of parts used for point labels - _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES = 24 - _C.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL = 4 - _C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM = 512 - _C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL = 3 - _C.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE = 2 - _C.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE = 112 - _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE = "ROIAlignV2" - _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION = 28 - _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO = 2 - _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS = 2 # 15 or 2 - # Overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD) - _C.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD = 0.7 - # Loss weights for annotation masks.(14 Parts) - _C.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS = 5.0 - # Loss weights for surface parts. (24 Parts) - _C.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS = 1.0 - # Loss weights for UV regression. - _C.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS = 0.01 - # For Decoder - _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON = True - _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES = 256 - _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS = 256 - _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM = "" - _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE = 4 - # For DeepLab head - _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB = CN() - _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM = "GN" - _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON = 0 - # Confidences - # Enable learning confidences (variances) along with the actual values - _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE = CN({"ENABLED": False}) - # UV confidence lower bound - _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON = 0.01 - # Statistical model type for confidence learning, possible values: - # - "iid_iso": statistically independent identically distributed residuals - # with isotropic covariance - # - "indep_aniso": statistically independent residuals with anisotropic - # covariances - _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE = "iid_iso" diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/__init__.py deleted file mode 100644 index 5484f59..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -from .build import build_detection_test_loader, build_detection_train_loader -from .dataset_mapper import DatasetMapper - -# ensure the builtin data are registered -from . import datasets - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/build.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/build.py deleted file mode 100644 index c722ec1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/build.py +++ /dev/null @@ -1,405 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -import itertools -import logging -import numpy as np -import operator -from typing import Any, Callable, Collection, Dict, Iterable, List, Optional -import torch - -from detectron2.config import CfgNode -from detectron2.data import samplers -from detectron2.data.build import ( - load_proposals_into_dataset, - print_instances_class_histogram, - trivial_batch_collator, - worker_init_reset_seed, -) -from detectron2.data.catalog import DatasetCatalog, MetadataCatalog -from detectron2.data.common import AspectRatioGroupedDataset, DatasetFromList, MapDataset -from detectron2.utils.comm import get_world_size - -from .dataset_mapper import DatasetMapper -from .datasets.coco import DENSEPOSE_KEYS_WITHOUT_MASK as DENSEPOSE_COCO_KEYS_WITHOUT_MASK -from .datasets.coco import DENSEPOSE_MASK_KEY as DENSEPOSE_COCO_MASK_KEY - -__all__ = ["build_detection_train_loader", "build_detection_test_loader"] - - -Instance = Dict[str, Any] -InstancePredicate = Callable[[Instance], bool] - - -def _compute_num_images_per_worker(cfg: CfgNode): - num_workers = get_world_size() - images_per_batch = cfg.SOLVER.IMS_PER_BATCH - assert ( - images_per_batch % num_workers == 0 - ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format( - images_per_batch, num_workers - ) - assert ( - images_per_batch >= num_workers - ), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format( - images_per_batch, num_workers - ) - images_per_worker = images_per_batch // num_workers - return images_per_worker - - -def _map_category_id_to_contiguous_id(dataset_name: str, dataset_dicts: Iterable[Instance]): - meta = MetadataCatalog.get(dataset_name) - for dataset_dict in dataset_dicts: - for ann in dataset_dict["annotations"]: - ann["category_id"] = meta.thing_dataset_id_to_contiguous_id[ann["category_id"]] - - -def _add_category_id_to_contiguous_id_maps_to_metadata(dataset_names: Iterable[str]): - # merge categories for all data - merged_categories = {} - for dataset_name in dataset_names: - meta = MetadataCatalog.get(dataset_name) - for cat_id, cat_name in meta.categories.items(): - if cat_id not in merged_categories: - merged_categories[cat_id] = (cat_name, dataset_name) - continue - cat_name_other, dataset_name_other = merged_categories[cat_id] - if cat_name_other != cat_name: - raise ValueError( - f"Incompatible categories for category ID {cat_id}: " - f'dataset {dataset_name} value "{cat_name}", ' - f'dataset {dataset_name_other} value "{cat_name_other}"' - ) - - merged_cat_id_to_cont_id = {} - for i, cat_id in enumerate(sorted(merged_categories.keys())): - merged_cat_id_to_cont_id[cat_id] = i - - # add category maps to metadata - for dataset_name in dataset_names: - meta = MetadataCatalog.get(dataset_name) - categories = meta.get("categories") - meta.thing_classes = [categories[cat_id] for cat_id in sorted(categories.keys())] - meta.thing_dataset_id_to_contiguous_id = { - cat_id: merged_cat_id_to_cont_id[cat_id] for cat_id in sorted(categories.keys()) - } - meta.thing_contiguous_id_to_dataset_id = { - merged_cat_id_to_cont_id[cat_id]: cat_id for cat_id in sorted(categories.keys()) - } - - -def _maybe_create_general_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]: - def has_annotations(instance: Instance) -> bool: - return "annotations" in instance - - def has_only_crowd_anotations(instance: Instance) -> bool: - for ann in instance["annotations"]: - if ann.get("is_crowd", 0) == 0: - return False - return True - - def 
general_keep_instance_predicate(instance: Instance) -> bool: - return has_annotations(instance) and not has_only_crowd_anotations(instance) - - if not cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS: - return None - return general_keep_instance_predicate - - -def _maybe_create_keypoints_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]: - - min_num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE - - def has_sufficient_num_keypoints(instance: Instance) -> bool: - num_kpts = sum( - (np.array(ann["keypoints"][2::3]) > 0).sum() - for ann in instance["annotations"] - if "keypoints" in ann - ) - return num_kpts >= min_num_keypoints - - if cfg.MODEL.KEYPOINT_ON and (min_num_keypoints > 0): - return has_sufficient_num_keypoints - return None - - -def _maybe_create_mask_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]: - if not cfg.MODEL.MASK_ON: - return None - - def has_mask_annotations(instance: Instance) -> bool: - return any("segmentation" in ann for ann in instance["annotations"]) - - return has_mask_annotations - - -def _maybe_create_densepose_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]: - if not cfg.MODEL.DENSEPOSE_ON: - return None - - def has_densepose_annotations(instance: Instance) -> bool: - for ann in instance["annotations"]: - if all(key in ann for key in DENSEPOSE_COCO_KEYS_WITHOUT_MASK) and ( - (DENSEPOSE_COCO_MASK_KEY in ann) or ("segmentation" in ann) - ): - return True - return False - - return has_densepose_annotations - - -def _maybe_create_specific_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]: - specific_predicate_creators = [ - _maybe_create_keypoints_keep_instance_predicate, - _maybe_create_mask_keep_instance_predicate, - _maybe_create_densepose_keep_instance_predicate, - ] - predicates = [creator(cfg) for creator in specific_predicate_creators] - predicates = [p for p in predicates if p is not None] - if not predicates: - return None - - def combined_predicate(instance: Instance) -> bool: - return any(p(instance) for p in predicates) - - return combined_predicate - - -def _get_train_keep_instance_predicate(cfg: CfgNode): - general_keep_predicate = _maybe_create_general_keep_instance_predicate(cfg) - combined_specific_keep_predicate = _maybe_create_specific_keep_instance_predicate(cfg) - - def combined_general_specific_keep_predicate(instance: Instance) -> bool: - return general_keep_predicate(instance) and combined_specific_keep_predicate(instance) - - if (general_keep_predicate is None) and (combined_specific_keep_predicate is None): - return None - if general_keep_predicate is None: - return combined_specific_keep_predicate - if combined_specific_keep_predicate is None: - return general_keep_predicate - return combined_general_specific_keep_predicate - - -def _get_test_keep_instance_predicate(cfg: CfgNode): - general_keep_predicate = _maybe_create_general_keep_instance_predicate(cfg) - return general_keep_predicate - - -def _maybe_filter_and_map_categories( - dataset_name: str, dataset_dicts: List[Instance] -) -> List[Instance]: - meta = MetadataCatalog.get(dataset_name) - whitelisted_categories = meta.get("whitelisted_categories") - category_map = meta.get("category_map", {}) - if whitelisted_categories is None and not category_map: - return dataset_dicts - filtered_dataset_dicts = [] - for dataset_dict in dataset_dicts: - anns = [] - for ann in dataset_dict["annotations"]: - cat_id = ann["category_id"] - if whitelisted_categories is not None and cat_id not in 
whitelisted_categories: - continue - ann["category_id"] = category_map.get(cat_id, cat_id) - anns.append(ann) - dataset_dict["annotations"] = anns - filtered_dataset_dicts.append(dataset_dict) - return filtered_dataset_dicts - - -def _add_category_whitelists_to_metadata(cfg: CfgNode): - for dataset_name, whitelisted_cat_ids in cfg.DATASETS.WHITELISTED_CATEGORIES.items(): - meta = MetadataCatalog.get(dataset_name) - meta.whitelisted_categories = whitelisted_cat_ids - logger = logging.getLogger(__name__) - logger.info( - "Whitelisted categories for dataset {}: {}".format( - dataset_name, meta.whitelisted_categories - ) - ) - - -def _add_category_maps_to_metadata(cfg: CfgNode): - for dataset_name, category_map in cfg.DATASETS.CATEGORY_MAPS.items(): - category_map = { - int(cat_id_src): int(cat_id_dst) for cat_id_src, cat_id_dst in category_map.items() - } - meta = MetadataCatalog.get(dataset_name) - meta.category_map = category_map - logger = logging.getLogger(__name__) - logger.info("Category maps for dataset {}: {}".format(dataset_name, meta.category_map)) - - -def combine_detection_dataset_dicts( - dataset_names: Collection[str], - keep_instance_predicate: Optional[InstancePredicate] = None, - proposal_files: Optional[Collection[str]] = None, -) -> List[Instance]: - """ - Load and prepare dataset dicts for training / testing - - Args: - dataset_names (Collection[str]): a list of dataset names - keep_instance_predicate (Callable: Dict[str, Any] -> bool): predicate - applied to instance dicts which defines whether to keep the instance - proposal_files (Collection[str]): if given, a list of object proposal files - that match each dataset in `dataset_names`. - """ - assert len(dataset_names) - if proposal_files is None: - proposal_files = [None] * len(dataset_names) - assert len(dataset_names) == len(proposal_files) - # load annotations and dataset metadata - dataset_map = {} - for dataset_name in dataset_names: - dataset_dicts = DatasetCatalog.get(dataset_name) - dataset_map[dataset_name] = dataset_dicts - # initialize category maps - _add_category_id_to_contiguous_id_maps_to_metadata(dataset_names) - # apply category maps - all_datasets_dicts = [] - for dataset_name, proposal_file in zip(dataset_names, proposal_files): - dataset_dicts = dataset_map[dataset_name] - assert len(dataset_dicts), f"Dataset '{dataset_name}' is empty!" - if proposal_file is not None: - dataset_dicts = load_proposals_into_dataset(dataset_dicts, proposal_file) - dataset_dicts = _maybe_filter_and_map_categories(dataset_name, dataset_dicts) - _map_category_id_to_contiguous_id(dataset_name, dataset_dicts) - print_instances_class_histogram( - dataset_dicts, MetadataCatalog.get(dataset_name).thing_classes - ) - all_datasets_dicts.append(dataset_dicts) - - if keep_instance_predicate is not None: - all_datasets_dicts_plain = [ - d - for d in itertools.chain.from_iterable(all_datasets_dicts) - if keep_instance_predicate(d) - ] - else: - all_datasets_dicts_plain = list(itertools.chain.from_iterable(all_datasets_dicts)) - return all_datasets_dicts_plain - - -def build_detection_train_loader(cfg: CfgNode, mapper=None): - """ - A data loader is created in a way similar to that of Detectron2. - The main differences are: - - it allows to combine data with different but compatible object category sets - - The data loader is created by the following steps: - 1. Use the dataset names in config to query :class:`DatasetCatalog`, and obtain a list of dicts. - 2. Start workers to work on the dicts. 
Each worker will: - * Map each metadata dict into another format to be consumed by the model. - * Batch them by simply putting dicts into a list. - The batched ``list[mapped_dict]`` is what this dataloader will return. - - Args: - cfg (CfgNode): the config - mapper (callable): a callable which takes a sample (dict) from dataset and - returns the format to be consumed by the model. - By default it will be `DatasetMapper(cfg, True)`. - - Returns: - an infinite iterator of training data - """ - images_per_worker = _compute_num_images_per_worker(cfg) - - _add_category_whitelists_to_metadata(cfg) - _add_category_maps_to_metadata(cfg) - dataset_dicts = combine_detection_dataset_dicts( - cfg.DATASETS.TRAIN, - keep_instance_predicate=_get_train_keep_instance_predicate(cfg), - proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None, - ) - dataset = DatasetFromList(dataset_dicts, copy=False) - - if mapper is None: - mapper = DatasetMapper(cfg, True) - dataset = MapDataset(dataset, mapper) - - sampler_name = cfg.DATALOADER.SAMPLER_TRAIN - logger = logging.getLogger(__name__) - logger.info("Using training sampler {}".format(sampler_name)) - if sampler_name == "TrainingSampler": - sampler = samplers.TrainingSampler(len(dataset)) - elif sampler_name == "RepeatFactorTrainingSampler": - sampler = samplers.RepeatFactorTrainingSampler( - dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD - ) - else: - raise ValueError("Unknown training sampler: {}".format(sampler_name)) - - if cfg.DATALOADER.ASPECT_RATIO_GROUPING: - data_loader = torch.utils.data.DataLoader( - dataset, - sampler=sampler, - num_workers=cfg.DATALOADER.NUM_WORKERS, - batch_sampler=None, - collate_fn=operator.itemgetter(0), # don't batch, but yield individual elements - worker_init_fn=worker_init_reset_seed, - ) # yield individual mapped dict - data_loader = AspectRatioGroupedDataset(data_loader, images_per_worker) - else: - batch_sampler = torch.utils.data.sampler.BatchSampler( - sampler, images_per_worker, drop_last=True - ) - # drop_last so the batch always have the same size - data_loader = torch.utils.data.DataLoader( - dataset, - num_workers=cfg.DATALOADER.NUM_WORKERS, - batch_sampler=batch_sampler, - collate_fn=trivial_batch_collator, - worker_init_fn=worker_init_reset_seed, - ) - - return data_loader - - -def build_detection_test_loader(cfg, dataset_name, mapper=None): - """ - Similar to `build_detection_train_loader`. - But this function uses the given `dataset_name` argument (instead of the names in cfg), - and uses batch size 1. - - Args: - cfg: a detectron2 CfgNode - dataset_name (str): a name of the dataset that's available in the DatasetCatalog - mapper (callable): a callable which takes a sample (dict) from dataset - and returns the format to be consumed by the model. - By default it will be `DatasetMapper(cfg, False)`. - - Returns: - DataLoader: a torch DataLoader, that loads the given detection - dataset, with test-time transformation and batching. 
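The keep-instance predicates above compose in a fixed pattern: a training instance survives filtering only if it passes the general non-crowd check and at least one task-specific check (keypoints, masks or DensePose). A minimal self-contained sketch of that composition, using plain dicts and hypothetical predicates in place of the cfg-driven helpers (the names combine_keep_predicates, has_annotations and has_mask are illustrative only):

from typing import Any, Callable, Dict, List, Optional

Instance = Dict[str, Any]
InstancePredicate = Callable[[Instance], bool]

def combine_keep_predicates(
    general: Optional[InstancePredicate],
    specific: List[InstancePredicate],
) -> Optional[InstancePredicate]:
    # Same composition as _get_train_keep_instance_predicate: general AND any(specific).
    combined_specific: Optional[InstancePredicate] = None
    if specific:
        combined_specific = lambda inst: any(p(inst) for p in specific)
    if general is None:
        return combined_specific
    if combined_specific is None:
        return general
    return lambda inst: general(inst) and combined_specific(inst)

# Hypothetical stand-ins for the cfg-driven predicates above.
def has_annotations(inst: Instance) -> bool:
    return bool(inst.get("annotations"))

def has_mask(inst: Instance) -> bool:
    return any("segmentation" in ann for ann in inst.get("annotations", []))

keep = combine_keep_predicates(has_annotations, [has_mask])
print(keep({"annotations": [{"segmentation": [[0, 0, 5, 0, 5, 5]]}]}))  # True
print(keep({"annotations": [{"bbox": [0, 0, 10, 10]}]}))                # False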
- """ - _add_category_whitelists_to_metadata(cfg) - _add_category_maps_to_metadata(cfg) - dataset_dicts = combine_detection_dataset_dicts( - [dataset_name], - keep_instance_predicate=_get_test_keep_instance_predicate(cfg), - proposal_files=[ - cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(dataset_name)] - ] - if cfg.MODEL.LOAD_PROPOSALS - else None, - ) - - dataset = DatasetFromList(dataset_dicts) - if mapper is None: - mapper = DatasetMapper(cfg, False) - dataset = MapDataset(dataset, mapper) - - sampler = samplers.InferenceSampler(len(dataset)) - # Always use 1 image per worker during inference since this is the - # standard when reporting inference time in papers. - batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False) - - data_loader = torch.utils.data.DataLoader( - dataset, - num_workers=cfg.DATALOADER.NUM_WORKERS, - batch_sampler=batch_sampler, - collate_fn=trivial_batch_collator, - ) - return data_loader diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/dataset_mapper.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/dataset_mapper.py deleted file mode 100644 index f749767..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/dataset_mapper.py +++ /dev/null @@ -1,118 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import copy -import torch -from fvcore.common.file_io import PathManager - -from detectron2.data import MetadataCatalog -from detectron2.data import detection_utils as utils -from detectron2.data import transforms as T - -from .structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData - - -class DatasetMapper: - """ - A customized version of `detectron2.data.DatasetMapper` - """ - - def __init__(self, cfg, is_train=True): - self.tfm_gens = utils.build_transform_gen(cfg, is_train) - - # fmt: off - self.img_format = cfg.INPUT.FORMAT - self.mask_on = cfg.MODEL.MASK_ON - self.keypoint_on = cfg.MODEL.KEYPOINT_ON - self.densepose_on = cfg.MODEL.DENSEPOSE_ON - assert not cfg.MODEL.LOAD_PROPOSALS, "not supported yet" - # fmt: on - if self.keypoint_on and is_train: - # Flip only makes sense in training - self.keypoint_hflip_indices = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN) - else: - self.keypoint_hflip_indices = None - - if self.densepose_on: - densepose_transform_srcs = [ - MetadataCatalog.get(ds).densepose_transform_src - for ds in cfg.DATASETS.TRAIN + cfg.DATASETS.TEST - ] - assert len(densepose_transform_srcs) > 0 - # TODO: check that DensePose transformation data is the same for - # all the data. Otherwise one would have to pass DB ID with - # each entry to select proper transformation data. For now, since - # all DensePose annotated data uses the same data semantics, we - # omit this check. - densepose_transform_data_fpath = PathManager.get_local_path(densepose_transform_srcs[0]) - self.densepose_transform_data = DensePoseTransformData.load( - densepose_transform_data_fpath - ) - - self.is_train = is_train - - def __call__(self, dataset_dict): - """ - Args: - dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. 
- - Returns: - dict: a format that builtin models in detectron2 accept - """ - dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below - image = utils.read_image(dataset_dict["file_name"], format=self.img_format) - utils.check_image_size(dataset_dict, image) - - image, transforms = T.apply_transform_gens(self.tfm_gens, image) - image_shape = image.shape[:2] # h, w - dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32")) - - if not self.is_train: - dataset_dict.pop("annotations", None) - return dataset_dict - - for anno in dataset_dict["annotations"]: - if not self.mask_on: - anno.pop("segmentation", None) - if not self.keypoint_on: - anno.pop("keypoints", None) - - # USER: Implement additional transformations if you have other types of data - # USER: Don't call transpose_densepose if you don't need - annos = [ - self._transform_densepose( - utils.transform_instance_annotations( - obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices - ), - transforms, - ) - for obj in dataset_dict.pop("annotations") - if obj.get("iscrowd", 0) == 0 - ] - instances = utils.annotations_to_instances(annos, image_shape) - - if len(annos) and "densepose" in annos[0]: - gt_densepose = [obj["densepose"] for obj in annos] - instances.gt_densepose = DensePoseList(gt_densepose, instances.gt_boxes, image_shape) - - dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()] - return dataset_dict - - def _transform_densepose(self, annotation, transforms): - if not self.densepose_on: - return annotation - - # Handle densepose annotations - is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation) - if is_valid: - densepose_data = DensePoseDataRelative(annotation, cleanup=True) - densepose_data.apply_transform(transforms, self.densepose_transform_data) - annotation["densepose"] = densepose_data - else: - # logger = logging.getLogger(__name__) - # logger.debug("Could not load DensePose annotation: {}".format(reason_not_valid)) - DensePoseDataRelative.cleanup_annotation(annotation) - # NOTE: annotations for certain instances may be unavailable. - # 'None' is accepted by the DensePostList data structure. - annotation["densepose"] = None - return annotation diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/__init__.py deleted file mode 100644 index 4a59d93..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -from . import builtin # ensure the builtin data are registered - -__all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/builtin.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/builtin.py deleted file mode 100644 index e70f3d3..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/builtin.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -from .coco import BASE_DATASETS as BASE_COCO_DATASETS -from .coco import DATASETS as COCO_DATASETS -from .coco import register_datasets as register_coco_datasets - -DEFAULT_DATASETS_ROOT = "data" - - -register_coco_datasets(COCO_DATASETS, DEFAULT_DATASETS_ROOT) -register_coco_datasets(BASE_COCO_DATASETS, DEFAULT_DATASETS_ROOT) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/coco.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/coco.py deleted file mode 100644 index 3a96474..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/coco.py +++ /dev/null @@ -1,314 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import contextlib -import io -import logging -import os -from dataclasses import dataclass -from typing import Any, Dict, Iterable, List, Optional -from fvcore.common.file_io import PathManager -from fvcore.common.timer import Timer - -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.structures import BoxMode - -DENSEPOSE_MASK_KEY = "dp_masks" -DENSEPOSE_KEYS_WITHOUT_MASK = ["dp_x", "dp_y", "dp_I", "dp_U", "dp_V"] -DENSEPOSE_KEYS = DENSEPOSE_KEYS_WITHOUT_MASK + [DENSEPOSE_MASK_KEY] -DENSEPOSE_METADATA_URL_PREFIX = "https://dl.fbaipublicfiles.com/densepose/data/" - - -@dataclass -class CocoDatasetInfo: - name: str - images_root: str - annotations_fpath: str - - -DATASETS = [ - CocoDatasetInfo( - name="densepose_coco_2014_train", - images_root="coco/train2014", - annotations_fpath="coco/annotations/densepose_train2014.json", - ), - CocoDatasetInfo( - name="densepose_coco_2014_minival", - images_root="coco/val2014", - annotations_fpath="coco/annotations/densepose_minival2014.json", - ), - CocoDatasetInfo( - name="densepose_coco_2014_minival_100", - images_root="coco/val2014", - annotations_fpath="coco/annotations/densepose_minival2014_100.json", - ), - CocoDatasetInfo( - name="densepose_coco_2014_valminusminival", - images_root="coco/val2014", - annotations_fpath="coco/annotations/densepose_valminusminival2014.json", - ), - CocoDatasetInfo( - name="densepose_chimps", - images_root="densepose_evolution/densepose_chimps", - annotations_fpath="densepose_evolution/annotations/densepose_chimps_densepose.json", - ), -] - - -BASE_DATASETS = [ - CocoDatasetInfo( - name="base_coco_2017_train", - images_root="coco/train2017", - annotations_fpath="coco/annotations/instances_train2017.json", - ), - CocoDatasetInfo( - name="base_coco_2017_val", - images_root="coco/val2017", - annotations_fpath="coco/annotations/instances_val2017.json", - ), - CocoDatasetInfo( - name="base_coco_2017_val_100", - images_root="coco/val2017", - annotations_fpath="coco/annotations/instances_val2017_100.json", - ), -] - - -def _is_relative_local_path(path: os.PathLike): - path_str = os.fsdecode(path) - return ("://" not in path_str) and not os.path.isabs(path) - - -def _maybe_prepend_base_path(base_path: Optional[os.PathLike], path: os.PathLike): - """ - Prepends the provided path with a base path prefix if: - 1) base path is not None; - 2) path is a local path - """ - if base_path is None: - return path - if _is_relative_local_path(path): - return os.path.join(base_path, path) - return path - - -def get_metadata(base_path: Optional[os.PathLike]) -> Dict[str, Any]: - """ - Returns metadata associated with COCO DensePose data - - Args: - base_path: Optional[os.PathLike] - Base path used to load 
metadata from - - Returns: - Dict[str, Any] - Metadata in the form of a dictionary - """ - meta = { - "densepose_transform_src": _maybe_prepend_base_path( - base_path, "UV_symmetry_transforms.mat" - ), - "densepose_smpl_subdiv": _maybe_prepend_base_path(base_path, "SMPL_subdiv.mat"), - "densepose_smpl_subdiv_transform": _maybe_prepend_base_path( - base_path, "SMPL_SUBDIV_TRANSFORM.mat" - ), - } - return meta - - -def _load_coco_annotations(json_file: str): - """ - Load COCO annotations from a JSON file - - Args: - json_file: str - Path to the file to load annotations from - Returns: - Instance of `pycocotools.coco.COCO` that provides access to annotations - data - """ - from pycocotools.coco import COCO - - logger = logging.getLogger(__name__) - timer = Timer() - with contextlib.redirect_stdout(io.StringIO()): - coco_api = COCO(json_file) - if timer.seconds() > 1: - logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())) - return coco_api - - -def _add_categories_metadata(dataset_name: str, categories: Dict[str, Any]): - meta = MetadataCatalog.get(dataset_name) - meta.categories = {c["id"]: c["name"] for c in categories} - logger = logging.getLogger(__name__) - logger.info("Dataset {} categories: {}".format(dataset_name, categories)) - - -def _verify_annotations_have_unique_ids(json_file: str, anns: List[List[Dict[str, Any]]]): - if "minival" in json_file: - # Skip validation on COCO2014 valminusminival and minival annotations - # The ratio of buggy annotations there is tiny and does not affect accuracy - # Therefore we explicitly white-list them - return - ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image] - assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format( - json_file - ) - - -def _maybe_add_bbox(obj: Dict[str, Any], ann_dict: Dict[str, Any]): - if "bbox" not in ann_dict: - return - obj["bbox"] = ann_dict["bbox"] - obj["bbox_mode"] = BoxMode.XYWH_ABS - - -def _maybe_add_segm(obj: Dict[str, Any], ann_dict: Dict[str, Any]): - if "segmentation" not in ann_dict: - return - segm = ann_dict["segmentation"] - if not isinstance(segm, dict): - # filter out invalid polygons (< 3 points) - segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6] - if len(segm) == 0: - return - obj["segmentation"] = segm - - -def _maybe_add_keypoints(obj: Dict[str, Any], ann_dict: Dict[str, Any]): - if "keypoints" not in ann_dict: - return - keypts = ann_dict["keypoints"] # list[int] - for idx, v in enumerate(keypts): - if idx % 3 != 2: - # COCO's segmentation coordinates are floating points in [0, H or W], - # but keypoint coordinates are integers in [0, H-1 or W-1] - # Therefore we assume the coordinates are "pixel indices" and - # add 0.5 to convert to floating point coordinates. 
- keypts[idx] = v + 0.5 - obj["keypoints"] = keypts - - -def _maybe_add_densepose(obj: Dict[str, Any], ann_dict: Dict[str, Any]): - for key in DENSEPOSE_KEYS: - if key in ann_dict: - obj[key] = ann_dict[key] - - -def _combine_images_with_annotations( - dataset_name: str, - image_root: str, - img_datas: Iterable[Dict[str, Any]], - ann_datas: Iterable[Iterable[Dict[str, Any]]], -): - - ann_keys = ["iscrowd", "category_id"] - dataset_dicts = [] - - for img_dict, ann_dicts in zip(img_datas, ann_datas): - record = {} - record["file_name"] = os.path.join(image_root, img_dict["file_name"]) - record["height"] = img_dict["height"] - record["width"] = img_dict["width"] - record["image_id"] = img_dict["id"] - record["dataset"] = dataset_name - objs = [] - for ann_dict in ann_dicts: - assert ann_dict["image_id"] == record["image_id"] - assert ann_dict.get("ignore", 0) == 0 - obj = {key: ann_dict[key] for key in ann_keys if key in ann_dict} - _maybe_add_bbox(obj, ann_dict) - _maybe_add_segm(obj, ann_dict) - _maybe_add_keypoints(obj, ann_dict) - _maybe_add_densepose(obj, ann_dict) - objs.append(obj) - record["annotations"] = objs - dataset_dicts.append(record) - return dataset_dicts - - -def load_coco_json(annotations_json_file: str, image_root: str, dataset_name: str): - """ - Loads a JSON file with annotations in COCO instances format. - Replaces `detectron2.data.data.coco.load_coco_json` to handle metadata - in a more flexible way. Postpones category mapping to a later stage to be - able to combine several data with different (but coherent) sets of - categories. - - Args: - - annotations_json_file: str - Path to the JSON file with annotations in COCO instances format. - image_root: str - directory that contains all the images - dataset_name: str - the name that identifies a dataset, e.g. "densepose_coco_2014_train" - extra_annotation_keys: Optional[List[str]] - If provided, these keys are used to extract additional data from - the annotations. - """ - coco_api = _load_coco_annotations(PathManager.get_local_path(annotations_json_file)) - _add_categories_metadata(dataset_name, coco_api.loadCats(coco_api.getCatIds())) - # sort indices for reproducible results - img_ids = sorted(coco_api.imgs.keys()) - # imgs is a list of dicts, each looks something like: - # {'license': 4, - # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg', - # 'file_name': 'COCO_val2014_000000001268.jpg', - # 'height': 427, - # 'width': 640, - # 'date_captured': '2013-11-17 05:57:24', - # 'id': 1268} - imgs = coco_api.loadImgs(img_ids) - logger = logging.getLogger(__name__) - logger.info("Loaded {} images in COCO format from {}".format(len(imgs), annotations_json_file)) - # anns is a list[list[dict]], where each dict is an annotation - # record for an object. The inner list enumerates the objects in an image - # and the outer list enumerates over images. 
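These per-annotation helpers make small but easy-to-miss adjustments: only the x/y entries of each (x, y, v) keypoint triplet are shifted by 0.5 (the visibility flag is left untouched), and degenerate polygons are silently dropped. A tiny standalone illustration with made-up annotation values:

# Made-up COCO-style annotation fragments, illustrating the two conversions above.
keypoints = [10, 20, 2, 0, 0, 0]                    # (x, y, v) triplets, pixel indices
segmentation = [[0, 0, 5, 0, 5, 5], [1, 1, 2, 2]]   # second polygon has < 3 points

# Keypoints: add 0.5 to x and y, leave every third value (visibility) as-is.
converted = [v + 0.5 if i % 3 != 2 else v for i, v in enumerate(keypoints)]
assert converted == [10.5, 20.5, 2, 0.5, 0.5, 0]

# Segmentation: keep only well-formed polygons (even length, at least 3 points).
valid = [poly for poly in segmentation if len(poly) % 2 == 0 and len(poly) >= 6]
assert valid == [[0, 0, 5, 0, 5, 5]]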
- anns = [coco_api.imgToAnns[img_id] for img_id in img_ids] - _verify_annotations_have_unique_ids(annotations_json_file, anns) - dataset_records = _combine_images_with_annotations(dataset_name, image_root, imgs, anns) - return dataset_records - - -def register_dataset(dataset_data: CocoDatasetInfo, datasets_root: Optional[os.PathLike] = None): - """ - Registers provided COCO DensePose dataset - - Args: - dataset_data: CocoDatasetInfo - Dataset data - datasets_root: Optional[os.PathLike] - Datasets root folder (default: None) - """ - annotations_fpath = _maybe_prepend_base_path(datasets_root, dataset_data.annotations_fpath) - images_root = _maybe_prepend_base_path(datasets_root, dataset_data.images_root) - - def load_annotations(): - return load_coco_json( - annotations_json_file=annotations_fpath, - image_root=images_root, - dataset_name=dataset_data.name, - ) - - DatasetCatalog.register(dataset_data.name, load_annotations) - MetadataCatalog.get(dataset_data.name).set( - json_file=annotations_fpath, - image_root=images_root, - **get_metadata(DENSEPOSE_METADATA_URL_PREFIX) - ) - - -def register_datasets( - datasets_data: Iterable[CocoDatasetInfo], datasets_root: Optional[os.PathLike] = None -): - """ - Registers provided COCO DensePose data - - Args: - datasets_data: Iterable[CocoDatasetInfo] - An iterable of dataset datas - datasets_root: Optional[os.PathLike] - Datasets root folder (default: None) - """ - for dataset_data in datasets_data: - register_dataset(dataset_data, datasets_root) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/structures.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/structures.py deleted file mode 100644 index bbb950b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/structures.py +++ /dev/null @@ -1,579 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import base64 -import numpy as np -from io import BytesIO -import torch -from PIL import Image -from torch.nn import functional as F - - -class DensePoseTransformData(object): - - # Horizontal symmetry label transforms used for horizontal flip - MASK_LABEL_SYMMETRIES = [0, 1, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14] - # fmt: off - POINT_LABEL_SYMMETRIES = [ 0, 1, 2, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15, 18, 17, 20, 19, 22, 21, 24, 23] # noqa - # fmt: on - - def __init__(self, uv_symmetries): - self.mask_label_symmetries = DensePoseTransformData.MASK_LABEL_SYMMETRIES - self.point_label_symmetries = DensePoseTransformData.POINT_LABEL_SYMMETRIES - self.uv_symmetries = uv_symmetries - - @staticmethod - def load(fpath): - import scipy.io - - uv_symmetry_map = scipy.io.loadmat(fpath) - uv_symmetry_map_torch = {} - for key in ["U_transforms", "V_transforms"]: - uv_symmetry_map_torch[key] = [] - map_src = uv_symmetry_map[key] - map_dst = uv_symmetry_map_torch[key] - for i in range(map_src.shape[1]): - map_dst.append(torch.from_numpy(map_src[0, i]).to(dtype=torch.float)) - uv_symmetry_map_torch[key] = torch.stack(map_dst, dim=0).to( - device=torch.cuda.current_device() - ) - transform_data = DensePoseTransformData(uv_symmetry_map_torch) - return transform_data - - -class DensePoseDataRelative(object): - """ - Dense pose relative annotations that can be applied to any bounding box: - x - normalized X coordinates [0, 255] of annotated points - y - normalized Y coordinates [0, 255] of annotated points - i - body part labels 0,...,24 for annotated points - u - body part U coordinates [0, 1] for annotated points - v - body part V coordinates [0, 1] for annotated points - segm - 256x256 segmentation mask with values 0,...,14 - To obtain absolute x and y data wrt some bounding box one needs to first - divide the data by 256, multiply by the respective bounding box size - and add bounding box offset: - x_img = x0 + x_norm * w / 256.0 - y_img = y0 + y_norm * h / 256.0 - Segmentation masks are typically sampled to get image-based masks. 
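A worked instance of the conversion stated in the docstring above, assuming a hypothetical annotated point at (128, 64) in the normalized 0..255 space and a bounding box with top-left corner (100, 50), width 200 and height 400:

# Worked example of x_img = x0 + x_norm * w / 256.0 (and the analogue for y).
x0, y0, w, h = 100.0, 50.0, 200.0, 400.0
x_norm, y_norm = 128.0, 64.0            # annotated point in the 0..255 space

x_img = x0 + x_norm * w / 256.0         # 100 + 128 * 200 / 256 = 200.0
y_img = y0 + y_norm * h / 256.0         # 50 + 64 * 400 / 256 = 150.0
assert (x_img, y_img) == (200.0, 150.0)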
- """ - - # Key for normalized X coordinates in annotation dict - X_KEY = "dp_x" - # Key for normalized Y coordinates in annotation dict - Y_KEY = "dp_y" - # Key for U part coordinates in annotation dict - U_KEY = "dp_U" - # Key for V part coordinates in annotation dict - V_KEY = "dp_V" - # Key for I point labels in annotation dict - I_KEY = "dp_I" - # Key for segmentation mask in annotation dict - S_KEY = "dp_masks" - # Number of body parts in segmentation masks - N_BODY_PARTS = 14 - # Number of parts in point labels - N_PART_LABELS = 24 - MASK_SIZE = 256 - - def __init__(self, annotation, cleanup=False): - is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation) - assert is_valid, "Invalid DensePose annotations: {}".format(reason_not_valid) - self.x = torch.as_tensor(annotation[DensePoseDataRelative.X_KEY]) - self.y = torch.as_tensor(annotation[DensePoseDataRelative.Y_KEY]) - self.i = torch.as_tensor(annotation[DensePoseDataRelative.I_KEY]) - self.u = torch.as_tensor(annotation[DensePoseDataRelative.U_KEY]) - self.v = torch.as_tensor(annotation[DensePoseDataRelative.V_KEY]) - self.segm = DensePoseDataRelative.extract_segmentation_mask(annotation) - self.device = torch.device("cpu") - if cleanup: - DensePoseDataRelative.cleanup_annotation(annotation) - - def to(self, device): - if self.device == device: - return self - new_data = DensePoseDataRelative.__new__(DensePoseDataRelative) - new_data.x = self.x - new_data.x = self.x.to(device) - new_data.y = self.y.to(device) - new_data.i = self.i.to(device) - new_data.u = self.u.to(device) - new_data.v = self.v.to(device) - new_data.segm = self.segm.to(device) - new_data.device = device - return new_data - - @staticmethod - def extract_segmentation_mask(annotation): - import pycocotools.mask as mask_utils - - poly_specs = annotation[DensePoseDataRelative.S_KEY] - segm = torch.zeros((DensePoseDataRelative.MASK_SIZE,) * 2, dtype=torch.float32) - for i in range(DensePoseDataRelative.N_BODY_PARTS): - poly_i = poly_specs[i] - if poly_i: - mask_i = mask_utils.decode(poly_i) - segm[mask_i > 0] = i + 1 - return segm - - @staticmethod - def validate_annotation(annotation): - for key in [ - DensePoseDataRelative.X_KEY, - DensePoseDataRelative.Y_KEY, - DensePoseDataRelative.I_KEY, - DensePoseDataRelative.U_KEY, - DensePoseDataRelative.V_KEY, - DensePoseDataRelative.S_KEY, - ]: - if key not in annotation: - return False, "no {key} data in the annotation".format(key=key) - return True, None - - @staticmethod - def cleanup_annotation(annotation): - for key in [ - DensePoseDataRelative.X_KEY, - DensePoseDataRelative.Y_KEY, - DensePoseDataRelative.I_KEY, - DensePoseDataRelative.U_KEY, - DensePoseDataRelative.V_KEY, - DensePoseDataRelative.S_KEY, - ]: - if key in annotation: - del annotation[key] - - def apply_transform(self, transforms, densepose_transform_data): - self._transform_pts(transforms, densepose_transform_data) - self._transform_segm(transforms, densepose_transform_data) - - def _transform_pts(self, transforms, dp_transform_data): - import detectron2.data.transforms as T - - # NOTE: This assumes that HorizFlipTransform is the only one that does flip - do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1 - if do_hflip: - self.x = self.segm.size(1) - self.x - self._flip_iuv_semantics(dp_transform_data) - - def _flip_iuv_semantics(self, dp_transform_data: DensePoseTransformData) -> None: - i_old = self.i.clone() - uv_symmetries = dp_transform_data.uv_symmetries - pt_label_symmetries = 
dp_transform_data.point_label_symmetries - for i in range(self.N_PART_LABELS): - if i + 1 in i_old: - annot_indices_i = i_old == i + 1 - if pt_label_symmetries[i + 1] != i + 1: - self.i[annot_indices_i] = pt_label_symmetries[i + 1] - u_loc = (self.u[annot_indices_i] * 255).long() - v_loc = (self.v[annot_indices_i] * 255).long() - self.u[annot_indices_i] = uv_symmetries["U_transforms"][i][v_loc, u_loc].to( - device=self.u.device - ) - self.v[annot_indices_i] = uv_symmetries["V_transforms"][i][v_loc, u_loc].to( - device=self.v.device - ) - - def _transform_segm(self, transforms, dp_transform_data): - import detectron2.data.transforms as T - - # NOTE: This assumes that HorizFlipTransform is the only one that does flip - do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1 - if do_hflip: - self.segm = torch.flip(self.segm, [1]) - self._flip_segm_semantics(dp_transform_data) - - def _flip_segm_semantics(self, dp_transform_data): - old_segm = self.segm.clone() - mask_label_symmetries = dp_transform_data.mask_label_symmetries - for i in range(self.N_BODY_PARTS): - if mask_label_symmetries[i + 1] != i + 1: - self.segm[old_segm == i + 1] = mask_label_symmetries[i + 1] - - -def normalized_coords_transform(x0, y0, w, h): - """ - Coordinates transform that maps top left corner to (-1, -1) and bottom - right corner to (1, 1). Used for torch.grid_sample to initialize the - grid - """ - - def f(p): - return (2 * (p[0] - x0) / w - 1, 2 * (p[1] - y0) / h - 1) - - return f - - -class DensePoseOutput(object): - def __init__(self, S, I, U, V, confidences): - """ - Args: - S (`torch.Tensor`): coarse segmentation tensor of size (N, A, H, W) - I (`torch.Tensor`): fine segmentation tensor of size (N, C, H, W) - U (`torch.Tensor`): U coordinates for each fine segmentation label of size (N, C, H, W) - V (`torch.Tensor`): V coordinates for each fine segmentation label of size (N, C, H, W) - confidences (dict of str -> `torch.Tensor`) estimated confidence model parameters - """ - self.S = S - self.I = I # noqa: E741 - self.U = U - self.V = V - self.confidences = confidences - self._check_output_dims(S, I, U, V) - - def _check_output_dims(self, S, I, U, V): - assert ( - len(S.size()) == 4 - ), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format( - S.size() - ) - assert ( - len(I.size()) == 4 - ), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format( - S.size() - ) - assert ( - len(U.size()) == 4 - ), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format( - S.size() - ) - assert ( - len(V.size()) == 4 - ), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format( - S.size() - ) - assert len(S) == len(I), ( - "Number of output segmentation planes {} " - "should be equal to the number of output part index " - "planes {}".format(len(S), len(I)) - ) - assert S.size()[2:] == I.size()[2:], ( - "Output segmentation plane size {} " - "should be equal to the output part index " - "plane size {}".format(S.size()[2:], I.size()[2:]) - ) - assert I.size() == U.size(), ( - "Part index output shape {} " - "should be the same as U coordinates output shape {}".format(I.size(), U.size()) - ) - assert I.size() == V.size(), ( - "Part index output shape {} " - "should be the same as V coordinates output shape {}".format(I.size(), V.size()) - ) - - def resize(self, image_size_hw): - # do nothing - outputs are invariant to resize - pass - - def _crop(self, S, I, U, V, bbox_old_xywh, bbox_new_xywh): - 
""" - Resample S, I, U, V from bbox_old to the cropped bbox_new - """ - x0old, y0old, wold, hold = bbox_old_xywh - x0new, y0new, wnew, hnew = bbox_new_xywh - tr_coords = normalized_coords_transform(x0old, y0old, wold, hold) - topleft = (x0new, y0new) - bottomright = (x0new + wnew, y0new + hnew) - topleft_norm = tr_coords(topleft) - bottomright_norm = tr_coords(bottomright) - hsize = S.size(1) - wsize = S.size(2) - grid = torch.meshgrid( - torch.arange( - topleft_norm[1], - bottomright_norm[1], - (bottomright_norm[1] - topleft_norm[1]) / hsize, - )[:hsize], - torch.arange( - topleft_norm[0], - bottomright_norm[0], - (bottomright_norm[0] - topleft_norm[0]) / wsize, - )[:wsize], - ) - grid = torch.stack(grid, dim=2).to(S.device) - assert ( - grid.size(0) == hsize - ), "Resampled grid expected " "height={}, actual height={}".format(hsize, grid.size(0)) - assert grid.size(1) == wsize, "Resampled grid expected " "width={}, actual width={}".format( - wsize, grid.size(1) - ) - S_new = F.grid_sample( - S.unsqueeze(0), - torch.unsqueeze(grid, 0), - mode="bilinear", - padding_mode="border", - align_corners=True, - ).squeeze(0) - I_new = F.grid_sample( - I.unsqueeze(0), - torch.unsqueeze(grid, 0), - mode="bilinear", - padding_mode="border", - align_corners=True, - ).squeeze(0) - U_new = F.grid_sample( - U.unsqueeze(0), - torch.unsqueeze(grid, 0), - mode="bilinear", - padding_mode="border", - align_corners=True, - ).squeeze(0) - V_new = F.grid_sample( - V.unsqueeze(0), - torch.unsqueeze(grid, 0), - mode="bilinear", - padding_mode="border", - align_corners=True, - ).squeeze(0) - return S_new, I_new, U_new, V_new - - def crop(self, indices_cropped, bboxes_old, bboxes_new): - """ - Crop outputs for selected bounding boxes to the new bounding boxes. - """ - # VK: cropping is ignored for now - # for i, ic in enumerate(indices_cropped): - # self.S[ic], self.I[ic], self.U[ic], self.V[ic] = \ - # self._crop(self.S[ic], self.I[ic], self.U[ic], self.V[ic], - # bboxes_old[i], bboxes_new[i]) - pass - - def hflip(self, transform_data: DensePoseTransformData) -> None: - """ - Change S, I, U and V to take into account a Horizontal flip. - """ - if self.I.shape[0] > 0: - for el in "SIUV": - self.__dict__[el] = torch.flip(self.__dict__[el], [3]) - self._flip_iuv_semantics_tensor(transform_data) - self._flip_segm_semantics_tensor(transform_data) - - def _flip_iuv_semantics_tensor(self, dp_transform_data: DensePoseTransformData) -> None: - point_label_symmetries = dp_transform_data.point_label_symmetries - uv_symmetries = dp_transform_data.uv_symmetries - - N, C, H, W = self.U.shape - u_loc = (self.U[:, 1:, :, :].clamp(0, 1) * 255).long() - v_loc = (self.V[:, 1:, :, :].clamp(0, 1) * 255).long() - Iindex = torch.arange(C - 1, device=self.U.device)[None, :, None, None].expand( - N, C - 1, H, W - ) - self.U[:, 1:, :, :] = uv_symmetries["U_transforms"][Iindex, v_loc, u_loc].to( - device=self.U.device - ) - self.V[:, 1:, :, :] = uv_symmetries["V_transforms"][Iindex, v_loc, u_loc].to( - device=self.V.device - ) - - for el in "IUV": - self.__dict__[el] = self.__dict__[el][:, point_label_symmetries, :, :] - - def _flip_segm_semantics_tensor(self, dp_transform_data): - if self.S.shape[1] == DensePoseDataRelative.N_BODY_PARTS + 1: - self.S = self.S[:, dp_transform_data.mask_label_symmetries, :, :] - - def to_result(self, boxes_xywh): - """ - Convert DensePose outputs to results format. 
Results are more compact, - but cannot be resampled any more - """ - result = DensePoseResult(boxes_xywh, self.S, self.I, self.U, self.V) - return result - - def __getitem__(self, item): - if isinstance(item, int): - S_selected = self.S[item].unsqueeze(0) - I_selected = self.I[item].unsqueeze(0) - U_selected = self.U[item].unsqueeze(0) - V_selected = self.V[item].unsqueeze(0) - conf_selected = {} - for key in self.confidences: - conf_selected[key] = self.confidences[key][item].unsqueeze(0) - else: - S_selected = self.S[item] - I_selected = self.I[item] - U_selected = self.U[item] - V_selected = self.V[item] - conf_selected = {} - for key in self.confidences: - conf_selected[key] = self.confidences[key][item] - return DensePoseOutput(S_selected, I_selected, U_selected, V_selected, conf_selected) - - def __str__(self): - s = "DensePoseOutput S {}, I {}, U {}, V {}".format( - list(self.S.size()), list(self.I.size()), list(self.U.size()), list(self.V.size()) - ) - s_conf = "confidences: [{}]".format( - ", ".join([f"{key} {list(self.confidences[key].size())}" for key in self.confidences]) - ) - return ", ".join([s, s_conf]) - - def __len__(self): - return self.S.size(0) - - -class DensePoseResult(object): - def __init__(self, boxes_xywh, S, I, U, V): - self.results = [] - self.boxes_xywh = boxes_xywh.cpu().tolist() - assert len(boxes_xywh.size()) == 2 - assert boxes_xywh.size(1) == 4 - for i, box_xywh in enumerate(boxes_xywh): - result_i = self._output_to_result(box_xywh, S[[i]], I[[i]], U[[i]], V[[i]]) - result_numpy_i = result_i.cpu().numpy() - result_encoded_i = DensePoseResult.encode_png_data(result_numpy_i) - result_encoded_with_shape_i = (result_numpy_i.shape, result_encoded_i) - self.results.append(result_encoded_with_shape_i) - - def __str__(self): - s = "DensePoseResult: N={} [{}]".format( - len(self.results), ", ".join([str(list(r[0])) for r in self.results]) - ) - return s - - def _output_to_result(self, box_xywh, S, I, U, V): - x, y, w, h = box_xywh - w = max(int(w), 1) - h = max(int(h), 1) - result = torch.zeros([3, h, w], dtype=torch.uint8, device=U.device) - assert ( - len(S.size()) == 4 - ), "AnnIndex tensor size should have {} " "dimensions but has {}".format(4, len(S.size())) - s_bbox = F.interpolate(S, (h, w), mode="bilinear", align_corners=False).argmax(dim=1) - assert ( - len(I.size()) == 4 - ), "IndexUV tensor size should have {} " "dimensions but has {}".format(4, len(S.size())) - i_bbox = ( - F.interpolate(I, (h, w), mode="bilinear", align_corners=False).argmax(dim=1) - * (s_bbox > 0).long() - ).squeeze(0) - assert len(U.size()) == 4, "U tensor size should have {} " "dimensions but has {}".format( - 4, len(U.size()) - ) - u_bbox = F.interpolate(U, (h, w), mode="bilinear", align_corners=False) - assert len(V.size()) == 4, "V tensor size should have {} " "dimensions but has {}".format( - 4, len(V.size()) - ) - v_bbox = F.interpolate(V, (h, w), mode="bilinear", align_corners=False) - result[0] = i_bbox - for part_id in range(1, u_bbox.size(1)): - result[1][i_bbox == part_id] = ( - (u_bbox[0, part_id][i_bbox == part_id] * 255).clamp(0, 255).to(torch.uint8) - ) - result[2][i_bbox == part_id] = ( - (v_bbox[0, part_id][i_bbox == part_id] * 255).clamp(0, 255).to(torch.uint8) - ) - assert ( - result.size(1) == h - ), "Results height {} should be equal" "to bounding box height {}".format(result.size(1), h) - assert ( - result.size(2) == w - ), "Results width {} should be equal" "to bounding box width {}".format(result.size(2), w) - return result - - @staticmethod - def 
encode_png_data(arr): - """ - Encode array data as a PNG image using the highest compression rate - @param arr [in] Data stored in an array of size (3, M, N) of type uint8 - @return Base64-encoded string containing PNG-compressed data - """ - assert len(arr.shape) == 3, "Expected a 3D array as an input," " got a {0}D array".format( - len(arr.shape) - ) - assert arr.shape[0] == 3, "Expected first array dimension of size 3," " got {0}".format( - arr.shape[0] - ) - assert arr.dtype == np.uint8, "Expected an array of type np.uint8, " " got {0}".format( - arr.dtype - ) - data = np.moveaxis(arr, 0, -1) - im = Image.fromarray(data) - fstream = BytesIO() - im.save(fstream, format="png", optimize=True) - s = base64.encodebytes(fstream.getvalue()).decode() - return s - - @staticmethod - def decode_png_data(shape, s): - """ - Decode array data from a string that contains PNG-compressed data - @param Base64-encoded string containing PNG-compressed data - @return Data stored in an array of size (3, M, N) of type uint8 - """ - fstream = BytesIO(base64.decodebytes(s.encode())) - im = Image.open(fstream) - data = np.moveaxis(np.array(im.getdata(), dtype=np.uint8), -1, 0) - return data.reshape(shape) - - def __len__(self): - return len(self.results) - - def __getitem__(self, item): - result_encoded = self.results[item] - bbox_xywh = self.boxes_xywh[item] - return result_encoded, bbox_xywh - - -class DensePoseList(object): - - _TORCH_DEVICE_CPU = torch.device("cpu") - - def __init__(self, densepose_datas, boxes_xyxy_abs, image_size_hw, device=_TORCH_DEVICE_CPU): - assert len(densepose_datas) == len( - boxes_xyxy_abs - ), "Attempt to initialize DensePoseList with {} DensePose datas " "and {} boxes".format( - len(densepose_datas), len(boxes_xyxy_abs) - ) - self.densepose_datas = [] - for densepose_data in densepose_datas: - assert isinstance(densepose_data, DensePoseDataRelative) or densepose_data is None, ( - "Attempt to initialize DensePoseList with DensePose datas " - "of type {}, expected DensePoseDataRelative".format(type(densepose_data)) - ) - densepose_data_ondevice = ( - densepose_data.to(device) if densepose_data is not None else None - ) - self.densepose_datas.append(densepose_data_ondevice) - self.boxes_xyxy_abs = boxes_xyxy_abs.to(device) - self.image_size_hw = image_size_hw - self.device = device - - def to(self, device): - if self.device == device: - return self - return DensePoseList(self.densepose_datas, self.boxes_xyxy_abs, self.image_size_hw, device) - - def __iter__(self): - return iter(self.densepose_datas) - - def __len__(self): - return len(self.densepose_datas) - - def __repr__(self): - s = self.__class__.__name__ + "(" - s += "num_instances={}, ".format(len(self.densepose_datas)) - s += "image_width={}, ".format(self.image_size_hw[1]) - s += "image_height={})".format(self.image_size_hw[0]) - return s - - def __getitem__(self, item): - if isinstance(item, int): - densepose_data_rel = self.densepose_datas[item] - return densepose_data_rel - elif isinstance(item, slice): - densepose_datas_rel = self.densepose_datas[item] - boxes_xyxy_abs = self.boxes_xyxy_abs[item] - return DensePoseList( - densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device - ) - elif isinstance(item, torch.Tensor) and (item.dtype == torch.bool): - densepose_datas_rel = [self.densepose_datas[i] for i, x in enumerate(item) if x > 0] - boxes_xyxy_abs = self.boxes_xyxy_abs[item] - return DensePoseList( - densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device - ) - else: - densepose_datas_rel = 
[self.densepose_datas[i] for i in item] - boxes_xyxy_abs = self.boxes_xyxy_abs[item] - return DensePoseList( - densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/densepose_coco_evaluation.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/densepose_coco_evaluation.py deleted file mode 100644 index 489e7b0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/densepose_coco_evaluation.py +++ /dev/null @@ -1,1138 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in the -# LICENSE file in the root directory of this source tree. -# This is a modified version of cocoeval.py where we also have the densepose evaluation. - -__author__ = "tsungyi" - -import copy -import datetime -import itertools -import logging -import numpy as np -import pickle -import time -from collections import defaultdict -from enum import Enum -from typing import Any, Dict, Tuple -import scipy.spatial.distance as ssd -from fvcore.common.file_io import PathManager -from pycocotools import mask as maskUtils -from scipy.io import loadmat -from scipy.ndimage import zoom as spzoom - -from .data.structures import DensePoseDataRelative, DensePoseResult - -logger = logging.getLogger(__name__) - - -class DensePoseEvalMode(str, Enum): - # use both masks and geodesic distances (GPS * IOU) to compute scores - GPSM = "gpsm" - # use only geodesic distances (GPS) to compute scores - GPS = "gps" - # use only masks (IOU) to compute scores - IOU = "iou" - - -class DensePoseDataMode(str, Enum): - # use estimated IUV data (default mode) - IUV_DT = "iuvdt" - # use ground truth IUV data - IUV_GT = "iuvgt" - # use ground truth labels I and set UV to 0 - I_GT_UV_0 = "igtuv0" - # use ground truth labels I and estimated UV coordinates - I_GT_UV_DT = "igtuvdt" - # use estimated labels I and set UV to 0 - I_DT_UV_0 = "idtuv0" - - -class DensePoseCocoEval(object): - # Interface for evaluating detection on the Microsoft COCO dataset. - # - # The usage for CocoEval is as follows: - # cocoGt=..., cocoDt=... # load dataset and results - # E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object - # E.params.recThrs = ...; # set parameters as desired - # E.evaluate(); # run per image evaluation - # E.accumulate(); # accumulate per image results - # E.summarize(); # display summary metrics of results - # For example usage see evalDemo.m and http://mscoco.org/. - # - # The evaluation parameters are as follows (defaults in brackets): - # imgIds - [all] N demo ids to use for evaluation - # catIds - [all] K cat ids to use for evaluation - # iouThrs - [.5:.05:.95] T=10 IoU thresholds for evaluation - # recThrs - [0:.01:1] R=101 recall thresholds for evaluation - # areaRng - [...] A=4 object area ranges for evaluation - # maxDets - [1 10 100] M=3 thresholds on max detections per image - # iouType - ['segm'] set iouType to 'segm', 'bbox', 'keypoints' or 'densepose' - # iouType replaced the now DEPRECATED useSegm parameter. - # useCats - [1] if true use category labels for evaluation - # Note: if useCats=0 category labels are ignored as in proposal scoring. - # Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified. 
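In Python, the evaluation flow outlined in the usage comments above amounts to roughly the following sketch. The file paths are placeholders, loading detections via COCO.loadRes follows the standard pycocotools workflow the comments refer to, and summarize() is the summary step mentioned above:

from pycocotools.coco import COCO

# Placeholders: any COCO-format ground truth and matching DensePose results file.
coco_gt = COCO("annotations/densepose_minival2014.json")
coco_dt = coco_gt.loadRes("densepose_results.json")

evaluator = DensePoseCocoEval(
    coco_gt, coco_dt, iouType="densepose", dpEvalMode=DensePoseEvalMode.GPS
)
evaluator.params.imgIds = sorted(coco_gt.getImgIds())
evaluator.evaluate()     # per-image, per-category evaluation
evaluator.accumulate()   # aggregate into precision/recall arrays
evaluator.summarize()    # display the summary metrics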
- # - # evaluate(): evaluates detections on every image and every category and - # concats the results into the "evalImgs" with fields: - # dtIds - [1xD] id for each of the D detections (dt) - # gtIds - [1xG] id for each of the G ground truths (gt) - # dtMatches - [TxD] matching gt id at each IoU or 0 - # gtMatches - [TxG] matching dt id at each IoU or 0 - # dtScores - [1xD] confidence of each dt - # gtIgnore - [1xG] ignore flag for each gt - # dtIgnore - [TxD] ignore flag for each dt at each IoU - # - # accumulate(): accumulates the per-image, per-category evaluation - # results in "evalImgs" into the dictionary "eval" with fields: - # params - parameters used for evaluation - # date - date evaluation was performed - # counts - [T,R,K,A,M] parameter dimensions (see above) - # precision - [TxRxKxAxM] precision for every evaluation setting - # recall - [TxKxAxM] max recall for every evaluation setting - # Note: precision and recall==-1 for settings with no gt objects. - # - # See also coco, mask, pycocoDemo, pycocoEvalDemo - # - # Microsoft COCO Toolbox. version 2.0 - # Data, paper, and tutorials available at: http://mscoco.org/ - # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. - # Licensed under the Simplified BSD License [see coco/license.txt] - def __init__( - self, - cocoGt=None, - cocoDt=None, - iouType: str = "densepose", - dpEvalMode: DensePoseEvalMode = DensePoseEvalMode.GPS, - dpDataMode: DensePoseDataMode = DensePoseDataMode.IUV_DT, - ): - """ - Initialize CocoEval using coco APIs for gt and dt - :param cocoGt: coco object with ground truth annotations - :param cocoDt: coco object with detection results - :return: None - """ - self.cocoGt = cocoGt # ground truth COCO API - self.cocoDt = cocoDt # detections COCO API - self._dpEvalMode = dpEvalMode - self._dpDataMode = dpDataMode - self.params = {} # evaluation parameters - self.evalImgs = defaultdict(list) # per-image per-category eval results [KxAxI] - self.eval = {} # accumulated evaluation results - self._gts = defaultdict(list) # gt for evaluation - self._dts = defaultdict(list) # dt for evaluation - self.params = Params(iouType=iouType) # parameters - self._paramsEval = {} # parameters for evaluation - self.stats = [] # result summarization - self.ious = {} # ious between all gts and dts - if cocoGt is not None: - self.params.imgIds = sorted(cocoGt.getImgIds()) - self.params.catIds = sorted(cocoGt.getCatIds()) - self.ignoreThrBB = 0.7 - self.ignoreThrUV = 0.9 - - def _loadGEval(self): - smpl_subdiv_fpath = PathManager.get_local_path( - "https://dl.fbaipublicfiles.com/densepose/data/SMPL_subdiv.mat" - ) - pdist_transform_fpath = PathManager.get_local_path( - "https://dl.fbaipublicfiles.com/densepose/data/SMPL_SUBDIV_TRANSFORM.mat" - ) - pdist_matrix_fpath = PathManager.get_local_path( - "https://dl.fbaipublicfiles.com/densepose/data/Pdist_matrix.pkl", timeout_sec=120 - ) - SMPL_subdiv = loadmat(smpl_subdiv_fpath) - self.PDIST_transform = loadmat(pdist_transform_fpath) - self.PDIST_transform = self.PDIST_transform["index"].squeeze() - UV = np.array([SMPL_subdiv["U_subdiv"], SMPL_subdiv["V_subdiv"]]).squeeze() - ClosestVertInds = np.arange(UV.shape[1]) + 1 - self.Part_UVs = [] - self.Part_ClosestVertInds = [] - for i in np.arange(24): - self.Part_UVs.append(UV[:, SMPL_subdiv["Part_ID_subdiv"].squeeze() == (i + 1)]) - self.Part_ClosestVertInds.append( - ClosestVertInds[SMPL_subdiv["Part_ID_subdiv"].squeeze() == (i + 1)] - ) - - with open(pdist_matrix_fpath, "rb") as hFile: - arrays = pickle.load(hFile, encoding="latin1") - 
self.Pdist_matrix = arrays["Pdist_matrix"] - self.Part_ids = np.array(SMPL_subdiv["Part_ID_subdiv"].squeeze()) - # Mean geodesic distances for parts. - self.Mean_Distances = np.array([0, 0.351, 0.107, 0.126, 0.237, 0.173, 0.142, 0.128, 0.150]) - # Coarse Part labels. - self.CoarseParts = np.array( - [0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8] - ) - - def _prepare(self): - """ - Prepare ._gts and ._dts for evaluation based on params - :return: None - """ - - def _toMask(anns, coco): - # modify ann['segmentation'] by reference - for ann in anns: - rle = coco.annToRLE(ann) - ann["segmentation"] = rle - - def _getIgnoreRegion(iid, coco): - img = coco.imgs[iid] - - if "ignore_regions_x" not in img.keys(): - return None - - if len(img["ignore_regions_x"]) == 0: - return None - - rgns_merged = [] - for region_x, region_y in zip(img["ignore_regions_x"], img["ignore_regions_y"]): - rgns = [iter(region_x), iter(region_y)] - rgns_merged.append([next(it) for it in itertools.cycle(rgns)]) - rles = maskUtils.frPyObjects(rgns_merged, img["height"], img["width"]) - rle = maskUtils.merge(rles) - return maskUtils.decode(rle) - - def _checkIgnore(dt, iregion): - if iregion is None: - return True - - bb = np.array(dt["bbox"]).astype(np.int) - x1, y1, x2, y2 = bb[0], bb[1], bb[0] + bb[2], bb[1] + bb[3] - x2 = min([x2, iregion.shape[1]]) - y2 = min([y2, iregion.shape[0]]) - - if bb[2] * bb[3] == 0: - return False - - crop_iregion = iregion[y1:y2, x1:x2] - - if crop_iregion.sum() == 0: - return True - - if "densepose" not in dt.keys(): # filtering boxes - return crop_iregion.sum() / bb[2] / bb[3] < self.ignoreThrBB - - # filtering UVs - ignoremask = np.require(crop_iregion, requirements=["F"]) - mask = self._extract_mask(dt) - uvmask = np.require(np.asarray(mask > 0), dtype=np.uint8, requirements=["F"]) - uvmask_ = maskUtils.encode(uvmask) - ignoremask_ = maskUtils.encode(ignoremask) - uviou = maskUtils.iou([uvmask_], [ignoremask_], [1])[0] - return uviou < self.ignoreThrUV - - p = self.params - - if p.useCats: - gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) - dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) - else: - gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds)) - dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds)) - - imns = self.cocoGt.loadImgs(p.imgIds) - self.size_mapping = {} - for im in imns: - self.size_mapping[im["id"]] = [im["height"], im["width"]] - - # if iouType == 'uv', add point gt annotations - if p.iouType == "densepose": - self._loadGEval() - - # convert ground truth to mask if iouType == 'segm' - if p.iouType == "segm": - _toMask(gts, self.cocoGt) - _toMask(dts, self.cocoDt) - - # set ignore flag - for gt in gts: - gt["ignore"] = gt["ignore"] if "ignore" in gt else 0 - gt["ignore"] = "iscrowd" in gt and gt["iscrowd"] - if p.iouType == "keypoints": - gt["ignore"] = (gt["num_keypoints"] == 0) or gt["ignore"] - if p.iouType == "densepose": - gt["ignore"] = ("dp_x" in gt) == 0 - - self._gts = defaultdict(list) # gt for evaluation - self._dts = defaultdict(list) # dt for evaluation - self._igrgns = defaultdict(list) - - for gt in gts: - iid = gt["image_id"] - if iid not in self._igrgns.keys(): - self._igrgns[iid] = _getIgnoreRegion(iid, self.cocoGt) - if _checkIgnore(gt, self._igrgns[iid]): - self._gts[iid, gt["category_id"]].append(gt) - for dt in dts: - iid = dt["image_id"] - if (iid not in self._igrgns) or _checkIgnore(dt, self._igrgns[iid]): - 
self._dts[iid, dt["category_id"]].append(dt) - - self.evalImgs = defaultdict(list) # per-image per-category evaluation results - self.eval = {} # accumulated evaluation results - - def evaluate(self): - """ - Run per image evaluation on given images and store results (a list of dict) in self.evalImgs - :return: None - """ - tic = time.time() - logger.info("Running per image DensePose evaluation... {}".format(self.params.iouType)) - p = self.params - # add backward compatibility if useSegm is specified in params - if p.useSegm is not None: - p.iouType = "segm" if p.useSegm == 1 else "bbox" - logger.info("useSegm (deprecated) is not None. Running DensePose evaluation") - p.imgIds = list(np.unique(p.imgIds)) - if p.useCats: - p.catIds = list(np.unique(p.catIds)) - p.maxDets = sorted(p.maxDets) - self.params = p - - self._prepare() - # loop through images, area range, max detection number - catIds = p.catIds if p.useCats else [-1] - - if p.iouType in ["segm", "bbox"]: - computeIoU = self.computeIoU - elif p.iouType == "keypoints": - computeIoU = self.computeOks - elif p.iouType == "densepose": - computeIoU = self.computeOgps - if self._dpEvalMode == DensePoseEvalMode.GPSM: - self.real_ious = { - (imgId, catId): self.computeDPIoU(imgId, catId) - for imgId in p.imgIds - for catId in catIds - } - - self.ious = { - (imgId, catId): computeIoU(imgId, catId) for imgId in p.imgIds for catId in catIds - } - - evaluateImg = self.evaluateImg - maxDet = p.maxDets[-1] - self.evalImgs = [ - evaluateImg(imgId, catId, areaRng, maxDet) - for catId in catIds - for areaRng in p.areaRng - for imgId in p.imgIds - ] - self._paramsEval = copy.deepcopy(self.params) - toc = time.time() - logger.info("DensePose evaluation DONE (t={:0.2f}s).".format(toc - tic)) - - def getDensePoseMask(self, polys): - maskGen = np.zeros([256, 256]) - for i in range(1, 15): - if polys[i - 1]: - currentMask = maskUtils.decode(polys[i - 1]) - maskGen[currentMask > 0] = i - return maskGen - - def _generate_rlemask_on_image(self, mask, imgId, data): - bbox_xywh = np.array(data["bbox"]) - x, y, w, h = bbox_xywh - im_h, im_w = self.size_mapping[imgId] - im_mask = np.zeros((im_h, im_w), dtype=np.uint8) - if mask is not None: - x0 = max(int(x), 0) - x1 = min(int(x + w), im_w, int(x) + mask.shape[1]) - y0 = max(int(y), 0) - y1 = min(int(y + h), im_h, int(y) + mask.shape[0]) - y = int(y) - x = int(x) - im_mask[y0:y1, x0:x1] = mask[y0 - y : y1 - y, x0 - x : x1 - x] - im_mask = np.require(np.asarray(im_mask > 0), dtype=np.uint8, requirements=["F"]) - rle_mask = maskUtils.encode(np.array(im_mask[:, :, np.newaxis], order="F"))[0] - return rle_mask - - def computeDPIoU(self, imgId, catId): - p = self.params - if p.useCats: - gt = self._gts[imgId, catId] - dt = self._dts[imgId, catId] - else: - gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]] - dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]] - if len(gt) == 0 and len(dt) == 0: - return [] - inds = np.argsort([-d["score"] for d in dt], kind="mergesort") - dt = [dt[i] for i in inds] - if len(dt) > p.maxDets[-1]: - dt = dt[0 : p.maxDets[-1]] - - gtmasks = [] - for g in gt: - if DensePoseDataRelative.S_KEY in g: - mask = self.getDensePoseMask(g[DensePoseDataRelative.S_KEY]) - _, _, w, h = g["bbox"] - scale_x = float(max(w, 1)) / mask.shape[1] - scale_y = float(max(h, 1)) / mask.shape[0] - mask = spzoom(mask, (scale_y, scale_x), order=1, prefilter=False) - mask = np.array(mask > 0.5, dtype=np.uint8) - rle_mask = self._generate_rlemask_on_image(mask, imgId, g) - elif "segmentation" in 
g: - segmentation = g["segmentation"] - if isinstance(segmentation, list) and segmentation: - # polygons - im_h, im_w = self.size_mapping[imgId] - rles = maskUtils.frPyObjects(segmentation, im_h, im_w) - rle_mask = maskUtils.merge(rles) - elif isinstance(segmentation, dict): - if isinstance(segmentation["counts"], list): - # uncompressed RLE - im_h, im_w = self.size_mapping[imgId] - rle_mask = maskUtils.frPyObjects(segmentation, im_h, im_w) - else: - # compressed RLE - rle_mask = segmentation - else: - rle_mask = self._generate_rlemask_on_image(None, imgId, g) - else: - rle_mask = self._generate_rlemask_on_image(None, imgId, g) - gtmasks.append(rle_mask) - - dtmasks = [] - for d in dt: - mask = self._extract_mask(d) - mask = np.require(np.asarray(mask > 0), dtype=np.uint8, requirements=["F"]) - rle_mask = self._generate_rlemask_on_image(mask, imgId, d) - dtmasks.append(rle_mask) - - # compute iou between each dt and gt region - iscrowd = [int(o["iscrowd"]) for o in gt] - iousDP = maskUtils.iou(dtmasks, gtmasks, iscrowd) - return iousDP - - def computeIoU(self, imgId, catId): - p = self.params - if p.useCats: - gt = self._gts[imgId, catId] - dt = self._dts[imgId, catId] - else: - gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]] - dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]] - if len(gt) == 0 and len(dt) == 0: - return [] - inds = np.argsort([-d["score"] for d in dt], kind="mergesort") - dt = [dt[i] for i in inds] - if len(dt) > p.maxDets[-1]: - dt = dt[0 : p.maxDets[-1]] - - if p.iouType == "segm": - g = [g["segmentation"] for g in gt] - d = [d["segmentation"] for d in dt] - elif p.iouType == "bbox": - g = [g["bbox"] for g in gt] - d = [d["bbox"] for d in dt] - else: - raise Exception("unknown iouType for iou computation") - - # compute iou between each dt and gt region - iscrowd = [int(o["iscrowd"]) for o in gt] - ious = maskUtils.iou(d, g, iscrowd) - return ious - - def computeOks(self, imgId, catId): - p = self.params - # dimension here should be Nxm - gts = self._gts[imgId, catId] - dts = self._dts[imgId, catId] - inds = np.argsort([-d["score"] for d in dts], kind="mergesort") - dts = [dts[i] for i in inds] - if len(dts) > p.maxDets[-1]: - dts = dts[0 : p.maxDets[-1]] - # if len(gts) == 0 and len(dts) == 0: - if len(gts) == 0 or len(dts) == 0: - return [] - ious = np.zeros((len(dts), len(gts))) - sigmas = ( - np.array( - [ - 0.26, - 0.25, - 0.25, - 0.35, - 0.35, - 0.79, - 0.79, - 0.72, - 0.72, - 0.62, - 0.62, - 1.07, - 1.07, - 0.87, - 0.87, - 0.89, - 0.89, - ] - ) - / 10.0 - ) - vars = (sigmas * 2) ** 2 - k = len(sigmas) - # compute oks between each detection and ground truth object - for j, gt in enumerate(gts): - # create bounds for ignore regions(double the gt bbox) - g = np.array(gt["keypoints"]) - xg = g[0::3] - yg = g[1::3] - vg = g[2::3] - k1 = np.count_nonzero(vg > 0) - bb = gt["bbox"] - x0 = bb[0] - bb[2] - x1 = bb[0] + bb[2] * 2 - y0 = bb[1] - bb[3] - y1 = bb[1] + bb[3] * 2 - for i, dt in enumerate(dts): - d = np.array(dt["keypoints"]) - xd = d[0::3] - yd = d[1::3] - if k1 > 0: - # measure the per-keypoint distance if keypoints visible - dx = xd - xg - dy = yd - yg - else: - # measure minimum distance to keypoints in (x0,y0) & (x1,y1) - z = np.zeros(k) - dx = np.max((z, x0 - xd), axis=0) + np.max((z, xd - x1), axis=0) - dy = np.max((z, y0 - yd), axis=0) + np.max((z, yd - y1), axis=0) - e = (dx ** 2 + dy ** 2) / vars / (gt["area"] + np.spacing(1)) / 2 - if k1 > 0: - e = e[vg > 0] - ious[i, j] = np.sum(np.exp(-e)) / e.shape[0] - return ious - - def 
_extract_mask(self, dt: Dict[str, Any]) -> np.ndarray: - (densepose_shape, densepose_data_encoded), densepose_bbox_xywh = dt["densepose"] - densepose_data = DensePoseResult.decode_png_data(densepose_shape, densepose_data_encoded) - return densepose_data[0] - - def _extract_iuv( - self, densepose_data: np.ndarray, py: np.ndarray, px: np.ndarray, gt: Dict[str, Any] - ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: - """ - Extract arrays of I, U and V values at given points as numpy arrays - given the data mode stored in self._dpDataMode - """ - if self._dpDataMode == DensePoseDataMode.IUV_DT: - # estimated labels and UV (default) - ipoints = densepose_data[0, py, px] - upoints = densepose_data[1, py, px] / 255.0 # convert from uint8 by /255. - vpoints = densepose_data[2, py, px] / 255.0 - elif self._dpDataMode == DensePoseDataMode.IUV_GT: - # ground truth - ipoints = np.array(gt["dp_I"]) - upoints = np.array(gt["dp_U"]) - vpoints = np.array(gt["dp_V"]) - elif self._dpDataMode == DensePoseDataMode.I_GT_UV_0: - # ground truth labels, UV = 0 - ipoints = np.array(gt["dp_I"]) - upoints = upoints * 0.0 - vpoints = vpoints * 0.0 - elif self._dpDataMode == DensePoseDataMode.I_GT_UV_DT: - # ground truth labels, estimated UV - ipoints = np.array(gt["dp_I"]) - upoints = densepose_data[1, py, px] / 255.0 # convert from uint8 by /255. - vpoints = densepose_data[2, py, px] / 255.0 - elif self._dpDataMode == DensePoseDataMode.I_DT_UV_0: - # estimated labels, UV = 0 - ipoints = densepose_data[0, py, px] - upoints = upoints * 0.0 - vpoints = vpoints * 0.0 - else: - raise ValueError(f"Unknown data mode: {self._dpDataMode}") - return ipoints, upoints, vpoints - - def computeOgps(self, imgId, catId): - p = self.params - # dimension here should be Nxm - g = self._gts[imgId, catId] - d = self._dts[imgId, catId] - inds = np.argsort([-d_["score"] for d_ in d], kind="mergesort") - d = [d[i] for i in inds] - if len(d) > p.maxDets[-1]: - d = d[0 : p.maxDets[-1]] - # if len(gts) == 0 and len(dts) == 0: - if len(g) == 0 or len(d) == 0: - return [] - ious = np.zeros((len(d), len(g))) - # compute opgs between each detection and ground truth object - # sigma = self.sigma #0.255 # dist = 0.3m corresponds to ogps = 0.5 - # 1 # dist = 0.3m corresponds to ogps = 0.96 - # 1.45 # dist = 1.7m (person height) corresponds to ogps = 0.5) - for j, gt in enumerate(g): - if not gt["ignore"]: - g_ = gt["bbox"] - for i, dt in enumerate(d): - # - dy = int(dt["bbox"][3]) - dx = int(dt["bbox"][2]) - dp_x = np.array(gt["dp_x"]) * g_[2] / 255.0 - dp_y = np.array(gt["dp_y"]) * g_[3] / 255.0 - py = (dp_y + g_[1] - dt["bbox"][1]).astype(np.int) - px = (dp_x + g_[0] - dt["bbox"][0]).astype(np.int) - # - pts = np.zeros(len(px)) - pts[px >= dx] = -1 - pts[py >= dy] = -1 - pts[px < 0] = -1 - pts[py < 0] = -1 - if len(pts) < 1: - ogps = 0.0 - elif np.max(pts) == -1: - ogps = 0.0 - else: - px[pts == -1] = 0 - py[pts == -1] = 0 - (densepose_shape, densepose_data_encoded), densepose_bbox_xywh = dt[ - "densepose" - ] - densepose_data = DensePoseResult.decode_png_data( - densepose_shape, densepose_data_encoded - ) - assert densepose_data.shape[2] == dx, ( - "DensePoseData width {} should be equal to " - "detection bounding box width {}".format(densepose_data.shape[2], dx) - ) - assert densepose_data.shape[1] == dy, ( - "DensePoseData height {} should be equal to " - "detection bounding box height {}".format(densepose_data.shape[1], dy) - ) - ipoints, upoints, vpoints = self._extract_iuv(densepose_data, py, px, gt) - ipoints[pts == -1] = 0 - # Find closest 
vertices in subsampled mesh. - cVerts, cVertsGT = self.findAllClosestVerts(gt, upoints, vpoints, ipoints) - # Get pairwise geodesic distances between gt and estimated mesh points. - dist = self.getDistances(cVertsGT, cVerts) - # Compute the Ogps measure. - # Find the mean geodesic normalization distance for - # each GT point, based on which part it is on. - Current_Mean_Distances = self.Mean_Distances[ - self.CoarseParts[self.Part_ids[cVertsGT[cVertsGT > 0].astype(int) - 1]] - ] - # Compute gps - ogps_values = np.exp(-(dist ** 2) / (2 * (Current_Mean_Distances ** 2))) - # - if len(dist) > 0: - ogps = np.sum(ogps_values) / len(dist) - ious[i, j] = ogps - - gbb = [gt["bbox"] for gt in g] - dbb = [dt["bbox"] for dt in d] - - # compute iou between each dt and gt region - iscrowd = [int(o["iscrowd"]) for o in g] - ious_bb = maskUtils.iou(dbb, gbb, iscrowd) - return ious, ious_bb - - def evaluateImg(self, imgId, catId, aRng, maxDet): - """ - perform evaluation for single category and image - :return: dict (single image results) - """ - - p = self.params - if p.useCats: - gt = self._gts[imgId, catId] - dt = self._dts[imgId, catId] - else: - gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]] - dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]] - if len(gt) == 0 and len(dt) == 0: - return None - - for g in gt: - # g['_ignore'] = g['ignore'] - if g["ignore"] or (g["area"] < aRng[0] or g["area"] > aRng[1]): - g["_ignore"] = True - else: - g["_ignore"] = False - - # sort dt highest score first, sort gt ignore last - gtind = np.argsort([g["_ignore"] for g in gt], kind="mergesort") - gt = [gt[i] for i in gtind] - dtind = np.argsort([-d["score"] for d in dt], kind="mergesort") - dt = [dt[i] for i in dtind[0:maxDet]] - iscrowd = [int(o["iscrowd"]) for o in gt] - # load computed ious - if p.iouType == "densepose": - # print('Checking the length', len(self.ious[imgId, catId])) - # if len(self.ious[imgId, catId]) == 0: - # print(self.ious[imgId, catId]) - ious = ( - self.ious[imgId, catId][0][:, gtind] - if len(self.ious[imgId, catId]) > 0 - else self.ious[imgId, catId] - ) - ioubs = ( - self.ious[imgId, catId][1][:, gtind] - if len(self.ious[imgId, catId]) > 0 - else self.ious[imgId, catId] - ) - if self._dpEvalMode == DensePoseEvalMode.GPSM: - iousM = ( - self.real_ious[imgId, catId][:, gtind] - if len(self.real_ious[imgId, catId]) > 0 - else self.real_ious[imgId, catId] - ) - else: - ious = ( - self.ious[imgId, catId][:, gtind] - if len(self.ious[imgId, catId]) > 0 - else self.ious[imgId, catId] - ) - - T = len(p.iouThrs) - G = len(gt) - D = len(dt) - gtm = np.zeros((T, G)) - dtm = np.zeros((T, D)) - gtIg = np.array([g["_ignore"] for g in gt]) - dtIg = np.zeros((T, D)) - if np.all(gtIg) and p.iouType == "densepose": - dtIg = np.logical_or(dtIg, True) - - if len(ious) > 0: # and not p.iouType == 'densepose': - for tind, t in enumerate(p.iouThrs): - for dind, d in enumerate(dt): - # information about best match so far (m=-1 -> unmatched) - iou = min([t, 1 - 1e-10]) - m = -1 - for gind, _g in enumerate(gt): - # if this gt already matched, and not a crowd, continue - if gtm[tind, gind] > 0 and not iscrowd[gind]: - continue - # if dt matched to reg gt, and on ignore gt, stop - if m > -1 and gtIg[m] == 0 and gtIg[gind] == 1: - break - if p.iouType == "densepose": - if self._dpEvalMode == DensePoseEvalMode.GPSM: - new_iou = np.sqrt(iousM[dind, gind] * ious[dind, gind]) - elif self._dpEvalMode == DensePoseEvalMode.IOU: - new_iou = iousM[dind, gind] - elif self._dpEvalMode == 
DensePoseEvalMode.GPS: - new_iou = ious[dind, gind] - else: - new_iou = ious[dind, gind] - if new_iou < iou: - continue - if new_iou == 0.0: - continue - # if match successful and best so far, store appropriately - iou = new_iou - m = gind - # if match made store id of match for both dt and gt - if m == -1: - continue - dtIg[tind, dind] = gtIg[m] - dtm[tind, dind] = gt[m]["id"] - gtm[tind, m] = d["id"] - - if p.iouType == "densepose": - if not len(ioubs) == 0: - for dind, d in enumerate(dt): - # information about best match so far (m=-1 -> unmatched) - if dtm[tind, dind] == 0: - ioub = 0.8 - m = -1 - for gind, _g in enumerate(gt): - # if this gt already matched, and not a crowd, continue - if gtm[tind, gind] > 0 and not iscrowd[gind]: - continue - # continue to next gt unless better match made - if ioubs[dind, gind] < ioub: - continue - # if match successful and best so far, store appropriately - ioub = ioubs[dind, gind] - m = gind - # if match made store id of match for both dt and gt - if m > -1: - dtIg[:, dind] = gtIg[m] - if gtIg[m]: - dtm[tind, dind] = gt[m]["id"] - gtm[tind, m] = d["id"] - # set unmatched detections outside of area range to ignore - a = np.array([d["area"] < aRng[0] or d["area"] > aRng[1] for d in dt]).reshape((1, len(dt))) - dtIg = np.logical_or(dtIg, np.logical_and(dtm == 0, np.repeat(a, T, 0))) - # store results for given image and category - # print('Done with the function', len(self.ious[imgId, catId])) - return { - "image_id": imgId, - "category_id": catId, - "aRng": aRng, - "maxDet": maxDet, - "dtIds": [d["id"] for d in dt], - "gtIds": [g["id"] for g in gt], - "dtMatches": dtm, - "gtMatches": gtm, - "dtScores": [d["score"] for d in dt], - "gtIgnore": gtIg, - "dtIgnore": dtIg, - } - - def accumulate(self, p=None): - """ - Accumulate per image evaluation results and store the result in self.eval - :param p: input params for evaluation - :return: None - """ - logger.info("Accumulating evaluation results...") - tic = time.time() - if not self.evalImgs: - logger.info("Please run evaluate() first") - # allows input customized parameters - if p is None: - p = self.params - p.catIds = p.catIds if p.useCats == 1 else [-1] - T = len(p.iouThrs) - R = len(p.recThrs) - K = len(p.catIds) if p.useCats else 1 - A = len(p.areaRng) - M = len(p.maxDets) - precision = -(np.ones((T, R, K, A, M))) # -1 for the precision of absent categories - recall = -(np.ones((T, K, A, M))) - - # create dictionary for future indexing - logger.info("Categories: {}".format(p.catIds)) - _pe = self._paramsEval - catIds = _pe.catIds if _pe.useCats else [-1] - setK = set(catIds) - setA = set(map(tuple, _pe.areaRng)) - setM = set(_pe.maxDets) - setI = set(_pe.imgIds) - # get inds to evaluate - k_list = [n for n, k in enumerate(p.catIds) if k in setK] - m_list = [m for n, m in enumerate(p.maxDets) if m in setM] - a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] - i_list = [n for n, i in enumerate(p.imgIds) if i in setI] - I0 = len(_pe.imgIds) - A0 = len(_pe.areaRng) - # retrieve E at each category, area range, and max number of detections - for k, k0 in enumerate(k_list): - Nk = k0 * A0 * I0 - for a, a0 in enumerate(a_list): - Na = a0 * I0 - for m, maxDet in enumerate(m_list): - E = [self.evalImgs[Nk + Na + i] for i in i_list] - E = [e for e in E if e is not None] - if len(E) == 0: - continue - dtScores = np.concatenate([e["dtScores"][0:maxDet] for e in E]) - - # different sorting method generates slightly different results. 
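As an aside on the matching loop above: in GPSM mode the score used to rank a detection/ground-truth pair is the geometric mean of the GPS value and the mask IoU, so a detection must localize both the mask and the surface points well to match. A minimal NumPy sketch with made-up scores (the array names are illustrative, not taken from the evaluator):

```python
import numpy as np

# Hypothetical per-pair scores for 2 detections x 3 ground-truth instances.
gps = np.array([[0.81, 0.10, 0.55],
                [0.20, 0.90, 0.40]])
mask_iou = np.array([[0.70, 0.05, 0.60],
                     [0.10, 0.85, 0.30]])

# GPSM score: geometric mean of surface-point quality (GPS) and mask IoU.
gpsm = np.sqrt(gps * mask_iou)
print(gpsm.round(3))  # [[0.753 0.071 0.574]
                      #  [0.141 0.875 0.346]]
```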
- # mergesort is used to be consistent as Matlab implementation. - inds = np.argsort(-dtScores, kind="mergesort") - - dtm = np.concatenate([e["dtMatches"][:, 0:maxDet] for e in E], axis=1)[:, inds] - dtIg = np.concatenate([e["dtIgnore"][:, 0:maxDet] for e in E], axis=1)[:, inds] - gtIg = np.concatenate([e["gtIgnore"] for e in E]) - npig = np.count_nonzero(gtIg == 0) - if npig == 0: - continue - tps = np.logical_and(dtm, np.logical_not(dtIg)) - fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg)) - tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float) - fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float) - for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): - tp = np.array(tp) - fp = np.array(fp) - nd = len(tp) - rc = tp / npig - pr = tp / (fp + tp + np.spacing(1)) - q = np.zeros((R,)) - - if nd: - recall[t, k, a, m] = rc[-1] - else: - recall[t, k, a, m] = 0 - - # numpy is slow without cython optimization for accessing elements - # use python array gets significant speed improvement - pr = pr.tolist() - q = q.tolist() - - for i in range(nd - 1, 0, -1): - if pr[i] > pr[i - 1]: - pr[i - 1] = pr[i] - - inds = np.searchsorted(rc, p.recThrs, side="left") - try: - for ri, pi in enumerate(inds): - q[ri] = pr[pi] - except Exception: - pass - precision[t, :, k, a, m] = np.array(q) - logger.info( - "Final: max precision {}, min precision {}".format(np.max(precision), np.min(precision)) - ) - self.eval = { - "params": p, - "counts": [T, R, K, A, M], - "date": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - "precision": precision, - "recall": recall, - } - toc = time.time() - logger.info("DONE (t={:0.2f}s).".format(toc - tic)) - - def summarize(self): - """ - Compute and display summary metrics for evaluation results. - Note this function can *only* be applied on the default parameter setting - """ - - def _summarize(ap=1, iouThr=None, areaRng="all", maxDets=100): - p = self.params - iStr = " {:<18} {} @[ {}={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}" - titleStr = "Average Precision" if ap == 1 else "Average Recall" - typeStr = "(AP)" if ap == 1 else "(AR)" - measure = "IoU" - if self.params.iouType == "keypoints": - measure = "OKS" - elif self.params.iouType == "densepose": - measure = "OGPS" - iouStr = ( - "{:0.2f}:{:0.2f}".format(p.iouThrs[0], p.iouThrs[-1]) - if iouThr is None - else "{:0.2f}".format(iouThr) - ) - - aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng] - mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets] - if ap == 1: - # dimension of precision: [TxRxKxAxM] - s = self.eval["precision"] - # IoU - if iouThr is not None: - t = np.where(np.abs(iouThr - p.iouThrs) < 0.001)[0] - s = s[t] - s = s[:, :, :, aind, mind] - else: - # dimension of recall: [TxKxAxM] - s = self.eval["recall"] - if iouThr is not None: - t = np.where(iouThr == p.iouThrs)[0] - s = s[t] - s = s[:, :, aind, mind] - if len(s[s > -1]) == 0: - mean_s = -1 - else: - mean_s = np.mean(s[s > -1]) - logger.info(iStr.format(titleStr, typeStr, measure, iouStr, areaRng, maxDets, mean_s)) - return mean_s - - def _summarizeDets(): - stats = np.zeros((12,)) - stats[0] = _summarize(1) - stats[1] = _summarize(1, iouThr=0.5, maxDets=self.params.maxDets[2]) - stats[2] = _summarize(1, iouThr=0.75, maxDets=self.params.maxDets[2]) - stats[3] = _summarize(1, areaRng="small", maxDets=self.params.maxDets[2]) - stats[4] = _summarize(1, areaRng="medium", maxDets=self.params.maxDets[2]) - stats[5] = _summarize(1, areaRng="large", maxDets=self.params.maxDets[2]) - stats[6] = 
_summarize(0, maxDets=self.params.maxDets[0]) - stats[7] = _summarize(0, maxDets=self.params.maxDets[1]) - stats[8] = _summarize(0, maxDets=self.params.maxDets[2]) - stats[9] = _summarize(0, areaRng="small", maxDets=self.params.maxDets[2]) - stats[10] = _summarize(0, areaRng="medium", maxDets=self.params.maxDets[2]) - stats[11] = _summarize(0, areaRng="large", maxDets=self.params.maxDets[2]) - return stats - - def _summarizeKps(): - stats = np.zeros((10,)) - stats[0] = _summarize(1, maxDets=20) - stats[1] = _summarize(1, maxDets=20, iouThr=0.5) - stats[2] = _summarize(1, maxDets=20, iouThr=0.75) - stats[3] = _summarize(1, maxDets=20, areaRng="medium") - stats[4] = _summarize(1, maxDets=20, areaRng="large") - stats[5] = _summarize(0, maxDets=20) - stats[6] = _summarize(0, maxDets=20, iouThr=0.5) - stats[7] = _summarize(0, maxDets=20, iouThr=0.75) - stats[8] = _summarize(0, maxDets=20, areaRng="medium") - stats[9] = _summarize(0, maxDets=20, areaRng="large") - return stats - - def _summarizeUvs(): - stats = np.zeros((10,)) - stats[0] = _summarize(1, maxDets=self.params.maxDets[0]) - stats[1] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.5) - stats[2] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.75) - stats[3] = _summarize(1, maxDets=self.params.maxDets[0], areaRng="medium") - stats[4] = _summarize(1, maxDets=self.params.maxDets[0], areaRng="large") - stats[5] = _summarize(0, maxDets=self.params.maxDets[0]) - stats[6] = _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.5) - stats[7] = _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.75) - stats[8] = _summarize(0, maxDets=self.params.maxDets[0], areaRng="medium") - stats[9] = _summarize(0, maxDets=self.params.maxDets[0], areaRng="large") - return stats - - def _summarizeUvsOld(): - stats = np.zeros((18,)) - stats[0] = _summarize(1, maxDets=self.params.maxDets[0]) - stats[1] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.5) - stats[2] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.55) - stats[3] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.60) - stats[4] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.65) - stats[5] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.70) - stats[6] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.75) - stats[7] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.80) - stats[8] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.85) - stats[9] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.90) - stats[10] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.95) - stats[11] = _summarize(1, maxDets=self.params.maxDets[0], areaRng="medium") - stats[12] = _summarize(1, maxDets=self.params.maxDets[0], areaRng="large") - stats[13] = _summarize(0, maxDets=self.params.maxDets[0]) - stats[14] = _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.5) - stats[15] = _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.75) - stats[16] = _summarize(0, maxDets=self.params.maxDets[0], areaRng="medium") - stats[17] = _summarize(0, maxDets=self.params.maxDets[0], areaRng="large") - return stats - - if not self.eval: - raise Exception("Please run accumulate() first") - iouType = self.params.iouType - if iouType in ["segm", "bbox"]: - summarize = _summarizeDets - elif iouType in ["keypoints"]: - summarize = _summarizeKps - elif iouType in ["densepose"]: - summarize = _summarizeUvs - self.stats = summarize() - - def __str__(self): - self.summarize() - - # ================ functions for dense pose 
============================== - def findAllClosestVerts(self, gt, U_points, V_points, Index_points): - # - I_gt = np.array(gt["dp_I"]) - U_gt = np.array(gt["dp_U"]) - V_gt = np.array(gt["dp_V"]) - # - # print(I_gt) - # - ClosestVerts = np.ones(Index_points.shape) * -1 - for i in np.arange(24): - # - if sum(Index_points == (i + 1)) > 0: - UVs = np.array( - [U_points[Index_points == (i + 1)], V_points[Index_points == (i + 1)]] - ) - Current_Part_UVs = self.Part_UVs[i] - Current_Part_ClosestVertInds = self.Part_ClosestVertInds[i] - D = ssd.cdist(Current_Part_UVs.transpose(), UVs.transpose()).squeeze() - ClosestVerts[Index_points == (i + 1)] = Current_Part_ClosestVertInds[ - np.argmin(D, axis=0) - ] - # - ClosestVertsGT = np.ones(Index_points.shape) * -1 - for i in np.arange(24): - if sum(I_gt == (i + 1)) > 0: - UVs = np.array([U_gt[I_gt == (i + 1)], V_gt[I_gt == (i + 1)]]) - Current_Part_UVs = self.Part_UVs[i] - Current_Part_ClosestVertInds = self.Part_ClosestVertInds[i] - D = ssd.cdist(Current_Part_UVs.transpose(), UVs.transpose()).squeeze() - ClosestVertsGT[I_gt == (i + 1)] = Current_Part_ClosestVertInds[np.argmin(D, axis=0)] - # - return ClosestVerts, ClosestVertsGT - - def getDistances(self, cVertsGT, cVerts): - - ClosestVertsTransformed = self.PDIST_transform[cVerts.astype(int) - 1] - ClosestVertsGTTransformed = self.PDIST_transform[cVertsGT.astype(int) - 1] - # - ClosestVertsTransformed[cVerts < 0] = 0 - ClosestVertsGTTransformed[cVertsGT < 0] = 0 - # - cVertsGT = ClosestVertsGTTransformed - cVerts = ClosestVertsTransformed - # - n = 27554 - dists = [] - for d in range(len(cVertsGT)): - if cVertsGT[d] > 0: - if cVerts[d] > 0: - i = cVertsGT[d] - 1 - j = cVerts[d] - 1 - if j == i: - dists.append(0) - elif j > i: - ccc = i - i = j - j = ccc - i = n - i - 1 - j = n - j - 1 - k = (n * (n - 1) / 2) - (n - i) * ((n - i) - 1) / 2 + j - i - 1 - k = (n * n - n) / 2 - k - 1 - dists.append(self.Pdist_matrix[int(k)][0]) - else: - i = n - i - 1 - j = n - j - 1 - k = (n * (n - 1) / 2) - (n - i) * ((n - i) - 1) / 2 + j - i - 1 - k = (n * n - n) / 2 - k - 1 - dists.append(self.Pdist_matrix[int(k)][0]) - else: - dists.append(np.inf) - return np.atleast_1d(np.array(dists).squeeze()) - - -class Params: - """ - Params for coco evaluation api - """ - - def setDetParams(self): - self.imgIds = [] - self.catIds = [] - # np.arange causes trouble. the data point on arange is slightly larger than the true value - self.iouThrs = np.linspace(0.5, 0.95, np.round((0.95 - 0.5) / 0.05) + 1, endpoint=True) - self.recThrs = np.linspace(0.0, 1.00, np.round((1.00 - 0.0) / 0.01) + 1, endpoint=True) - self.maxDets = [1, 10, 100] - self.areaRng = [ - [0 ** 2, 1e5 ** 2], - [0 ** 2, 32 ** 2], - [32 ** 2, 96 ** 2], - [96 ** 2, 1e5 ** 2], - ] - self.areaRngLbl = ["all", "small", "medium", "large"] - self.useCats = 1 - - def setKpParams(self): - self.imgIds = [] - self.catIds = [] - # np.arange causes trouble. 
the data point on arange is slightly larger than the true value - self.iouThrs = np.linspace(0.5, 0.95, np.round((0.95 - 0.5) / 0.05) + 1, endpoint=True) - self.recThrs = np.linspace(0.0, 1.00, np.round((1.00 - 0.0) / 0.01) + 1, endpoint=True) - self.maxDets = [20] - self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]] - self.areaRngLbl = ["all", "medium", "large"] - self.useCats = 1 - - def setUvParams(self): - self.imgIds = [] - self.catIds = [] - self.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True) - self.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True) - self.maxDets = [20] - self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]] - self.areaRngLbl = ["all", "medium", "large"] - self.useCats = 1 - - def __init__(self, iouType="segm"): - if iouType == "segm" or iouType == "bbox": - self.setDetParams() - elif iouType == "keypoints": - self.setKpParams() - elif iouType == "densepose": - self.setUvParams() - else: - raise Exception("iouType not supported") - self.iouType = iouType - # useSegm is deprecated - self.useSegm = None diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/densepose_head.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/densepose_head.py deleted file mode 100644 index 3639706..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/densepose_head.py +++ /dev/null @@ -1,1216 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import math -from dataclasses import dataclass -from enum import Enum -import fvcore.nn.weight_init as weight_init -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.config import CfgNode -from detectron2.layers import Conv2d, ConvTranspose2d, interpolate -from detectron2.structures.boxes import matched_boxlist_iou -from detectron2.utils.registry import Registry - -from .data.structures import DensePoseOutput - -ROI_DENSEPOSE_HEAD_REGISTRY = Registry("ROI_DENSEPOSE_HEAD") - - -class DensePoseUVConfidenceType(Enum): - """ - Statistical model type for confidence learning, possible values: - - "iid_iso": statistically independent identically distributed residuals - with anisotropic covariance - - "indep_aniso": statistically independent residuals with anisotropic - covariances - For details, see: - N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning - Dense Correspondences from Noisy Labels", p. 918--926, in Proc. 
NIPS 2019 - """ - - # fmt: off - IID_ISO = "iid_iso" - INDEP_ANISO = "indep_aniso" - # fmt: on - - -@dataclass -class DensePoseUVConfidenceConfig: - """ - Configuration options for confidence on UV data - """ - - enabled: bool = False - # lower bound on UV confidences - epsilon: float = 0.01 - type: DensePoseUVConfidenceType = DensePoseUVConfidenceType.IID_ISO - - -@dataclass -class DensePoseConfidenceModelConfig: - """ - Configuration options for confidence models - """ - - # confidence for U and V values - uv_confidence: DensePoseUVConfidenceConfig - - @staticmethod - def from_cfg(cfg: CfgNode) -> "DensePoseConfidenceModelConfig": - return DensePoseConfidenceModelConfig( - uv_confidence=DensePoseUVConfidenceConfig( - enabled=cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.ENABLED, - epsilon=cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON, - type=DensePoseUVConfidenceType(cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE), - ) - ) - - -def initialize_module_params(module): - for name, param in module.named_parameters(): - if "bias" in name: - nn.init.constant_(param, 0) - elif "weight" in name: - nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") - - -@ROI_DENSEPOSE_HEAD_REGISTRY.register() -class DensePoseDeepLabHead(nn.Module): - def __init__(self, cfg, input_channels): - super(DensePoseDeepLabHead, self).__init__() - # fmt: off - hidden_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM - kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL - norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM - self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS - self.use_nonlocal = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON - # fmt: on - pad_size = kernel_size // 2 - n_channels = input_channels - - self.ASPP = ASPP(input_channels, [6, 12, 56], n_channels) # 6, 12, 56 - self.add_module("ASPP", self.ASPP) - - if self.use_nonlocal: - self.NLBlock = NONLocalBlock2D(input_channels, bn_layer=True) - self.add_module("NLBlock", self.NLBlock) - # weight_init.c2_msra_fill(self.ASPP) - - for i in range(self.n_stacked_convs): - norm_module = nn.GroupNorm(32, hidden_dim) if norm == "GN" else None - layer = Conv2d( - n_channels, - hidden_dim, - kernel_size, - stride=1, - padding=pad_size, - bias=not norm, - norm=norm_module, - ) - weight_init.c2_msra_fill(layer) - n_channels = hidden_dim - layer_name = self._get_layer_name(i) - self.add_module(layer_name, layer) - self.n_out_channels = hidden_dim - # initialize_module_params(self) - - def forward(self, features): - x0 = features - x = self.ASPP(x0) - if self.use_nonlocal: - x = self.NLBlock(x) - output = x - for i in range(self.n_stacked_convs): - layer_name = self._get_layer_name(i) - x = getattr(self, layer_name)(x) - x = F.relu(x) - output = x - return output - - def _get_layer_name(self, i): - layer_name = "body_conv_fcn{}".format(i + 1) - return layer_name - - -# Copied from -# https://github.com/pytorch/vision/blob/master/torchvision/models/segmentation/deeplabv3.py -# See https://arxiv.org/pdf/1706.05587.pdf for details -class ASPPConv(nn.Sequential): - def __init__(self, in_channels, out_channels, dilation): - modules = [ - nn.Conv2d( - in_channels, out_channels, 3, padding=dilation, dilation=dilation, bias=False - ), - nn.GroupNorm(32, out_channels), - nn.ReLU(), - ] - super(ASPPConv, self).__init__(*modules) - - -class ASPPPooling(nn.Sequential): - def __init__(self, in_channels, out_channels): - super(ASPPPooling, self).__init__( - nn.AdaptiveAvgPool2d(1), - nn.Conv2d(in_channels, out_channels, 1, bias=False), - 
nn.GroupNorm(32, out_channels), - nn.ReLU(), - ) - - def forward(self, x): - size = x.shape[-2:] - x = super(ASPPPooling, self).forward(x) - return F.interpolate(x, size=size, mode="bilinear", align_corners=False) - - -class ASPP(nn.Module): - def __init__(self, in_channels, atrous_rates, out_channels): - super(ASPP, self).__init__() - modules = [] - modules.append( - nn.Sequential( - nn.Conv2d(in_channels, out_channels, 1, bias=False), - nn.GroupNorm(32, out_channels), - nn.ReLU(), - ) - ) - - rate1, rate2, rate3 = tuple(atrous_rates) - modules.append(ASPPConv(in_channels, out_channels, rate1)) - modules.append(ASPPConv(in_channels, out_channels, rate2)) - modules.append(ASPPConv(in_channels, out_channels, rate3)) - modules.append(ASPPPooling(in_channels, out_channels)) - - self.convs = nn.ModuleList(modules) - - self.project = nn.Sequential( - nn.Conv2d(5 * out_channels, out_channels, 1, bias=False), - # nn.BatchNorm2d(out_channels), - nn.ReLU() - # nn.Dropout(0.5) - ) - - def forward(self, x): - res = [] - for conv in self.convs: - res.append(conv(x)) - res = torch.cat(res, dim=1) - return self.project(res) - - -# copied from -# https://github.com/AlexHex7/Non-local_pytorch/blob/master/lib/non_local_embedded_gaussian.py -# See https://arxiv.org/abs/1711.07971 for details -class _NonLocalBlockND(nn.Module): - def __init__( - self, in_channels, inter_channels=None, dimension=3, sub_sample=True, bn_layer=True - ): - super(_NonLocalBlockND, self).__init__() - - assert dimension in [1, 2, 3] - - self.dimension = dimension - self.sub_sample = sub_sample - - self.in_channels = in_channels - self.inter_channels = inter_channels - - if self.inter_channels is None: - self.inter_channels = in_channels // 2 - if self.inter_channels == 0: - self.inter_channels = 1 - - if dimension == 3: - conv_nd = nn.Conv3d - max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2)) - bn = nn.GroupNorm # (32, hidden_dim) #nn.BatchNorm3d - elif dimension == 2: - conv_nd = nn.Conv2d - max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2)) - bn = nn.GroupNorm # (32, hidden_dim)nn.BatchNorm2d - else: - conv_nd = nn.Conv1d - max_pool_layer = nn.MaxPool1d(kernel_size=2) - bn = nn.GroupNorm # (32, hidden_dim)nn.BatchNorm1d - - self.g = conv_nd( - in_channels=self.in_channels, - out_channels=self.inter_channels, - kernel_size=1, - stride=1, - padding=0, - ) - - if bn_layer: - self.W = nn.Sequential( - conv_nd( - in_channels=self.inter_channels, - out_channels=self.in_channels, - kernel_size=1, - stride=1, - padding=0, - ), - bn(32, self.in_channels), - ) - nn.init.constant_(self.W[1].weight, 0) - nn.init.constant_(self.W[1].bias, 0) - else: - self.W = conv_nd( - in_channels=self.inter_channels, - out_channels=self.in_channels, - kernel_size=1, - stride=1, - padding=0, - ) - nn.init.constant_(self.W.weight, 0) - nn.init.constant_(self.W.bias, 0) - - self.theta = conv_nd( - in_channels=self.in_channels, - out_channels=self.inter_channels, - kernel_size=1, - stride=1, - padding=0, - ) - self.phi = conv_nd( - in_channels=self.in_channels, - out_channels=self.inter_channels, - kernel_size=1, - stride=1, - padding=0, - ) - - if sub_sample: - self.g = nn.Sequential(self.g, max_pool_layer) - self.phi = nn.Sequential(self.phi, max_pool_layer) - - def forward(self, x): - """ - :param x: (b, c, t, h, w) - :return: - """ - - batch_size = x.size(0) - - g_x = self.g(x).view(batch_size, self.inter_channels, -1) - g_x = g_x.permute(0, 2, 1) - - theta_x = self.theta(x).view(batch_size, self.inter_channels, -1) - theta_x = theta_x.permute(0, 2, 1) - 
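For readers skimming the non-local block's forward pass (which continues just below), the core of the embedded-Gaussian formulation is position-wise attention: softmax(theta(x)^T phi(x)) applied to g(x). A self-contained toy sketch with made-up channel sizes, leaving out the sub-sampling pooling and the final W projection:

```python
import torch
import torch.nn.functional as F

# Toy feature map: batch 2, 4 channels, 8x8 spatial grid; inter_channels = 2.
x = torch.randn(2, 4, 8, 8)
theta, phi, g = (torch.nn.Conv2d(4, 2, 1) for _ in range(3))

b, _, h, w = x.shape
t = theta(x).flatten(2).permute(0, 2, 1)   # (b, h*w, c')
p = phi(x).flatten(2)                      # (b, c', h*w)
gx = g(x).flatten(2).permute(0, 2, 1)      # (b, h*w, c')

attn = F.softmax(t @ p, dim=-1)            # affinities between all spatial positions
y = (attn @ gx).permute(0, 2, 1).reshape(b, 2, h, w)  # aggregated features
```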
phi_x = self.phi(x).view(batch_size, self.inter_channels, -1) - f = torch.matmul(theta_x, phi_x) - f_div_C = F.softmax(f, dim=-1) - - y = torch.matmul(f_div_C, g_x) - y = y.permute(0, 2, 1).contiguous() - y = y.view(batch_size, self.inter_channels, *x.size()[2:]) - W_y = self.W(y) - z = W_y + x - - return z - - -class NONLocalBlock2D(_NonLocalBlockND): - def __init__(self, in_channels, inter_channels=None, sub_sample=True, bn_layer=True): - super(NONLocalBlock2D, self).__init__( - in_channels, - inter_channels=inter_channels, - dimension=2, - sub_sample=sub_sample, - bn_layer=bn_layer, - ) - - -@ROI_DENSEPOSE_HEAD_REGISTRY.register() -class DensePoseV1ConvXHead(nn.Module): - def __init__(self, cfg, input_channels): - super(DensePoseV1ConvXHead, self).__init__() - # fmt: off - hidden_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM - kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL - self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS - # fmt: on - pad_size = kernel_size // 2 - n_channels = input_channels - for i in range(self.n_stacked_convs): - layer = Conv2d(n_channels, hidden_dim, kernel_size, stride=1, padding=pad_size) - layer_name = self._get_layer_name(i) - self.add_module(layer_name, layer) - n_channels = hidden_dim - self.n_out_channels = n_channels - initialize_module_params(self) - - def forward(self, features): - x = features - output = x - for i in range(self.n_stacked_convs): - layer_name = self._get_layer_name(i) - x = getattr(self, layer_name)(x) - x = F.relu(x) - output = x - return output - - def _get_layer_name(self, i): - layer_name = "body_conv_fcn{}".format(i + 1) - return layer_name - - -class DensePosePredictor(nn.Module): - def __init__(self, cfg, input_channels): - - super(DensePosePredictor, self).__init__() - dim_in = input_channels - n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS - dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1 - kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL - self.ann_index_lowres = ConvTranspose2d( - dim_in, n_segm_chan, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - self.index_uv_lowres = ConvTranspose2d( - dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - self.u_lowres = ConvTranspose2d( - dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - self.v_lowres = ConvTranspose2d( - dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - self.scale_factor = cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE - self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg) - self._initialize_confidence_estimation_layers(cfg, self.confidence_model_cfg, dim_in) - initialize_module_params(self) - - def forward(self, head_outputs): - ann_index_lowres = self.ann_index_lowres(head_outputs) - index_uv_lowres = self.index_uv_lowres(head_outputs) - u_lowres = self.u_lowres(head_outputs) - v_lowres = self.v_lowres(head_outputs) - - def interp2d(input): - return interpolate( - input, scale_factor=self.scale_factor, mode="bilinear", align_corners=False - ) - - ann_index = interp2d(ann_index_lowres) - index_uv = interp2d(index_uv_lowres) - u = interp2d(u_lowres) - v = interp2d(v_lowres) - ( - (sigma_1, sigma_2, kappa_u, kappa_v), - (sigma_1_lowres, sigma_2_lowres, kappa_u_lowres, kappa_v_lowres), - (ann_index, index_uv), - ) = self._forward_confidence_estimation_layers( - self.confidence_model_cfg, head_outputs, interp2d, ann_index, index_uv - ) - return ( - (ann_index, 
index_uv, u, v), - (ann_index_lowres, index_uv_lowres, u_lowres, v_lowres), - (sigma_1, sigma_2, kappa_u, kappa_v), - (sigma_1_lowres, sigma_2_lowres, kappa_u_lowres, kappa_v_lowres), - ) - - def _initialize_confidence_estimation_layers( - self, cfg: CfgNode, confidence_model_cfg: DensePoseConfidenceModelConfig, dim_in: int - ): - dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1 - kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL - if confidence_model_cfg.uv_confidence.enabled: - if confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO: - self.sigma_2_lowres = ConvTranspose2d( - dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - elif confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.INDEP_ANISO: - self.sigma_2_lowres = ConvTranspose2d( - dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - self.kappa_u_lowres = ConvTranspose2d( - dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - self.kappa_v_lowres = ConvTranspose2d( - dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - else: - raise ValueError( - f"Unknown confidence model type: {confidence_model_cfg.confidence_model_type}" - ) - - def _forward_confidence_estimation_layers( - self, confidence_model_cfg, head_outputs, interp2d, ann_index, index_uv - ): - sigma_1, sigma_2, kappa_u, kappa_v = None, None, None, None - sigma_1_lowres, sigma_2_lowres, kappa_u_lowres, kappa_v_lowres = None, None, None, None - if confidence_model_cfg.uv_confidence.enabled: - if confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO: - sigma_2_lowres = self.sigma_2_lowres(head_outputs) - sigma_2 = interp2d(sigma_2_lowres) - elif confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.INDEP_ANISO: - sigma_2_lowres = self.sigma_2_lowres(head_outputs) - kappa_u_lowres = self.kappa_u_lowres(head_outputs) - kappa_v_lowres = self.kappa_v_lowres(head_outputs) - sigma_2 = interp2d(sigma_2_lowres) - kappa_u = interp2d(kappa_u_lowres) - kappa_v = interp2d(kappa_v_lowres) - else: - raise ValueError( - f"Unknown confidence model type: {confidence_model_cfg.confidence_model_type}" - ) - return ( - (sigma_1, sigma_2, kappa_u, kappa_v), - (sigma_1_lowres, sigma_2_lowres, kappa_u_lowres, kappa_v_lowres), - (ann_index, index_uv), - ) - - -class DensePoseDataFilter(object): - def __init__(self, cfg): - self.iou_threshold = cfg.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD - - @torch.no_grad() - def __call__(self, proposals_with_targets): - """ - Filters proposals with targets to keep only the ones relevant for - DensePose training - proposals: list(Instances), each element of the list corresponds to - various instances (proposals, GT for boxes and densepose) for one - image - """ - proposals_filtered = [] - for proposals_per_image in proposals_with_targets: - if not hasattr(proposals_per_image, "gt_densepose"): - continue - assert hasattr(proposals_per_image, "gt_boxes") - assert hasattr(proposals_per_image, "proposal_boxes") - gt_boxes = proposals_per_image.gt_boxes - est_boxes = proposals_per_image.proposal_boxes - # apply match threshold for densepose head - iou = matched_boxlist_iou(gt_boxes, est_boxes) - iou_select = iou > self.iou_threshold - proposals_per_image = proposals_per_image[iou_select] - assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.proposal_boxes) - # filter out any target without densepose annotation - 
gt_densepose = proposals_per_image.gt_densepose - assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.gt_densepose) - selected_indices = [ - i for i, dp_target in enumerate(gt_densepose) if dp_target is not None - ] - if len(selected_indices) != len(gt_densepose): - proposals_per_image = proposals_per_image[selected_indices] - assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.proposal_boxes) - assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.gt_densepose) - proposals_filtered.append(proposals_per_image) - return proposals_filtered - - -def build_densepose_head(cfg, input_channels): - head_name = cfg.MODEL.ROI_DENSEPOSE_HEAD.NAME - return ROI_DENSEPOSE_HEAD_REGISTRY.get(head_name)(cfg, input_channels) - - -def build_densepose_predictor(cfg, input_channels): - predictor = DensePosePredictor(cfg, input_channels) - return predictor - - -def build_densepose_data_filter(cfg): - dp_filter = DensePoseDataFilter(cfg) - return dp_filter - - -def densepose_inference(densepose_outputs, densepose_confidences, detections): - """ - Infer dense pose estimate based on outputs from the DensePose head - and detections. The estimate for each detection instance is stored in its - "pred_densepose" attribute. - - Args: - densepose_outputs (tuple(`torch.Tensor`)): iterable containing 4 elements: - - s (:obj: `torch.Tensor`): coarse segmentation tensor of size (N, A, H, W), - - i (:obj: `torch.Tensor`): fine segmentation tensor of size (N, C, H, W), - - u (:obj: `torch.Tensor`): U coordinates for each class of size (N, C, H, W), - - v (:obj: `torch.Tensor`): V coordinates for each class of size (N, C, H, W), - where N is the total number of detections in a batch, - A is the number of coarse segmentations labels - (e.g. 15 for coarse body parts + background), - C is the number of fine segmentation labels - (e.g. 25 for fine body parts + background), - W is the resolution along the X axis - H is the resolution along the Y axis - densepose_confidences (tuple(`torch.Tensor`)): iterable containing 4 elements: - - sigma_1 (:obj: `torch.Tensor`): global confidences for UV coordinates - of size (N, C, H, W) - - sigma_2 (:obj: `torch.Tensor`): individual confidences for UV coordinates - of size (N, C, H, W) - - kappa_u (:obj: `torch.Tensor`): first component of confidence direction - vector of size (N, C, H, W) - - kappa_v (:obj: `torch.Tensor`): second component of confidence direction - vector of size (N, C, H, W) - detections (list[Instances]): A list of N Instances, where N is the number of images - in the batch. Instances are modified by this method: "pred_densepose" attribute - is added to each instance, the attribute contains the corresponding - DensePoseOutput object. - """ - # DensePose outputs: segmentation, body part indices, U, V - s, index_uv, u, v = densepose_outputs - sigma_1, sigma_2, kappa_u, kappa_v = densepose_confidences - k = 0 - for detection in detections: - n_i = len(detection) - s_i = s[k : k + n_i] - index_uv_i = index_uv[k : k + n_i] - u_i = u[k : k + n_i] - v_i = v[k : k + n_i] - _local_vars = locals() - confidences = { - name: _local_vars[name] - for name in ("sigma_1", "sigma_2", "kappa_u", "kappa_v") - if _local_vars.get(name) is not None - } - densepose_output_i = DensePoseOutput(s_i, index_uv_i, u_i, v_i, confidences) - detection.pred_densepose = densepose_output_i - k += n_i - - -def _linear_interpolation_utilities(v_norm, v0_src, size_src, v0_dst, size_dst, size_z): - """ - Computes utility values for linear interpolation at points v. 
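Before the docstring continues below, a toy example of the coordinate convention it describes: DensePose points are stored as normalized offsets relative to their bounding box and mapped back to absolute coordinates as v = v0_src + v_norm * size_src / 256. The numbers here are made up; note the function body below uses a 256.0 divisor while the GT points elsewhere in this diff are scaled by 255.0.

```python
import numpy as np

# Hypothetical box (X origin and width) and point offsets stored in the
# normalized 0..255 convention used for DensePose annotations.
x0, w = 40.0, 128.0
x_norm = np.array([0.0, 128.0, 255.0])

# Absolute image-space X coordinates, using the 256.0 divisor of the
# interpolation utilities below.
x_abs = x0 + x_norm * w / 256.0
print(x_abs)  # [ 40.  104.  167.5]
```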
- The points are given as normalized offsets in the source interval - (v0_src, v0_src + size_src), more precisely: - v = v0_src + v_norm * size_src / 256.0 - The computed utilities include lower points v_lo, upper points v_hi, - interpolation weights v_w and flags j_valid indicating whether the - points falls into the destination interval (v0_dst, v0_dst + size_dst). - - Args: - v_norm (:obj: `torch.Tensor`): tensor of size N containing - normalized point offsets - v0_src (:obj: `torch.Tensor`): tensor of size N containing - left bounds of source intervals for normalized points - size_src (:obj: `torch.Tensor`): tensor of size N containing - source interval sizes for normalized points - v0_dst (:obj: `torch.Tensor`): tensor of size N containing - left bounds of destination intervals - size_dst (:obj: `torch.Tensor`): tensor of size N containing - destination interval sizes - size_z (int): interval size for data to be interpolated - - Returns: - v_lo (:obj: `torch.Tensor`): int tensor of size N containing - indices of lower values used for interpolation, all values are - integers from [0, size_z - 1] - v_hi (:obj: `torch.Tensor`): int tensor of size N containing - indices of upper values used for interpolation, all values are - integers from [0, size_z - 1] - v_w (:obj: `torch.Tensor`): float tensor of size N containing - interpolation weights - j_valid (:obj: `torch.Tensor`): uint8 tensor of size N containing - 0 for points outside the estimation interval - (v0_est, v0_est + size_est) and 1 otherwise - """ - v = v0_src + v_norm * size_src / 256.0 - j_valid = (v - v0_dst >= 0) * (v - v0_dst < size_dst) - v_grid = (v - v0_dst) * size_z / size_dst - v_lo = v_grid.floor().long().clamp(min=0, max=size_z - 1) - v_hi = (v_lo + 1).clamp(max=size_z - 1) - v_grid = torch.min(v_hi.float(), v_grid) - v_w = v_grid - v_lo.float() - return v_lo, v_hi, v_w, j_valid - - -def _grid_sampling_utilities( - zh, zw, bbox_xywh_est, bbox_xywh_gt, index_gt, x_norm, y_norm, index_bbox -): - """ - Prepare tensors used in grid sampling. - - Args: - z_est (:obj: `torch.Tensor`): tensor of size (N,C,H,W) with estimated - values of Z to be extracted for the points X, Y and channel - indices I - bbox_xywh_est (:obj: `torch.Tensor`): tensor of size (N, 4) containing - estimated bounding boxes in format XYWH - bbox_xywh_gt (:obj: `torch.Tensor`): tensor of size (N, 4) containing - matched ground truth bounding boxes in format XYWH - index_gt (:obj: `torch.Tensor`): tensor of size K with point labels for - ground truth points - x_norm (:obj: `torch.Tensor`): tensor of size K with X normalized - coordinates of ground truth points. Image X coordinates can be - obtained as X = Xbbox + x_norm * Wbbox / 255 - y_norm (:obj: `torch.Tensor`): tensor of size K with Y normalized - coordinates of ground truth points. Image Y coordinates can be - obtained as Y = Ybbox + y_norm * Hbbox / 255 - index_bbox (:obj: `torch.Tensor`): tensor of size K with bounding box - indices for each ground truth point. 
The values are thus in - [0, N-1] - - Returns: - j_valid (:obj: `torch.Tensor`): uint8 tensor of size M containing - 0 for points to be discarded and 1 for points to be selected - y_lo (:obj: `torch.Tensor`): int tensor of indices of upper values - in z_est for each point - y_hi (:obj: `torch.Tensor`): int tensor of indices of lower values - in z_est for each point - x_lo (:obj: `torch.Tensor`): int tensor of indices of left values - in z_est for each point - x_hi (:obj: `torch.Tensor`): int tensor of indices of right values - in z_est for each point - w_ylo_xlo (:obj: `torch.Tensor`): float tensor of size M; - contains upper-left value weight for each point - w_ylo_xhi (:obj: `torch.Tensor`): float tensor of size M; - contains upper-right value weight for each point - w_yhi_xlo (:obj: `torch.Tensor`): float tensor of size M; - contains lower-left value weight for each point - w_yhi_xhi (:obj: `torch.Tensor`): float tensor of size M; - contains lower-right value weight for each point - """ - - x0_gt, y0_gt, w_gt, h_gt = bbox_xywh_gt[index_bbox].unbind(dim=1) - x0_est, y0_est, w_est, h_est = bbox_xywh_est[index_bbox].unbind(dim=1) - x_lo, x_hi, x_w, jx_valid = _linear_interpolation_utilities( - x_norm, x0_gt, w_gt, x0_est, w_est, zw - ) - y_lo, y_hi, y_w, jy_valid = _linear_interpolation_utilities( - y_norm, y0_gt, h_gt, y0_est, h_est, zh - ) - j_valid = jx_valid * jy_valid - - w_ylo_xlo = (1.0 - x_w) * (1.0 - y_w) - w_ylo_xhi = x_w * (1.0 - y_w) - w_yhi_xlo = (1.0 - x_w) * y_w - w_yhi_xhi = x_w * y_w - - return j_valid, y_lo, y_hi, x_lo, x_hi, w_ylo_xlo, w_ylo_xhi, w_yhi_xlo, w_yhi_xhi - - -def _extract_at_points_packed( - z_est, - index_bbox_valid, - slice_index_uv, - y_lo, - y_hi, - x_lo, - x_hi, - w_ylo_xlo, - w_ylo_xhi, - w_yhi_xlo, - w_yhi_xhi, -): - """ - Extract ground truth values z_gt for valid point indices and estimated - values z_est using bilinear interpolation over top-left (y_lo, x_lo), - top-right (y_lo, x_hi), bottom-left (y_hi, x_lo) and bottom-right - (y_hi, x_hi) values in z_est with corresponding weights: - w_ylo_xlo, w_ylo_xhi, w_yhi_xlo and w_yhi_xhi. 
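These four weights are the standard bilinear-interpolation coefficients produced by _grid_sampling_utilities above; a tiny sketch with made-up fractional offsets shows how they are formed and that they sum to one:

```python
import torch

# Fractional position of a sample point inside its surrounding grid cell
# (toy values; x_w and y_w correspond to the v_w outputs of the utilities).
x_w = torch.tensor([0.25])
y_w = torch.tensor([0.40])

w_ylo_xlo = (1.0 - x_w) * (1.0 - y_w)   # top-left neighbour
w_ylo_xhi = x_w * (1.0 - y_w)           # top-right neighbour
w_yhi_xlo = (1.0 - x_w) * y_w           # bottom-left neighbour
w_yhi_xhi = x_w * y_w                   # bottom-right neighbour

total = w_ylo_xlo + w_ylo_xhi + w_yhi_xlo + w_yhi_xhi
assert torch.allclose(total, torch.ones_like(total))
```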
- Use slice_index_uv to slice dim=1 in z_est - """ - z_est_sampled = ( - z_est[index_bbox_valid, slice_index_uv, y_lo, x_lo] * w_ylo_xlo - + z_est[index_bbox_valid, slice_index_uv, y_lo, x_hi] * w_ylo_xhi - + z_est[index_bbox_valid, slice_index_uv, y_hi, x_lo] * w_yhi_xlo - + z_est[index_bbox_valid, slice_index_uv, y_hi, x_hi] * w_yhi_xhi - ) - return z_est_sampled - - -def _resample_data( - z, bbox_xywh_src, bbox_xywh_dst, wout, hout, mode="nearest", padding_mode="zeros" -): - """ - Args: - z (:obj: `torch.Tensor`): tensor of size (N,C,H,W) with data to be - resampled - bbox_xywh_src (:obj: `torch.Tensor`): tensor of size (N,4) containing - source bounding boxes in format XYWH - bbox_xywh_dst (:obj: `torch.Tensor`): tensor of size (N,4) containing - destination bounding boxes in format XYWH - Return: - zresampled (:obj: `torch.Tensor`): tensor of size (N, C, Hout, Wout) - with resampled values of z, where D is the discretization size - """ - n = bbox_xywh_src.size(0) - assert n == bbox_xywh_dst.size(0), ( - "The number of " - "source ROIs for resampling ({}) should be equal to the number " - "of destination ROIs ({})".format(bbox_xywh_src.size(0), bbox_xywh_dst.size(0)) - ) - x0src, y0src, wsrc, hsrc = bbox_xywh_src.unbind(dim=1) - x0dst, y0dst, wdst, hdst = bbox_xywh_dst.unbind(dim=1) - x0dst_norm = 2 * (x0dst - x0src) / wsrc - 1 - y0dst_norm = 2 * (y0dst - y0src) / hsrc - 1 - x1dst_norm = 2 * (x0dst + wdst - x0src) / wsrc - 1 - y1dst_norm = 2 * (y0dst + hdst - y0src) / hsrc - 1 - grid_w = torch.arange(wout, device=z.device, dtype=torch.float) / wout - grid_h = torch.arange(hout, device=z.device, dtype=torch.float) / hout - grid_w_expanded = grid_w[None, None, :].expand(n, hout, wout) - grid_h_expanded = grid_h[None, :, None].expand(n, hout, wout) - dx_expanded = (x1dst_norm - x0dst_norm)[:, None, None].expand(n, hout, wout) - dy_expanded = (y1dst_norm - y0dst_norm)[:, None, None].expand(n, hout, wout) - x0_expanded = x0dst_norm[:, None, None].expand(n, hout, wout) - y0_expanded = y0dst_norm[:, None, None].expand(n, hout, wout) - grid_x = grid_w_expanded * dx_expanded + x0_expanded - grid_y = grid_h_expanded * dy_expanded + y0_expanded - grid = torch.stack((grid_x, grid_y), dim=3) - # resample Z from (N, C, H, W) into (N, C, Hout, Wout) - zresampled = F.grid_sample(z, grid, mode=mode, padding_mode=padding_mode, align_corners=True) - return zresampled - - -def _extract_single_tensors_from_matches_one_image( - proposals_targets, bbox_with_dp_offset, bbox_global_offset -): - i_gt_all = [] - x_norm_all = [] - y_norm_all = [] - u_gt_all = [] - v_gt_all = [] - s_gt_all = [] - bbox_xywh_gt_all = [] - bbox_xywh_est_all = [] - # Ibbox_all == k should be true for all data that corresponds - # to bbox_xywh_gt[k] and bbox_xywh_est[k] - # index k here is global wrt images - i_bbox_all = [] - # at offset k (k is global) contains index of bounding box data - # within densepose output tensor - i_with_dp = [] - - boxes_xywh_est = proposals_targets.proposal_boxes.clone() - boxes_xywh_gt = proposals_targets.gt_boxes.clone() - n_i = len(boxes_xywh_est) - assert n_i == len(boxes_xywh_gt) - - if n_i: - boxes_xywh_est.tensor[:, 2] -= boxes_xywh_est.tensor[:, 0] - boxes_xywh_est.tensor[:, 3] -= boxes_xywh_est.tensor[:, 1] - boxes_xywh_gt.tensor[:, 2] -= boxes_xywh_gt.tensor[:, 0] - boxes_xywh_gt.tensor[:, 3] -= boxes_xywh_gt.tensor[:, 1] - if hasattr(proposals_targets, "gt_densepose"): - densepose_gt = proposals_targets.gt_densepose - for k, box_xywh_est, box_xywh_gt, dp_gt in zip( - range(n_i), 
boxes_xywh_est.tensor, boxes_xywh_gt.tensor, densepose_gt - ): - if (dp_gt is not None) and (len(dp_gt.x) > 0): - i_gt_all.append(dp_gt.i) - x_norm_all.append(dp_gt.x) - y_norm_all.append(dp_gt.y) - u_gt_all.append(dp_gt.u) - v_gt_all.append(dp_gt.v) - s_gt_all.append(dp_gt.segm.unsqueeze(0)) - bbox_xywh_gt_all.append(box_xywh_gt.view(-1, 4)) - bbox_xywh_est_all.append(box_xywh_est.view(-1, 4)) - i_bbox_k = torch.full_like(dp_gt.i, bbox_with_dp_offset + len(i_with_dp)) - i_bbox_all.append(i_bbox_k) - i_with_dp.append(bbox_global_offset + k) - return ( - i_gt_all, - x_norm_all, - y_norm_all, - u_gt_all, - v_gt_all, - s_gt_all, - bbox_xywh_gt_all, - bbox_xywh_est_all, - i_bbox_all, - i_with_dp, - ) - - -def _extract_single_tensors_from_matches(proposals_with_targets): - i_img = [] - i_gt_all = [] - x_norm_all = [] - y_norm_all = [] - u_gt_all = [] - v_gt_all = [] - s_gt_all = [] - bbox_xywh_gt_all = [] - bbox_xywh_est_all = [] - i_bbox_all = [] - i_with_dp_all = [] - n = 0 - for i, proposals_targets_per_image in enumerate(proposals_with_targets): - n_i = proposals_targets_per_image.proposal_boxes.tensor.size(0) - if not n_i: - continue - ( - i_gt_img, - x_norm_img, - y_norm_img, - u_gt_img, - v_gt_img, - s_gt_img, - bbox_xywh_gt_img, - bbox_xywh_est_img, - i_bbox_img, - i_with_dp_img, - ) = _extract_single_tensors_from_matches_one_image( # noqa - proposals_targets_per_image, len(i_with_dp_all), n - ) - i_gt_all.extend(i_gt_img) - x_norm_all.extend(x_norm_img) - y_norm_all.extend(y_norm_img) - u_gt_all.extend(u_gt_img) - v_gt_all.extend(v_gt_img) - s_gt_all.extend(s_gt_img) - bbox_xywh_gt_all.extend(bbox_xywh_gt_img) - bbox_xywh_est_all.extend(bbox_xywh_est_img) - i_bbox_all.extend(i_bbox_img) - i_with_dp_all.extend(i_with_dp_img) - i_img.extend([i] * len(i_with_dp_img)) - n += n_i - # concatenate all data into a single tensor - if (n > 0) and (len(i_with_dp_all) > 0): - i_gt = torch.cat(i_gt_all, 0).long() - x_norm = torch.cat(x_norm_all, 0) - y_norm = torch.cat(y_norm_all, 0) - u_gt = torch.cat(u_gt_all, 0) - v_gt = torch.cat(v_gt_all, 0) - s_gt = torch.cat(s_gt_all, 0) - bbox_xywh_gt = torch.cat(bbox_xywh_gt_all, 0) - bbox_xywh_est = torch.cat(bbox_xywh_est_all, 0) - i_bbox = torch.cat(i_bbox_all, 0).long() - else: - i_gt = None - x_norm = None - y_norm = None - u_gt = None - v_gt = None - s_gt = None - bbox_xywh_gt = None - bbox_xywh_est = None - i_bbox = None - return ( - i_img, - i_with_dp_all, - bbox_xywh_est, - bbox_xywh_gt, - i_gt, - x_norm, - y_norm, - u_gt, - v_gt, - s_gt, - i_bbox, - ) - - -class IIDIsotropicGaussianUVLoss(nn.Module): - """ - Loss for the case of iid residuals with isotropic covariance: - $Sigma_i = sigma_i^2 I$ - The loss (negative log likelihood) is then: - $1/2 sum_{i=1}^n (log(2 pi) + 2 log sigma_i^2 + ||delta_i||^2 / sigma_i^2)$, - where $delta_i=(u - u', v - v')$ is a 2D vector containing UV coordinates - difference between estimated and ground truth UV values - For details, see: - N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning - Dense Correspondences from Noisy Labels", p. 918--926, in Proc. 
NIPS 2019 - """ - - def __init__(self, sigma_lower_bound: float): - super(IIDIsotropicGaussianUVLoss, self).__init__() - self.sigma_lower_bound = sigma_lower_bound - self.log2pi = math.log(2 * math.pi) - - def forward( - self, - u: torch.Tensor, - v: torch.Tensor, - sigma_u: torch.Tensor, - target_u: torch.Tensor, - target_v: torch.Tensor, - ): - # compute $\sigma_i^2$ - # use sigma_lower_bound to avoid degenerate solution for variance - # (sigma -> 0) - sigma2 = F.softplus(sigma_u) + self.sigma_lower_bound - # compute \|delta_i\|^2 - delta_t_delta = (u - target_u) ** 2 + (v - target_v) ** 2 - # the total loss from the formula above: - loss = 0.5 * (self.log2pi + 2 * torch.log(sigma2) + delta_t_delta / sigma2) - return loss.sum() - - -class IndepAnisotropicGaussianUVLoss(nn.Module): - """ - Loss for the case of independent residuals with anisotropic covariances: - $Sigma_i = sigma_i^2 I + r_i r_i^T$ - The loss (negative log likelihood) is then: - $1/2 sum_{i=1}^n (log(2 pi) - + log sigma_i^2 (sigma_i^2 + ||r_i||^2) - + ||delta_i||^2 / sigma_i^2 - - ^2 / (sigma_i^2 * (sigma_i^2 + ||r_i||^2)))$, - where $delta_i=(u - u', v - v')$ is a 2D vector containing UV coordinates - difference between estimated and ground truth UV values - For details, see: - N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning - Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019 - """ - - def __init__(self, sigma_lower_bound: float): - super(IndepAnisotropicGaussianUVLoss, self).__init__() - self.sigma_lower_bound = sigma_lower_bound - self.log2pi = math.log(2 * math.pi) - - def forward( - self, - u: torch.Tensor, - v: torch.Tensor, - sigma_u: torch.Tensor, - kappa_u_est: torch.Tensor, - kappa_v_est: torch.Tensor, - target_u: torch.Tensor, - target_v: torch.Tensor, - ): - # compute $\sigma_i^2$ - sigma2 = F.softplus(sigma_u) + self.sigma_lower_bound - # compute \|r_i\|^2 - r_sqnorm2 = kappa_u_est ** 2 + kappa_v_est ** 2 - delta_u = u - target_u - delta_v = v - target_v - # compute \|delta_i\|^2 - delta_sqnorm = delta_u ** 2 + delta_v ** 2 - delta_u_r_u = delta_u * kappa_u_est - delta_v_r_v = delta_v * kappa_v_est - # compute the scalar product - delta_r = delta_u_r_u + delta_v_r_v - # compute squared scalar product ^2 - delta_r_sqnorm = delta_r ** 2 - denom2 = sigma2 * (sigma2 + r_sqnorm2) - loss = 0.5 * ( - self.log2pi + torch.log(denom2) + delta_sqnorm / sigma2 - delta_r_sqnorm / denom2 - ) - return loss.sum() - - -class DensePoseLosses(object): - def __init__(self, cfg): - # fmt: off - self.heatmap_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE - self.w_points = cfg.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS - self.w_part = cfg.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS - self.w_segm = cfg.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS - self.n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS - # fmt: on - self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg) - if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO: - self.uv_loss_with_confidences = IIDIsotropicGaussianUVLoss( - self.confidence_model_cfg.uv_confidence.epsilon - ) - elif self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.INDEP_ANISO: - self.uv_loss_with_confidences = IndepAnisotropicGaussianUVLoss( - self.confidence_model_cfg.uv_confidence.epsilon - ) - - def __call__(self, proposals_with_gt, densepose_outputs, densepose_confidences): - losses = {} - # densepose outputs are computed for all images and all bounding boxes; - 
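The negative log-likelihood evaluated by IIDIsotropicGaussianUVLoss (defined just above) can be reproduced in a few lines. This toy sketch uses made-up predictions and the same softplus-plus-epsilon parameterization of the variance:

```python
import math
import torch
import torch.nn.functional as F

# Toy UV predictions, targets and a raw (pre-softplus) confidence output.
u_est = torch.tensor([0.20, 0.50, 0.90]); u_gt = torch.tensor([0.25, 0.50, 0.70])
v_est = torch.tensor([0.10, 0.40, 0.80]); v_gt = torch.tensor([0.10, 0.45, 0.90])
sigma_raw = torch.tensor([-1.0, 0.0, 2.0])
sigma_lower_bound = 0.01  # illustrative epsilon

sigma2 = F.softplus(sigma_raw) + sigma_lower_bound        # per-point variance
delta2 = (u_est - u_gt) ** 2 + (v_est - v_gt) ** 2        # squared UV residual
nll = 0.5 * (math.log(2 * math.pi) + 2 * torch.log(sigma2) + delta2 / sigma2)
loss = nll.sum()  # matches the .sum() reduction used by the loss module
```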
# i.e. if a batch has 4 images with (3, 1, 2, 1) proposals respectively, - # the outputs will have size(0) == 3+1+2+1 == 7 - s, index_uv, u, v = densepose_outputs - sigma_1, sigma_2, kappa_u, kappa_v = densepose_confidences - conf_type = self.confidence_model_cfg.uv_confidence.type - assert u.size(2) == v.size(2) - assert u.size(3) == v.size(3) - assert u.size(2) == index_uv.size(2) - assert u.size(3) == index_uv.size(3) - - with torch.no_grad(): - ( - index_uv_img, - i_with_dp, - bbox_xywh_est, - bbox_xywh_gt, - index_gt_all, - x_norm, - y_norm, - u_gt_all, - v_gt_all, - s_gt, - index_bbox, - ) = _extract_single_tensors_from_matches( # noqa - proposals_with_gt - ) - n_batch = len(i_with_dp) - - # NOTE: we need to keep the same computation graph on all the GPUs to - # perform reduction properly. Hence even if we have no data on one - # of the GPUs, we still need to generate the computation graph. - # Add fake (zero) loss in the form Tensor.sum() * 0 - if not n_batch: - losses["loss_densepose_I"] = index_uv.sum() * 0 - losses["loss_densepose_S"] = s.sum() * 0 - if self.confidence_model_cfg.uv_confidence.enabled: - losses["loss_densepose_UV"] = (u.sum() + v.sum()) * 0 - if conf_type == DensePoseUVConfidenceType.IID_ISO: - losses["loss_densepose_UV"] += sigma_2.sum() * 0 - elif conf_type == DensePoseUVConfidenceType.INDEP_ANISO: - losses["loss_densepose_UV"] += ( - sigma_2.sum() + kappa_u.sum() + kappa_v.sum() - ) * 0 - else: - losses["loss_densepose_U"] = u.sum() * 0 - losses["loss_densepose_V"] = v.sum() * 0 - return losses - - zh = u.size(2) - zw = u.size(3) - - ( - j_valid, - y_lo, - y_hi, - x_lo, - x_hi, - w_ylo_xlo, - w_ylo_xhi, - w_yhi_xlo, - w_yhi_xhi, - ) = _grid_sampling_utilities( # noqa - zh, zw, bbox_xywh_est, bbox_xywh_gt, index_gt_all, x_norm, y_norm, index_bbox - ) - - j_valid_fg = j_valid * (index_gt_all > 0) - - u_gt = u_gt_all[j_valid_fg] - u_est_all = _extract_at_points_packed( - u[i_with_dp], - index_bbox, - index_gt_all, - y_lo, - y_hi, - x_lo, - x_hi, - w_ylo_xlo, - w_ylo_xhi, - w_yhi_xlo, - w_yhi_xhi, - ) - u_est = u_est_all[j_valid_fg] - - v_gt = v_gt_all[j_valid_fg] - v_est_all = _extract_at_points_packed( - v[i_with_dp], - index_bbox, - index_gt_all, - y_lo, - y_hi, - x_lo, - x_hi, - w_ylo_xlo, - w_ylo_xhi, - w_yhi_xlo, - w_yhi_xhi, - ) - v_est = v_est_all[j_valid_fg] - - index_uv_gt = index_gt_all[j_valid] - index_uv_est_all = _extract_at_points_packed( - index_uv[i_with_dp], - index_bbox, - slice(None), - y_lo, - y_hi, - x_lo, - x_hi, - w_ylo_xlo[:, None], - w_ylo_xhi[:, None], - w_yhi_xlo[:, None], - w_yhi_xhi[:, None], - ) - index_uv_est = index_uv_est_all[j_valid, :] - - if self.confidence_model_cfg.uv_confidence.enabled: - sigma_2_est_all = _extract_at_points_packed( - sigma_2[i_with_dp], - index_bbox, - index_gt_all, - y_lo, - y_hi, - x_lo, - x_hi, - w_ylo_xlo, - w_ylo_xhi, - w_yhi_xlo, - w_yhi_xhi, - ) - sigma_2_est = sigma_2_est_all[j_valid_fg] - if conf_type in [DensePoseUVConfidenceType.INDEP_ANISO]: - kappa_u_est_all = _extract_at_points_packed( - kappa_u[i_with_dp], - index_bbox, - index_gt_all, - y_lo, - y_hi, - x_lo, - x_hi, - w_ylo_xlo, - w_ylo_xhi, - w_yhi_xlo, - w_yhi_xhi, - ) - kappa_u_est = kappa_u_est_all[j_valid_fg] - kappa_v_est_all = _extract_at_points_packed( - kappa_v[i_with_dp], - index_bbox, - index_gt_all, - y_lo, - y_hi, - x_lo, - x_hi, - w_ylo_xlo, - w_ylo_xhi, - w_yhi_xlo, - w_yhi_xhi, - ) - kappa_v_est = kappa_v_est_all[j_valid_fg] - - # Resample everything to the estimated data size, no need to resample - # S_est then: - s_est = 
s[i_with_dp] - with torch.no_grad(): - s_gt = _resample_data( - s_gt.unsqueeze(1), - bbox_xywh_gt, - bbox_xywh_est, - self.heatmap_size, - self.heatmap_size, - mode="nearest", - padding_mode="zeros", - ).squeeze(1) - - # add point-based losses: - if self.confidence_model_cfg.uv_confidence.enabled: - if conf_type == DensePoseUVConfidenceType.IID_ISO: - uv_loss = ( - self.uv_loss_with_confidences(u_est, v_est, sigma_2_est, u_gt, v_gt) - * self.w_points - ) - losses["loss_densepose_UV"] = uv_loss - elif conf_type == DensePoseUVConfidenceType.INDEP_ANISO: - uv_loss = ( - self.uv_loss_with_confidences( - u_est, v_est, sigma_2_est, kappa_u_est, kappa_v_est, u_gt, v_gt - ) - * self.w_points - ) - losses["loss_densepose_UV"] = uv_loss - else: - raise ValueError(f"Unknown confidence model type: {conf_type}") - else: - u_loss = F.smooth_l1_loss(u_est, u_gt, reduction="sum") * self.w_points - losses["loss_densepose_U"] = u_loss - v_loss = F.smooth_l1_loss(v_est, v_gt, reduction="sum") * self.w_points - losses["loss_densepose_V"] = v_loss - index_uv_loss = F.cross_entropy(index_uv_est, index_uv_gt.long()) * self.w_part - losses["loss_densepose_I"] = index_uv_loss - - if self.n_segm_chan == 2: - s_gt = s_gt > 0 - s_loss = F.cross_entropy(s_est, s_gt.long()) * self.w_segm - losses["loss_densepose_S"] = s_loss - return losses - - -def build_densepose_losses(cfg): - losses = DensePoseLosses(cfg) - return losses diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/evaluator.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/evaluator.py deleted file mode 100644 index 3bb002b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/evaluator.py +++ /dev/null @@ -1,158 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import contextlib -import copy -import io -import itertools -import json -import logging -import os -from collections import OrderedDict -import torch -from fvcore.common.file_io import PathManager -from pycocotools.coco import COCO - -from detectron2.data import MetadataCatalog -from detectron2.evaluation import DatasetEvaluator -from detectron2.structures import BoxMode -from detectron2.utils.comm import all_gather, is_main_process, synchronize -from detectron2.utils.logger import create_small_table - -from .densepose_coco_evaluation import DensePoseCocoEval, DensePoseEvalMode - - -class DensePoseCOCOEvaluator(DatasetEvaluator): - def __init__(self, dataset_name, distributed, output_dir=None): - self._distributed = distributed - self._output_dir = output_dir - - self._cpu_device = torch.device("cpu") - self._logger = logging.getLogger(__name__) - - self._metadata = MetadataCatalog.get(dataset_name) - json_file = PathManager.get_local_path(self._metadata.json_file) - with contextlib.redirect_stdout(io.StringIO()): - self._coco_api = COCO(json_file) - - def reset(self): - self._predictions = [] - - def process(self, inputs, outputs): - """ - Args: - inputs: the inputs to a COCO model (e.g., GeneralizedRCNN). - It is a list of dict. Each dict corresponds to an image and - contains keys like "height", "width", "file_name", "image_id". - outputs: the outputs of a COCO model. It is a list of dicts with key - "instances" that contains :class:`Instances`. - The :class:`Instances` object needs to have `densepose` field. 
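One detail of `DensePoseLosses.__call__` above worth an isolated illustration: when a replica receives no proposals with DensePose ground truth, the losses are still returned as zero-valued tensors that depend on the network outputs (`tensor.sum() * 0`), so every GPU builds the same computation graph and distributed gradient reduction does not stall. A minimal sketch of the same pattern, assuming a plain smooth-L1 UV loss:

```python
# Hypothetical sketch of the "fake zero loss" pattern used in DensePoseLosses.__call__
# to keep DDP gradient reduction in sync when a replica has no DensePose targets.
import torch
import torch.nn.functional as F

def uv_loss(pred_u: torch.Tensor, pred_v: torch.Tensor,
            gt_u: torch.Tensor, gt_v: torch.Tensor) -> torch.Tensor:
    if gt_u.numel() == 0:
        # No ground truth here: emit a loss that is exactly zero but still
        # touches the predictions, so backward() traverses the same graph
        # on every replica.
        return (pred_u.sum() + pred_v.sum()) * 0
    return (F.smooth_l1_loss(pred_u, gt_u, reduction="sum")
            + F.smooth_l1_loss(pred_v, gt_v, reduction="sum"))
```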
- """ - for input, output in zip(inputs, outputs): - instances = output["instances"].to(self._cpu_device) - - boxes = instances.pred_boxes.tensor.clone() - boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) - instances.pred_densepose = instances.pred_densepose.to_result(boxes) - - json_results = prediction_to_json(instances, input["image_id"]) - self._predictions.extend(json_results) - - def evaluate(self): - if self._distributed: - synchronize() - predictions = all_gather(self._predictions) - predictions = list(itertools.chain(*predictions)) - if not is_main_process(): - return - else: - predictions = self._predictions - - return copy.deepcopy(self._eval_predictions(predictions)) - - def _eval_predictions(self, predictions): - """ - Evaluate predictions on densepose. - Return results with the metrics of the tasks. - """ - self._logger.info("Preparing results for COCO format ...") - - if self._output_dir: - file_path = os.path.join(self._output_dir, "coco_densepose_results.json") - with open(file_path, "w") as f: - json.dump(predictions, f) - f.flush() - os.fsync(f.fileno()) - - self._logger.info("Evaluating predictions ...") - res = OrderedDict() - results_gps, results_gpsm = _evaluate_predictions_on_coco(self._coco_api, predictions) - res["densepose_gps"] = results_gps - res["densepose_gpsm"] = results_gpsm - return res - - -def prediction_to_json(instances, img_id): - """ - Args: - instances (Instances): the output of the model - img_id (str): the image id in COCO - - Returns: - list[dict]: the results in densepose evaluation format - """ - scores = instances.scores.tolist() - - results = [] - for k in range(len(instances)): - densepose = instances.pred_densepose[k] - result = { - "image_id": img_id, - "category_id": 1, # densepose only has one class - "bbox": densepose[1], - "score": scores[k], - "densepose": densepose, - } - results.append(result) - return results - - -def _evaluate_predictions_on_coco(coco_gt, coco_results): - metrics = ["AP", "AP50", "AP75", "APm", "APl"] - - logger = logging.getLogger(__name__) - - if len(coco_results) == 0: # cocoapi does not handle empty results very well - logger.warn("No predictions from the model! 
Set scores to -1") - results_gps = {metric: -1 for metric in metrics} - results_gpsm = {metric: -1 for metric in metrics} - return results_gps, results_gpsm - - coco_dt = coco_gt.loadRes(coco_results) - results_gps = _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, metrics) - logger.info( - "Evaluation results for densepose, GPS metric: \n" + create_small_table(results_gps) - ) - results_gpsm = _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt, metrics) - logger.info( - "Evaluation results for densepose, GPSm metric: \n" + create_small_table(results_gpsm) - ) - return results_gps, results_gpsm - - -def _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, metrics): - coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "densepose", dpEvalMode=DensePoseEvalMode.GPS) - coco_eval.evaluate() - coco_eval.accumulate() - coco_eval.summarize() - results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)} - return results - - -def _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt, metrics): - coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "densepose", dpEvalMode=DensePoseEvalMode.GPSM) - coco_eval.evaluate() - coco_eval.accumulate() - coco_eval.summarize() - results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)} - return results diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/modeling/test_time_augmentation.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/modeling/test_time_augmentation.py deleted file mode 100644 index fcf69db..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/modeling/test_time_augmentation.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from detectron2.modeling.test_time_augmentation import GeneralizedRCNNWithTTA - - -class DensePoseGeneralizedRCNNWithTTA(GeneralizedRCNNWithTTA): - def __init__(self, cfg, model, transform_data, tta_mapper=None, batch_size=1): - """ - Args: - cfg (CfgNode): - model (GeneralizedRCNN): a GeneralizedRCNN to apply TTA on. - transform_data (DensePoseTransformData): contains symmetry label - transforms used for horizontal flip - tta_mapper (callable): takes a dataset dict and returns a list of - augmented versions of the dataset dict. Defaults to - `DatasetMapperTTA(cfg)`. - batch_size (int): batch the augmented images into this batch size for inference. 
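Returning to `DensePoseCOCOEvaluator` defined earlier in this diff: a minimal sketch of how such an evaluator is typically driven, assuming a configured detectron2 `cfg` and a trained `model`, using the stock `build_detection_test_loader` / `inference_on_dataset` helpers (the dataset name and output directory below are placeholders):

```python
# Hypothetical sketch: running the DensePose COCO evaluator over a test split.
# Assumes `cfg` and `model` already exist; names below are placeholders.
from detectron2.data import build_detection_test_loader
from detectron2.evaluation import inference_on_dataset

from densepose.evaluator import DensePoseCOCOEvaluator  # class defined above

dataset_name = "densepose_coco_2014_minival"   # placeholder dataset id
evaluator = DensePoseCOCOEvaluator(dataset_name, distributed=False, output_dir="./eval_out")
loader = build_detection_test_loader(cfg, dataset_name)

# inference_on_dataset() calls evaluator.process() on every batch and then
# evaluator.evaluate(), which returns the GPS / GPSm AP tables built above.
results = inference_on_dataset(model, loader, evaluator)
print(results["densepose_gps"], results["densepose_gpsm"])
```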
- """ - self._transform_data = transform_data - super().__init__(cfg=cfg, model=model, tta_mapper=tta_mapper, batch_size=batch_size) - - # the implementation follows closely the one from detectron2/modeling - def _inference_one_image(self, input): - """ - Args: - input (dict): one dataset dict - - Returns: - dict: one output dict - """ - - augmented_inputs, aug_vars = self._get_augmented_inputs(input) - # Detect boxes from all augmented versions - with self._turn_off_roi_heads(["mask_on", "keypoint_on", "densepose_on"]): - # temporarily disable roi heads - all_boxes, all_scores, all_classes = self._get_augmented_boxes( - augmented_inputs, aug_vars - ) - merged_instances = self._merge_detections( - all_boxes, all_scores, all_classes, (aug_vars["height"], aug_vars["width"]) - ) - - if self.cfg.MODEL.MASK_ON or self.cfg.MODEL.DENSEPOSE_ON: - # Use the detected boxes to obtain new fields - augmented_instances = self._rescale_detected_boxes( - augmented_inputs, merged_instances, aug_vars - ) - # run forward on the detected boxes - outputs = self._batch_inference( - augmented_inputs, augmented_instances, do_postprocess=False - ) - # Delete now useless variables to avoid being out of memory - del augmented_inputs, augmented_instances, merged_instances - # average the predictions - if self.cfg.MODEL.MASK_ON: - outputs[0].pred_masks = self._reduce_pred_masks(outputs, aug_vars) - if self.cfg.MODEL.DENSEPOSE_ON: - outputs[0].pred_densepose = self._reduce_pred_densepose(outputs, aug_vars) - # postprocess - output = self._detector_postprocess(outputs[0], aug_vars) - return {"instances": output} - else: - return {"instances": merged_instances} - - def _reduce_pred_densepose(self, outputs, aug_vars): - for idx, output in enumerate(outputs): - if aug_vars["do_hflip"][idx]: - output.pred_densepose.hflip(self._transform_data) - # Less memory-intensive averaging - for attr in "SIUV": - setattr( - outputs[0].pred_densepose, - attr, - sum(getattr(o.pred_densepose, attr) for o in outputs) / len(outputs), - ) - return outputs[0].pred_densepose diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/roi_head.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/roi_head.py deleted file mode 100644 index 0231197..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/roi_head.py +++ /dev/null @@ -1,213 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import numpy as np -from typing import Dict -import fvcore.nn.weight_init as weight_init -import torch -import torch.nn as nn -from torch.nn import functional as F - -from detectron2.layers import Conv2d, ShapeSpec, get_norm -from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads -from detectron2.modeling.poolers import ROIPooler -from detectron2.modeling.roi_heads import select_foreground_proposals - -from .densepose_head import ( - build_densepose_data_filter, - build_densepose_head, - build_densepose_losses, - build_densepose_predictor, - densepose_inference, -) - - -class Decoder(nn.Module): - """ - A semantic segmentation head described in detail in the Panoptic Feature Pyramid Networks paper - (https://arxiv.org/abs/1901.02446). It takes FPN features as input and merges information from - all levels of the FPN into single output. 
- """ - - def __init__(self, cfg, input_shape: Dict[str, ShapeSpec], in_features): - super(Decoder, self).__init__() - - # fmt: off - self.in_features = in_features - feature_strides = {k: v.stride for k, v in input_shape.items()} - feature_channels = {k: v.channels for k, v in input_shape.items()} - num_classes = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES - conv_dims = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS - self.common_stride = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE - norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM - # fmt: on - - self.scale_heads = [] - for in_feature in self.in_features: - head_ops = [] - head_length = max( - 1, int(np.log2(feature_strides[in_feature]) - np.log2(self.common_stride)) - ) - for k in range(head_length): - conv = Conv2d( - feature_channels[in_feature] if k == 0 else conv_dims, - conv_dims, - kernel_size=3, - stride=1, - padding=1, - bias=not norm, - norm=get_norm(norm, conv_dims), - activation=F.relu, - ) - weight_init.c2_msra_fill(conv) - head_ops.append(conv) - if feature_strides[in_feature] != self.common_stride: - head_ops.append( - nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False) - ) - self.scale_heads.append(nn.Sequential(*head_ops)) - self.add_module(in_feature, self.scale_heads[-1]) - self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0) - weight_init.c2_msra_fill(self.predictor) - - def forward(self, features): - for i, _ in enumerate(self.in_features): - if i == 0: - x = self.scale_heads[i](features[i]) - else: - x = x + self.scale_heads[i](features[i]) - x = self.predictor(x) - return x - - -@ROI_HEADS_REGISTRY.register() -class DensePoseROIHeads(StandardROIHeads): - """ - A Standard ROIHeads which contains an addition of DensePose head. - """ - - def __init__(self, cfg, input_shape): - super().__init__(cfg, input_shape) - self._init_densepose_head(cfg, input_shape) - - def _init_densepose_head(self, cfg, input_shape): - # fmt: off - self.densepose_on = cfg.MODEL.DENSEPOSE_ON - if not self.densepose_on: - return - self.densepose_data_filter = build_densepose_data_filter(cfg) - dp_pooler_resolution = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION - dp_pooler_sampling_ratio = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO - dp_pooler_type = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE - self.use_decoder = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON - # fmt: on - if self.use_decoder: - dp_pooler_scales = (1.0 / input_shape[self.in_features[0]].stride,) - else: - dp_pooler_scales = tuple(1.0 / input_shape[k].stride for k in self.in_features) - in_channels = [input_shape[f].channels for f in self.in_features][0] - - if self.use_decoder: - self.decoder = Decoder(cfg, input_shape, self.in_features) - - self.densepose_pooler = ROIPooler( - output_size=dp_pooler_resolution, - scales=dp_pooler_scales, - sampling_ratio=dp_pooler_sampling_ratio, - pooler_type=dp_pooler_type, - ) - self.densepose_head = build_densepose_head(cfg, in_channels) - self.densepose_predictor = build_densepose_predictor( - cfg, self.densepose_head.n_out_channels - ) - self.densepose_losses = build_densepose_losses(cfg) - - def _forward_densepose(self, features, instances): - """ - Forward logic of the densepose prediction branch. - - Args: - features (list[Tensor]): #level input features for densepose prediction - instances (list[Instances]): the per-image instances to train/predict densepose. - In training, they can be the proposals. - In inference, they can be the predicted boxes. 
- - Returns: - In training, a dict of losses. - In inference, update `instances` with new fields "densepose" and return it. - """ - if not self.densepose_on: - return {} if self.training else instances - - features = [features[f] for f in self.in_features] - if self.training: - proposals, _ = select_foreground_proposals(instances, self.num_classes) - proposals_dp = self.densepose_data_filter(proposals) - if len(proposals_dp) > 0: - # NOTE may deadlock in DDP if certain workers have empty proposals_dp - proposal_boxes = [x.proposal_boxes for x in proposals_dp] - - if self.use_decoder: - features = [self.decoder(features)] - - features_dp = self.densepose_pooler(features, proposal_boxes) - densepose_head_outputs = self.densepose_head(features_dp) - densepose_outputs, _, confidences, _ = self.densepose_predictor( - densepose_head_outputs - ) - densepose_loss_dict = self.densepose_losses( - proposals_dp, densepose_outputs, confidences - ) - return densepose_loss_dict - else: - pred_boxes = [x.pred_boxes for x in instances] - - if self.use_decoder: - features = [self.decoder(features)] - - features_dp = self.densepose_pooler(features, pred_boxes) - if len(features_dp) > 0: - densepose_head_outputs = self.densepose_head(features_dp) - densepose_outputs, _, confidences, _ = self.densepose_predictor( - densepose_head_outputs - ) - else: - # If no detection occurred instances - # set densepose_outputs to empty tensors - empty_tensor = torch.zeros(size=(0, 0, 0, 0), device=features_dp.device) - densepose_outputs = tuple([empty_tensor] * 4) - confidences = tuple([empty_tensor] * 4) - - densepose_inference(densepose_outputs, confidences, instances) - return instances - - def forward(self, images, features, proposals, targets=None): - instances, losses = super().forward(images, features, proposals, targets) - del targets, images - - if self.training: - losses.update(self._forward_densepose(features, instances)) - return instances, losses - - def forward_with_given_boxes(self, features, instances): - """ - Use the given boxes in `instances` to produce other (non-box) per-ROI outputs. - - This is useful for downstream tasks where a box is known, but need to obtain - other attributes (outputs of other heads). - Test-time augmentation also uses this. - - Args: - features: same as in `forward()` - instances (list[Instances]): instances to predict other outputs. Expect the keys - "pred_boxes" and "pred_classes" to exist. - - Returns: - instances (list[Instances]): - the same `Instances` objects, with extra - fields such as `pred_masks` or `pred_keypoints`. - """ - - instances = super().forward_with_given_boxes(features, instances) - instances = self._forward_densepose(features, instances) - return instances diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/utils/dbhelper.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/utils/dbhelper.py deleted file mode 100644 index b28862c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/utils/dbhelper.py +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
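To make the wiring of `DensePoseROIHeads` above concrete, a hedged configuration sketch: the head is selected by the registry name used in the decorator, and the keys below mirror the ones read in `_init_densepose_head`. `add_densepose_config` is assumed to be the project helper that registers the `ROI_DENSEPOSE_HEAD` defaults; the values shown are illustrative, not the project's defaults.

```python
# Hypothetical sketch: enabling the DensePose ROI head through the config.
from detectron2.config import get_cfg
from densepose import add_densepose_config  # assumed project config helper

cfg = get_cfg()
add_densepose_config(cfg)
cfg.MODEL.ROI_HEADS.NAME = "DensePoseROIHeads"       # registry name used above
cfg.MODEL.DENSEPOSE_ON = True                        # checked in _forward_densepose
cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON = True       # route FPN features through Decoder
cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION = 28  # ROI pooler output size (illustrative)
```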
All Rights Reserved -from typing import Any, Dict, Optional, Tuple - - -class EntrySelector(object): - """ - Base class for entry selectors - """ - - @staticmethod - def from_string(spec: str) -> "EntrySelector": - if spec == "*": - return AllEntrySelector() - return FieldEntrySelector(spec) - - -class AllEntrySelector(EntrySelector): - """ - Selector that accepts all entries - """ - - SPECIFIER = "*" - - def __call__(self, entry): - return True - - -class FieldEntrySelector(EntrySelector): - """ - Selector that accepts only entries that match provided field - specifier(s). Only a limited set of specifiers is supported for now: - ::=[] - ::=[] - is a valid identifier - ::= "int" | "str" - ::= "=" - ::= "," - ::= ":" - ::= | - ::= - ::= "-" - is a string without spaces and special symbols - (e.g. , , , ) - """ - - _SPEC_DELIM = "," - _TYPE_DELIM = ":" - _RANGE_DELIM = "-" - _EQUAL = "=" - _ERROR_PREFIX = "Invalid field selector specifier" - - class _FieldEntryValuePredicate(object): - """ - Predicate that checks strict equality for the specified entry field - """ - - def __init__(self, name: str, typespec: str, value: str): - import builtins - - self.name = name - self.type = getattr(builtins, typespec) if typespec is not None else str - self.value = value - - def __call__(self, entry): - return entry[self.name] == self.type(self.value) - - class _FieldEntryRangePredicate(object): - """ - Predicate that checks whether an entry field falls into the specified range - """ - - def __init__(self, name: str, typespec: str, vmin: str, vmax: str): - import builtins - - self.name = name - self.type = getattr(builtins, typespec) if typespec is not None else str - self.vmin = vmin - self.vmax = vmax - - def __call__(self, entry): - return (entry[self.name] >= self.type(self.vmin)) and ( - entry[self.name] <= self.type(self.vmax) - ) - - def __init__(self, spec: str): - self._predicates = self._parse_specifier_into_predicates(spec) - - def __call__(self, entry: Dict[str, Any]): - for predicate in self._predicates: - if not predicate(entry): - return False - return True - - def _parse_specifier_into_predicates(self, spec: str): - predicates = [] - specs = spec.split(self._SPEC_DELIM) - for subspec in specs: - eq_idx = subspec.find(self._EQUAL) - if eq_idx > 0: - field_name_with_type = subspec[:eq_idx] - field_name, field_type = self._parse_field_name_type(field_name_with_type) - field_value_or_range = subspec[eq_idx + 1 :] - if self._is_range_spec(field_value_or_range): - vmin, vmax = self._get_range_spec(field_value_or_range) - predicate = FieldEntrySelector._FieldEntryRangePredicate( - field_name, field_type, vmin, vmax - ) - else: - predicate = FieldEntrySelector._FieldEntryValuePredicate( - field_name, field_type, field_value_or_range - ) - predicates.append(predicate) - elif eq_idx == 0: - self._parse_error(f'"{subspec}", field name is empty!') - else: - self._parse_error(f'"{subspec}", should have format ' "=!") - return predicates - - def _parse_field_name_type(self, field_name_with_type: str) -> Tuple[str, Optional[str]]: - type_delim_idx = field_name_with_type.find(self._TYPE_DELIM) - if type_delim_idx > 0: - field_name = field_name_with_type[:type_delim_idx] - field_type = field_name_with_type[type_delim_idx + 1 :] - elif type_delim_idx == 0: - self._parse_error(f'"{field_name_with_type}", field name is empty!') - else: - field_name = field_name_with_type - field_type = None - return field_name, field_type - - def _is_range_spec(self, field_value_or_range): - delim_idx = 
field_value_or_range.find(self._RANGE_DELIM) - return delim_idx > 0 - - def _get_range_spec(self, field_value_or_range): - if self._is_range_spec(field_value_or_range): - delim_idx = field_value_or_range.find(self._RANGE_DELIM) - vmin = field_value_or_range[:delim_idx] - vmax = field_value_or_range[delim_idx + 1 :] - return vmin, vmax - else: - self._parse_error('"field_value_or_range", range of values expected!') - - def _parse_error(self, msg): - raise ValueError(f"{self._ERROR_PREFIX}: {msg}") diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/utils/logger.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/utils/logger.py deleted file mode 100644 index e3fa45e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/utils/logger.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging - - -def verbosity_to_level(verbosity): - if verbosity is not None: - if verbosity == 0: - return logging.WARNING - elif verbosity == 1: - return logging.INFO - elif verbosity >= 2: - return logging.DEBUG - return logging.WARNING diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/utils/transform.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/utils/transform.py deleted file mode 100644 index b7cfe09..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/utils/transform.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from fvcore.common.file_io import PathManager - -from detectron2.data import MetadataCatalog - -from densepose import DensePoseTransformData - - -def load_for_dataset(dataset_name): - path = MetadataCatalog.get(dataset_name).densepose_transform_src - densepose_transform_data_fpath = PathManager.get_local_path(path) - return DensePoseTransformData.load(densepose_transform_data_fpath) - - -def load_from_cfg(cfg): - return load_for_dataset(cfg.DATASETS.TEST[0]) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/base.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/base.py deleted file mode 100644 index 2aa3e6e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/base.py +++ /dev/null @@ -1,191 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
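The specifier grammar in the `FieldEntrySelector` docstring above is easiest to read off concrete examples. A small sketch of selectors that `_parse_specifier_into_predicates` accepts: `=` for exact match, an optional `:int`/`:str` type annotation, `-` for an inclusive range, `,` to combine sub-specifiers, and `*` for everything (field names and values below are made up):

```python
# Hypothetical sketch: entry selectors as parsed by dbhelper.py above.
from densepose.utils.dbhelper import EntrySelector  # module defined above

select_all = EntrySelector.from_string("*")                  # AllEntrySelector
by_value   = EntrySelector.from_string("image_id:int=42")    # typed exact match
by_range   = EntrySelector.from_string("score:int=10-100")   # inclusive range
combined   = EntrySelector.from_string("name=foo,frame:int=1-5")

entry = {"image_id": 42, "score": 55, "name": "foo", "frame": 3}
print(select_all(entry), by_value(entry), by_range(entry), combined(entry))
# -> True True True True: each selector is a predicate over dict-like entries
```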
All Rights Reserved -import logging -import numpy as np -import cv2 -import torch - -Image = np.ndarray -Boxes = torch.Tensor - - -class MatrixVisualizer(object): - """ - Base visualizer for matrix data - """ - - def __init__( - self, - inplace=True, - cmap=cv2.COLORMAP_PARULA, - val_scale=1.0, - alpha=0.7, - interp_method_matrix=cv2.INTER_LINEAR, - interp_method_mask=cv2.INTER_NEAREST, - ): - self.inplace = inplace - self.cmap = cmap - self.val_scale = val_scale - self.alpha = alpha - self.interp_method_matrix = interp_method_matrix - self.interp_method_mask = interp_method_mask - - def visualize(self, image_bgr, mask, matrix, bbox_xywh): - self._check_image(image_bgr) - self._check_mask_matrix(mask, matrix) - if self.inplace: - image_target_bgr = image_bgr - else: - image_target_bgr = image_bgr * 0 - x, y, w, h = [int(v) for v in bbox_xywh] - if w <= 0 or h <= 0: - return image_bgr - mask, matrix = self._resize(mask, matrix, w, h) - mask_bg = np.tile((mask == 0)[:, :, np.newaxis], [1, 1, 3]) - matrix_scaled = matrix.astype(np.float32) * self.val_scale - _EPSILON = 1e-6 - if np.any(matrix_scaled > 255 + _EPSILON): - logger = logging.getLogger(__name__) - logger.warning( - f"Matrix has values > {255 + _EPSILON} after " f"scaling, clipping to [0..255]" - ) - matrix_scaled_8u = matrix_scaled.clip(0, 255).astype(np.uint8) - matrix_vis = cv2.applyColorMap(matrix_scaled_8u, self.cmap) - matrix_vis[mask_bg] = image_target_bgr[y : y + h, x : x + w, :][mask_bg] - image_target_bgr[y : y + h, x : x + w, :] = ( - image_target_bgr[y : y + h, x : x + w, :] * (1.0 - self.alpha) + matrix_vis * self.alpha - ) - return image_target_bgr.astype(np.uint8) - - def _resize(self, mask, matrix, w, h): - if (w != mask.shape[1]) or (h != mask.shape[0]): - mask = cv2.resize(mask, (w, h), self.interp_method_mask) - if (w != matrix.shape[1]) or (h != matrix.shape[0]): - matrix = cv2.resize(matrix, (w, h), self.interp_method_matrix) - return mask, matrix - - def _check_image(self, image_rgb): - assert len(image_rgb.shape) == 3 - assert image_rgb.shape[2] == 3 - assert image_rgb.dtype == np.uint8 - - def _check_mask_matrix(self, mask, matrix): - assert len(matrix.shape) == 2 - assert len(mask.shape) == 2 - assert mask.dtype == np.uint8 - - -class RectangleVisualizer(object): - - _COLOR_GREEN = (18, 127, 15) - - def __init__(self, color=_COLOR_GREEN, thickness=1): - self.color = color - self.thickness = thickness - - def visualize(self, image_bgr, bbox_xywh, color=None, thickness=None): - x, y, w, h = bbox_xywh - color = color or self.color - thickness = thickness or self.thickness - cv2.rectangle(image_bgr, (int(x), int(y)), (int(x + w), int(y + h)), color, thickness) - return image_bgr - - -class PointsVisualizer(object): - - _COLOR_GREEN = (18, 127, 15) - - def __init__(self, color_bgr=_COLOR_GREEN, r=5): - self.color_bgr = color_bgr - self.r = r - - def visualize(self, image_bgr, pts_xy, colors_bgr=None, rs=None): - for j, pt_xy in enumerate(pts_xy): - x, y = pt_xy - color_bgr = colors_bgr[j] if colors_bgr is not None else self.color_bgr - r = rs[j] if rs is not None else self.r - cv2.circle(image_bgr, (x, y), r, color_bgr, -1) - return image_bgr - - -class TextVisualizer(object): - - _COLOR_GRAY = (218, 227, 218) - _COLOR_WHITE = (255, 255, 255) - - def __init__( - self, - font_face=cv2.FONT_HERSHEY_SIMPLEX, - font_color_bgr=_COLOR_GRAY, - font_scale=0.35, - font_line_type=cv2.LINE_AA, - font_line_thickness=1, - fill_color_bgr=_COLOR_WHITE, - fill_color_transparency=1.0, - frame_color_bgr=_COLOR_WHITE, - 
frame_color_transparency=1.0, - frame_thickness=1, - ): - self.font_face = font_face - self.font_color_bgr = font_color_bgr - self.font_scale = font_scale - self.font_line_type = font_line_type - self.font_line_thickness = font_line_thickness - self.fill_color_bgr = fill_color_bgr - self.fill_color_transparency = fill_color_transparency - self.frame_color_bgr = frame_color_bgr - self.frame_color_transparency = frame_color_transparency - self.frame_thickness = frame_thickness - - def visualize(self, image_bgr, txt, topleft_xy): - txt_w, txt_h = self.get_text_size_wh(txt) - topleft_xy = tuple(map(int, topleft_xy)) - x, y = topleft_xy - if self.frame_color_transparency < 1.0: - t = self.frame_thickness - image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :] = ( - image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :] - * self.frame_color_transparency - + np.array(self.frame_color_bgr) * (1.0 - self.frame_color_transparency) - ).astype(np.float) - if self.fill_color_transparency < 1.0: - image_bgr[y : y + txt_h, x : x + txt_w, :] = ( - image_bgr[y : y + txt_h, x : x + txt_w, :] * self.fill_color_transparency - + np.array(self.fill_color_bgr) * (1.0 - self.fill_color_transparency) - ).astype(np.float) - cv2.putText( - image_bgr, - txt, - topleft_xy, - self.font_face, - self.font_scale, - self.font_color_bgr, - self.font_line_thickness, - self.font_line_type, - ) - return image_bgr - - def get_text_size_wh(self, txt): - ((txt_w, txt_h), _) = cv2.getTextSize( - txt, self.font_face, self.font_scale, self.font_line_thickness - ) - return txt_w, txt_h - - -class CompoundVisualizer(object): - def __init__(self, visualizers): - self.visualizers = visualizers - - def visualize(self, image_bgr, data): - assert len(data) == len( - self.visualizers - ), "The number of datas {} should match the number of visualizers" " {}".format( - len(data), len(self.visualizers) - ) - image = image_bgr - for i, visualizer in enumerate(self.visualizers): - image = visualizer.visualize(image, data[i]) - return image - - def __str__(self): - visualizer_str = ", ".join([str(v) for v in self.visualizers]) - return "Compound Visualizer [{}]".format(visualizer_str) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/bounding_box.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/bounding_box.py deleted file mode 100644 index d7951d6..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/bounding_box.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -from .base import RectangleVisualizer, TextVisualizer - - -class BoundingBoxVisualizer(object): - def __init__(self): - self.rectangle_visualizer = RectangleVisualizer() - - def visualize(self, image_bgr, boxes_xywh): - for bbox_xywh in boxes_xywh: - image_bgr = self.rectangle_visualizer.visualize(image_bgr, bbox_xywh) - return image_bgr - - -class ScoredBoundingBoxVisualizer(object): - def __init__(self, bbox_visualizer_params=None, score_visualizer_params=None): - if bbox_visualizer_params is None: - bbox_visualizer_params = {} - if score_visualizer_params is None: - score_visualizer_params = {} - self.visualizer_bbox = RectangleVisualizer(**bbox_visualizer_params) - self.visualizer_score = TextVisualizer(**score_visualizer_params) - - def visualize(self, image_bgr, scored_bboxes): - boxes_xywh, box_scores = scored_bboxes - assert len(boxes_xywh) == len( - box_scores - ), "Number of bounding boxes {} should be equal to the number of scores {}".format( - len(boxes_xywh), len(box_scores) - ) - for i, box_xywh in enumerate(boxes_xywh): - score_i = box_scores[i] - image_bgr = self.visualizer_bbox.visualize(image_bgr, box_xywh) - score_txt = "{0:6.4f}".format(score_i) - topleft_xy = box_xywh[0], box_xywh[1] - image_bgr = self.visualizer_score.visualize(image_bgr, score_txt, topleft_xy) - return image_bgr diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/densepose.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/densepose.py deleted file mode 100644 index f2e77dc..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/densepose.py +++ /dev/null @@ -1,593 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
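To show how the base visualizers above are meant to be driven, here is a minimal sketch that overlays a synthetic matrix on a dummy frame with `MatrixVisualizer` and then draws a scored box with `ScoredBoundingBoxVisualizer`; all of the data is synthetic, and the import paths simply follow the file locations in this diff.

```python
# Hypothetical sketch: exercising MatrixVisualizer / ScoredBoundingBoxVisualizer
# on synthetic data (requires numpy and OpenCV).
import numpy as np

from densepose.vis.base import MatrixVisualizer
from densepose.vis.bounding_box import ScoredBoundingBoxVisualizer

image_bgr = np.zeros((240, 320, 3), dtype=np.uint8)   # dummy frame
bbox_xywh = [40, 30, 128, 160]                        # x, y, w, h

# Fake per-pixel values (e.g. a U map scaled to 0..255) and a foreground mask,
# both sized (h, w) to match the box so no resizing is needed.
matrix = np.random.randint(0, 256, size=(160, 128), dtype=np.uint8)
mask = np.ones((160, 128), dtype=np.uint8)

mat_vis = MatrixVisualizer(inplace=True, val_scale=1.0, alpha=0.7)
image_bgr = mat_vis.visualize(image_bgr, mask, matrix, bbox_xywh)

# Scored boxes come as a (boxes_xywh, scores) pair; each box gets a rectangle
# plus a text label rendered by TextVisualizer.
box_vis = ScoredBoundingBoxVisualizer()
image_bgr = box_vis.visualize(image_bgr, ([bbox_xywh], [0.97]))
```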
All Rights Reserved -import logging -import numpy as np -from typing import Iterable, Optional, Tuple -import cv2 - -from ..data.structures import DensePoseDataRelative, DensePoseOutput, DensePoseResult -from .base import Boxes, Image, MatrixVisualizer, PointsVisualizer - - -class DensePoseResultsVisualizer(object): - def visualize(self, image_bgr: Image, densepose_result: Optional[DensePoseResult]) -> Image: - if densepose_result is None: - return image_bgr - context = self.create_visualization_context(image_bgr) - for i, result_encoded_w_shape in enumerate(densepose_result.results): - iuv_arr = DensePoseResult.decode_png_data(*result_encoded_w_shape) - bbox_xywh = densepose_result.boxes_xywh[i] - self.visualize_iuv_arr(context, iuv_arr, bbox_xywh) - image_bgr = self.context_to_image_bgr(context) - return image_bgr - - -class DensePoseMaskedColormapResultsVisualizer(DensePoseResultsVisualizer): - def __init__( - self, - data_extractor, - segm_extractor, - inplace=True, - cmap=cv2.COLORMAP_PARULA, - alpha=0.7, - val_scale=1.0, - ): - self.mask_visualizer = MatrixVisualizer( - inplace=inplace, cmap=cmap, val_scale=val_scale, alpha=alpha - ) - self.data_extractor = data_extractor - self.segm_extractor = segm_extractor - - def create_visualization_context(self, image_bgr: Image): - return image_bgr - - def context_to_image_bgr(self, context): - return context - - def get_image_bgr_from_context(self, context): - return context - - def visualize_iuv_arr(self, context, iuv_arr, bbox_xywh): - image_bgr = self.get_image_bgr_from_context(context) - matrix = self.data_extractor(iuv_arr) - segm = self.segm_extractor(iuv_arr) - mask = np.zeros(matrix.shape, dtype=np.uint8) - mask[segm > 0] = 1 - image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh) - return image_bgr - - -def _extract_i_from_iuvarr(iuv_arr): - return iuv_arr[0, :, :] - - -def _extract_u_from_iuvarr(iuv_arr): - return iuv_arr[1, :, :] - - -def _extract_v_from_iuvarr(iuv_arr): - return iuv_arr[2, :, :] - - -class DensePoseResultsMplContourVisualizer(DensePoseResultsVisualizer): - def __init__(self, levels=10, **kwargs): - self.levels = levels - self.plot_args = kwargs - - def create_visualization_context(self, image_bgr: Image): - import matplotlib.pyplot as plt - from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas - - context = {} - context["image_bgr"] = image_bgr - dpi = 100 - height_inches = float(image_bgr.shape[0]) / dpi - width_inches = float(image_bgr.shape[1]) / dpi - fig = plt.figure(figsize=(width_inches, height_inches), dpi=dpi) - plt.axes([0, 0, 1, 1]) - plt.axis("off") - context["fig"] = fig - canvas = FigureCanvas(fig) - context["canvas"] = canvas - extent = (0, image_bgr.shape[1], image_bgr.shape[0], 0) - plt.imshow(image_bgr[:, :, ::-1], extent=extent) - return context - - def context_to_image_bgr(self, context): - fig = context["fig"] - w, h = map(int, fig.get_size_inches() * fig.get_dpi()) - canvas = context["canvas"] - canvas.draw() - image_1d = np.fromstring(canvas.tostring_rgb(), dtype="uint8") - image_rgb = image_1d.reshape(h, w, 3) - image_bgr = image_rgb[:, :, ::-1].copy() - return image_bgr - - def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> Image: - import matplotlib.pyplot as plt - - u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0 - v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0 - extent = ( - bbox_xywh[0], - bbox_xywh[0] + bbox_xywh[2], - bbox_xywh[1], - bbox_xywh[1] + bbox_xywh[3], - ) - plt.contour(u, 
self.levels, extent=extent, **self.plot_args) - plt.contour(v, self.levels, extent=extent, **self.plot_args) - - -class DensePoseResultsCustomContourVisualizer(DensePoseResultsVisualizer): - """ - Contour visualization using marching squares - """ - - def __init__(self, levels=10, **kwargs): - # TODO: colormap is hardcoded - cmap = cv2.COLORMAP_PARULA - if isinstance(levels, int): - self.levels = np.linspace(0, 1, levels) - else: - self.levels = levels - if "linewidths" in kwargs: - self.linewidths = kwargs["linewidths"] - else: - self.linewidths = [1] * len(self.levels) - self.plot_args = kwargs - img_colors_bgr = cv2.applyColorMap((self.levels * 255).astype(np.uint8), cmap) - self.level_colors_bgr = [ - [int(v) for v in img_color_bgr.ravel()] for img_color_bgr in img_colors_bgr - ] - - def create_visualization_context(self, image_bgr: Image): - return image_bgr - - def context_to_image_bgr(self, context): - return context - - def get_image_bgr_from_context(self, context): - return context - - def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> Image: - image_bgr = self.get_image_bgr_from_context(context) - segm = _extract_i_from_iuvarr(iuv_arr) - u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0 - v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0 - self._contours(image_bgr, u, segm, bbox_xywh) - self._contours(image_bgr, v, segm, bbox_xywh) - - def _contours(self, image_bgr, arr, segm, bbox_xywh): - for part_idx in range(1, DensePoseDataRelative.N_PART_LABELS + 1): - mask = segm == part_idx - if not np.any(mask): - continue - arr_min = np.amin(arr[mask]) - arr_max = np.amax(arr[mask]) - I, J = np.nonzero(mask) - i0 = np.amin(I) - i1 = np.amax(I) + 1 - j0 = np.amin(J) - j1 = np.amax(J) + 1 - if (j1 == j0 + 1) or (i1 == i0 + 1): - continue - Nw = arr.shape[1] - 1 - Nh = arr.shape[0] - 1 - for level_idx, level in enumerate(self.levels): - if (level < arr_min) or (level > arr_max): - continue - vp = arr[i0:i1, j0:j1] >= level - bin_codes = vp[:-1, :-1] + vp[1:, :-1] * 2 + vp[1:, 1:] * 4 + vp[:-1, 1:] * 8 - mp = mask[i0:i1, j0:j1] - bin_mask_codes = mp[:-1, :-1] + mp[1:, :-1] * 2 + mp[1:, 1:] * 4 + mp[:-1, 1:] * 8 - it = np.nditer(bin_codes, flags=["multi_index"]) - color_bgr = self.level_colors_bgr[level_idx] - linewidth = self.linewidths[level_idx] - while not it.finished: - if (it[0] != 0) and (it[0] != 15): - i, j = it.multi_index - if bin_mask_codes[i, j] != 0: - self._draw_line( - image_bgr, - arr, - mask, - level, - color_bgr, - linewidth, - it[0], - it.multi_index, - bbox_xywh, - Nw, - Nh, - (i0, j0), - ) - it.iternext() - - def _draw_line( - self, - image_bgr, - arr, - mask, - v, - color_bgr, - linewidth, - bin_code, - multi_idx, - bbox_xywh, - Nw, - Nh, - offset, - ): - lines = self._bin_code_2_lines(arr, v, bin_code, multi_idx, Nw, Nh, offset) - x0, y0, w, h = bbox_xywh - x1 = x0 + w - y1 = y0 + h - for line in lines: - x0r, y0r = line[0] - x1r, y1r = line[1] - pt0 = (int(x0 + x0r * (x1 - x0)), int(y0 + y0r * (y1 - y0))) - pt1 = (int(x0 + x1r * (x1 - x0)), int(y0 + y1r * (y1 - y0))) - cv2.line(image_bgr, pt0, pt1, color_bgr, linewidth) - - def _bin_code_2_lines(self, arr, v, bin_code, multi_idx, Nw, Nh, offset): - i0, j0 = offset - i, j = multi_idx - i += i0 - j += j0 - v0, v1, v2, v3 = arr[i, j], arr[i + 1, j], arr[i + 1, j + 1], arr[i, j + 1] - x0i = float(j) / Nw - y0j = float(i) / Nh - He = 1.0 / Nh - We = 1.0 / Nw - if (bin_code == 1) or (bin_code == 14): - a = (v - v0) / (v1 - v0) - b = (v - v0) / (v3 - v0) - pt1 = (x0i, y0j + a * 
He) - pt2 = (x0i + b * We, y0j) - return [(pt1, pt2)] - elif (bin_code == 2) or (bin_code == 13): - a = (v - v0) / (v1 - v0) - b = (v - v1) / (v2 - v1) - pt1 = (x0i, y0j + a * He) - pt2 = (x0i + b * We, y0j + He) - return [(pt1, pt2)] - elif (bin_code == 3) or (bin_code == 12): - a = (v - v0) / (v3 - v0) - b = (v - v1) / (v2 - v1) - pt1 = (x0i + a * We, y0j) - pt2 = (x0i + b * We, y0j + He) - return [(pt1, pt2)] - elif (bin_code == 4) or (bin_code == 11): - a = (v - v1) / (v2 - v1) - b = (v - v3) / (v2 - v3) - pt1 = (x0i + a * We, y0j + He) - pt2 = (x0i + We, y0j + b * He) - return [(pt1, pt2)] - elif (bin_code == 6) or (bin_code == 9): - a = (v - v0) / (v1 - v0) - b = (v - v3) / (v2 - v3) - pt1 = (x0i, y0j + a * He) - pt2 = (x0i + We, y0j + b * He) - return [(pt1, pt2)] - elif (bin_code == 7) or (bin_code == 8): - a = (v - v0) / (v3 - v0) - b = (v - v3) / (v2 - v3) - pt1 = (x0i + a * We, y0j) - pt2 = (x0i + We, y0j + b * He) - return [(pt1, pt2)] - elif bin_code == 5: - a1 = (v - v0) / (v1 - v0) - b1 = (v - v1) / (v2 - v1) - pt11 = (x0i, y0j + a1 * He) - pt12 = (x0i + b1 * We, y0j + He) - a2 = (v - v0) / (v3 - v0) - b2 = (v - v3) / (v2 - v3) - pt21 = (x0i + a2 * We, y0j) - pt22 = (x0i + We, y0j + b2 * He) - return [(pt11, pt12), (pt21, pt22)] - elif bin_code == 10: - a1 = (v - v0) / (v3 - v0) - b1 = (v - v0) / (v1 - v0) - pt11 = (x0i + a1 * We, y0j) - pt12 = (x0i, y0j + b1 * He) - a2 = (v - v1) / (v2 - v1) - b2 = (v - v3) / (v2 - v3) - pt21 = (x0i + a2 * We, y0j + He) - pt22 = (x0i + We, y0j + b2 * He) - return [(pt11, pt12), (pt21, pt22)] - return [] - - -try: - import matplotlib - - matplotlib.use("Agg") - DensePoseResultsContourVisualizer = DensePoseResultsMplContourVisualizer -except ModuleNotFoundError: - logger = logging.getLogger(__name__) - logger.warning("Could not import matplotlib, using custom contour visualizer") - DensePoseResultsContourVisualizer = DensePoseResultsCustomContourVisualizer - - -class DensePoseResultsFineSegmentationVisualizer(DensePoseMaskedColormapResultsVisualizer): - def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7): - super(DensePoseResultsFineSegmentationVisualizer, self).__init__( - _extract_i_from_iuvarr, - _extract_i_from_iuvarr, - inplace, - cmap, - alpha, - val_scale=255.0 / DensePoseDataRelative.N_PART_LABELS, - ) - - -class DensePoseResultsUVisualizer(DensePoseMaskedColormapResultsVisualizer): - def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7): - super(DensePoseResultsUVisualizer, self).__init__( - _extract_u_from_iuvarr, _extract_i_from_iuvarr, inplace, cmap, alpha, val_scale=1.0 - ) - - -class DensePoseResultsVVisualizer(DensePoseMaskedColormapResultsVisualizer): - def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7): - super(DensePoseResultsVVisualizer, self).__init__( - _extract_v_from_iuvarr, _extract_i_from_iuvarr, inplace, cmap, alpha, val_scale=1.0 - ) - - -class DensePoseOutputsFineSegmentationVisualizer(object): - def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7): - self.mask_visualizer = MatrixVisualizer( - inplace=inplace, - cmap=cmap, - val_scale=255.0 / DensePoseDataRelative.N_PART_LABELS, - alpha=alpha, - ) - - def visualize( - self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]] - ) -> Image: - if dp_output_with_bboxes is None: - return image_bgr - densepose_output, bboxes_xywh = dp_output_with_bboxes - S = densepose_output.S - I = densepose_output.I # noqa - U = densepose_output.U - V = densepose_output.V - N = S.size(0) 
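Stepping back to the custom contour visualizer above: the 4-bit cell code built in `_contours` is just the four corners of each 2x2 grid cell packed into bits (top-left -> 1, bottom-left -> 2, bottom-right -> 4, top-right -> 8); codes 0 and 15 mean no contour crosses the cell, 5 and 10 are the two-segment saddle cases, and every other code maps to one interpolated segment in `_bin_code_2_lines`. A toy illustration with made-up values:

```python
# Hypothetical mini-demo of the marching-squares cell code used by _contours().
import numpy as np

arr = np.array([[0.2, 0.8],
                [0.4, 0.9]])       # one 2x2 cell of U (or V) values
level = 0.5
vp = arr >= level                  # which corners lie above the contour level
code = int(vp[0, 0] + vp[1, 0] * 2 + vp[1, 1] * 4 + vp[0, 1] * 8)
print(code)  # 12: only the right-hand corners are above, handled by the 3/12 branch
```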
- assert N == I.size( - 0 - ), "densepose outputs S {} and I {}" " should have equal first dim size".format( - S.size(), I.size() - ) - assert N == U.size( - 0 - ), "densepose outputs S {} and U {}" " should have equal first dim size".format( - S.size(), U.size() - ) - assert N == V.size( - 0 - ), "densepose outputs S {} and V {}" " should have equal first dim size".format( - S.size(), V.size() - ) - assert N == len( - bboxes_xywh - ), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format( - len(bboxes_xywh), N - ) - for n in range(N): - Sn = S[n].argmax(dim=0) - In = I[n].argmax(dim=0) * (Sn > 0).long() - matrix = In.cpu().numpy().astype(np.uint8) - mask = np.zeros(matrix.shape, dtype=np.uint8) - mask[matrix > 0] = 1 - bbox_xywh = bboxes_xywh[n] - image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh) - return image_bgr - - -class DensePoseOutputsUVisualizer(object): - def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7): - self.mask_visualizer = MatrixVisualizer( - inplace=inplace, cmap=cmap, val_scale=1.0, alpha=alpha - ) - - def visualize( - self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]] - ) -> Image: - if dp_output_with_bboxes is None: - return image_bgr - densepose_output, bboxes_xywh = dp_output_with_bboxes - assert isinstance( - densepose_output, DensePoseOutput - ), "DensePoseOutput expected, {} encountered".format(type(densepose_output)) - S = densepose_output.S - I = densepose_output.I # noqa - U = densepose_output.U - V = densepose_output.V - N = S.size(0) - assert N == I.size( - 0 - ), "densepose outputs S {} and I {}" " should have equal first dim size".format( - S.size(), I.size() - ) - assert N == U.size( - 0 - ), "densepose outputs S {} and U {}" " should have equal first dim size".format( - S.size(), U.size() - ) - assert N == V.size( - 0 - ), "densepose outputs S {} and V {}" " should have equal first dim size".format( - S.size(), V.size() - ) - assert N == len( - bboxes_xywh - ), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format( - len(bboxes_xywh), N - ) - for n in range(N): - Sn = S[n].argmax(dim=0) - In = I[n].argmax(dim=0) * (Sn > 0).long() - segmentation = In.cpu().numpy().astype(np.uint8) - mask = np.zeros(segmentation.shape, dtype=np.uint8) - mask[segmentation > 0] = 1 - Un = U[n].cpu().numpy().astype(np.float32) - Uvis = np.zeros(segmentation.shape, dtype=np.float32) - for partId in range(Un.shape[0]): - Uvis[segmentation == partId] = Un[partId][segmentation == partId].clip(0, 1) * 255 - bbox_xywh = bboxes_xywh[n] - image_bgr = self.mask_visualizer.visualize(image_bgr, mask, Uvis, bbox_xywh) - return image_bgr - - -class DensePoseOutputsVVisualizer(object): - def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7): - self.mask_visualizer = MatrixVisualizer( - inplace=inplace, cmap=cmap, val_scale=1.0, alpha=alpha - ) - - def visualize( - self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]] - ) -> Image: - if dp_output_with_bboxes is None: - return image_bgr - densepose_output, bboxes_xywh = dp_output_with_bboxes - assert isinstance( - densepose_output, DensePoseOutput - ), "DensePoseOutput expected, {} encountered".format(type(densepose_output)) - S = densepose_output.S - I = densepose_output.I # noqa - U = densepose_output.U - V = densepose_output.V - N = S.size(0) - assert N == I.size( - 0 - ), "densepose outputs S {} and I {}" " should have equal first dim 
size".format( - S.size(), I.size() - ) - assert N == U.size( - 0 - ), "densepose outputs S {} and U {}" " should have equal first dim size".format( - S.size(), U.size() - ) - assert N == V.size( - 0 - ), "densepose outputs S {} and V {}" " should have equal first dim size".format( - S.size(), V.size() - ) - assert N == len( - bboxes_xywh - ), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format( - len(bboxes_xywh), N - ) - for n in range(N): - Sn = S[n].argmax(dim=0) - In = I[n].argmax(dim=0) * (Sn > 0).long() - segmentation = In.cpu().numpy().astype(np.uint8) - mask = np.zeros(segmentation.shape, dtype=np.uint8) - mask[segmentation > 0] = 1 - Vn = V[n].cpu().numpy().astype(np.float32) - Vvis = np.zeros(segmentation.shape, dtype=np.float32) - for partId in range(Vn.size(0)): - Vvis[segmentation == partId] = Vn[partId][segmentation == partId].clip(0, 1) * 255 - bbox_xywh = bboxes_xywh[n] - image_bgr = self.mask_visualizer.visualize(image_bgr, mask, Vvis, bbox_xywh) - return image_bgr - - -class DensePoseDataCoarseSegmentationVisualizer(object): - """ - Visualizer for ground truth segmentation - """ - - def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7): - self.mask_visualizer = MatrixVisualizer( - inplace=inplace, - cmap=cmap, - val_scale=255.0 / DensePoseDataRelative.N_BODY_PARTS, - alpha=alpha, - ) - - def visualize( - self, - image_bgr: Image, - bbox_densepose_datas: Optional[Tuple[Iterable[Boxes], Iterable[DensePoseDataRelative]]], - ) -> Image: - if bbox_densepose_datas is None: - return image_bgr - for bbox_xywh, densepose_data in zip(*bbox_densepose_datas): - matrix = densepose_data.segm.numpy() - mask = np.zeros(matrix.shape, dtype=np.uint8) - mask[matrix > 0] = 1 - image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh.numpy()) - return image_bgr - - -class DensePoseDataPointsVisualizer(object): - def __init__(self, densepose_data_to_value_fn=None, cmap=cv2.COLORMAP_PARULA): - self.points_visualizer = PointsVisualizer() - self.densepose_data_to_value_fn = densepose_data_to_value_fn - self.cmap = cmap - - def visualize( - self, - image_bgr: Image, - bbox_densepose_datas: Optional[Tuple[Iterable[Boxes], Iterable[DensePoseDataRelative]]], - ) -> Image: - if bbox_densepose_datas is None: - return image_bgr - for bbox_xywh, densepose_data in zip(*bbox_densepose_datas): - x0, y0, w, h = bbox_xywh.numpy() - x = densepose_data.x.numpy() * w / 255.0 + x0 - y = densepose_data.y.numpy() * h / 255.0 + y0 - pts_xy = zip(x, y) - if self.densepose_data_to_value_fn is None: - image_bgr = self.points_visualizer.visualize(image_bgr, pts_xy) - else: - v = self.densepose_data_to_value_fn(densepose_data) - img_colors_bgr = cv2.applyColorMap(v, self.cmap) - colors_bgr = [ - [int(v) for v in img_color_bgr.ravel()] for img_color_bgr in img_colors_bgr - ] - image_bgr = self.points_visualizer.visualize(image_bgr, pts_xy, colors_bgr) - return image_bgr - - -def _densepose_data_u_for_cmap(densepose_data): - u = np.clip(densepose_data.u.numpy(), 0, 1) * 255.0 - return u.astype(np.uint8) - - -def _densepose_data_v_for_cmap(densepose_data): - v = np.clip(densepose_data.v.numpy(), 0, 1) * 255.0 - return v.astype(np.uint8) - - -def _densepose_data_i_for_cmap(densepose_data): - i = ( - np.clip(densepose_data.i.numpy(), 0.0, DensePoseDataRelative.N_PART_LABELS) - * 255.0 - / DensePoseDataRelative.N_PART_LABELS - ) - return i.astype(np.uint8) - - -class DensePoseDataPointsUVisualizer(DensePoseDataPointsVisualizer): - def __init__(self): - 
super(DensePoseDataPointsUVisualizer, self).__init__( - densepose_data_to_value_fn=_densepose_data_u_for_cmap - ) - - -class DensePoseDataPointsVVisualizer(DensePoseDataPointsVisualizer): - def __init__(self): - super(DensePoseDataPointsVVisualizer, self).__init__( - densepose_data_to_value_fn=_densepose_data_v_for_cmap - ) - - -class DensePoseDataPointsIVisualizer(DensePoseDataPointsVisualizer): - def __init__(self): - super(DensePoseDataPointsIVisualizer, self).__init__( - densepose_data_to_value_fn=_densepose_data_i_for_cmap - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/extractor.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/extractor.py deleted file mode 100644 index b715a44..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/extractor.py +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -from typing import Sequence -import torch - -from detectron2.layers.nms import batched_nms -from detectron2.structures.instances import Instances - -from densepose.vis.bounding_box import BoundingBoxVisualizer, ScoredBoundingBoxVisualizer -from densepose.vis.densepose import DensePoseResultsVisualizer - -from .base import CompoundVisualizer - -Scores = Sequence[float] - - -def extract_scores_from_instances(instances: Instances, select=None): - if instances.has("scores"): - return instances.scores if select is None else instances.scores[select] - return None - - -def extract_boxes_xywh_from_instances(instances: Instances, select=None): - if instances.has("pred_boxes"): - boxes_xywh = instances.pred_boxes.tensor.clone() - boxes_xywh[:, 2] -= boxes_xywh[:, 0] - boxes_xywh[:, 3] -= boxes_xywh[:, 1] - return boxes_xywh if select is None else boxes_xywh[select] - return None - - -def create_extractor(visualizer: object): - """ - Create an extractor for the provided visualizer - """ - if isinstance(visualizer, CompoundVisualizer): - extractors = [create_extractor(v) for v in visualizer.visualizers] - return CompoundExtractor(extractors) - elif isinstance(visualizer, DensePoseResultsVisualizer): - return DensePoseResultExtractor() - elif isinstance(visualizer, ScoredBoundingBoxVisualizer): - return CompoundExtractor([extract_boxes_xywh_from_instances, extract_scores_from_instances]) - elif isinstance(visualizer, BoundingBoxVisualizer): - return extract_boxes_xywh_from_instances - else: - logger = logging.getLogger(__name__) - logger.error(f"Could not create extractor for {visualizer}") - return None - - -class BoundingBoxExtractor(object): - """ - Extracts bounding boxes from instances - """ - - def __call__(self, instances: Instances): - boxes_xywh = extract_boxes_xywh_from_instances(instances) - return boxes_xywh - - -class ScoredBoundingBoxExtractor(object): - """ - Extracts bounding boxes from instances - """ - - def __call__(self, instances: Instances, select=None): - scores = extract_scores_from_instances(instances) - boxes_xywh = extract_boxes_xywh_from_instances(instances) - if (scores is None) or (boxes_xywh is None): - return (boxes_xywh, scores) - if select is not None: - scores = scores[select] - boxes_xywh = boxes_xywh[select] - return (boxes_xywh, scores) - - -class DensePoseResultExtractor(object): - """ - Extracts DensePose result from instances - """ - - def __call__(self, instances: Instances, select=None): - boxes_xywh = extract_boxes_xywh_from_instances(instances) - if 
instances.has("pred_densepose") and (boxes_xywh is not None): - dpout = instances.pred_densepose - if select is not None: - dpout = dpout[select] - boxes_xywh = boxes_xywh[select] - return dpout.to_result(boxes_xywh) - else: - return None - - -class CompoundExtractor(object): - """ - Extracts data for CompoundVisualizer - """ - - def __init__(self, extractors): - self.extractors = extractors - - def __call__(self, instances: Instances, select=None): - datas = [] - for extractor in self.extractors: - data = extractor(instances, select) - datas.append(data) - return datas - - -class NmsFilteredExtractor(object): - """ - Extracts data in the format accepted by NmsFilteredVisualizer - """ - - def __init__(self, extractor, iou_threshold): - self.extractor = extractor - self.iou_threshold = iou_threshold - - def __call__(self, instances: Instances, select=None): - scores = extract_scores_from_instances(instances) - boxes_xywh = extract_boxes_xywh_from_instances(instances) - if boxes_xywh is None: - return None - select_local_idx = batched_nms( - boxes_xywh, - scores, - torch.zeros(len(scores), dtype=torch.int32), - iou_threshold=self.iou_threshold, - ).squeeze() - select_local = torch.zeros(len(boxes_xywh), dtype=torch.bool, device=boxes_xywh.device) - select_local[select_local_idx] = True - select = select_local if select is None else (select & select_local) - return self.extractor(instances, select=select) - - -class ScoreThresholdedExtractor(object): - """ - Extracts data in the format accepted by ScoreThresholdedVisualizer - """ - - def __init__(self, extractor, min_score): - self.extractor = extractor - self.min_score = min_score - - def __call__(self, instances: Instances, select=None): - scores = extract_scores_from_instances(instances) - if scores is None: - return None - select_local = scores > self.min_score - select = select_local if select is None else (select & select_local) - data = self.extractor(instances, select=select) - return data diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/dev/README.md b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/dev/README.md deleted file mode 100644 index e3a94b6..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/dev/README.md +++ /dev/null @@ -1,7 +0,0 @@ - -## Some scripts for developers to use, include: - -- `run_instant_tests.sh`: run training for a few iterations. -- `run_inference_tests.sh`: run inference on a small dataset. -- `../../dev/linter.sh`: lint the codebase before commit -- `../../dev/parse_results.sh`: parse results from log file. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/dev/run_inference_tests.sh b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/dev/run_inference_tests.sh deleted file mode 100644 index 34f47d5..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/dev/run_inference_tests.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -BIN="python train_net.py" -OUTPUT="inference_test_output" -NUM_GPUS=2 -IMS_PER_GPU=2 -IMS_PER_BATCH=$(( NUM_GPUS * IMS_PER_GPU )) - -CFG_LIST=( "${@:1}" ) - -if [ ${#CFG_LIST[@]} -eq 0 ]; then - CFG_LIST=( ./configs/quick_schedules/*inference_acc_test.yaml ) -fi - -echo "========================================================================" -echo "Configs to run:" -echo "${CFG_LIST[@]}" -echo "========================================================================" - -for cfg in "${CFG_LIST[@]}"; do - echo "========================================================================" - echo "Running $cfg ..." - echo "========================================================================" - $BIN \ - --eval-only \ - --num-gpus $NUM_GPUS \ - --config-file "$cfg" \ - OUTPUT_DIR "$OUTPUT" \ - SOLVER.IMS_PER_BATCH $IMS_PER_BATCH - rm -rf $OUTPUT -done - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/dev/run_instant_tests.sh b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/dev/run_instant_tests.sh deleted file mode 100644 index a537851..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/dev/run_instant_tests.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -BIN="python train_net.py" -OUTPUT="instant_test_output" -NUM_GPUS=2 -SOLVER_IMS_PER_BATCH=$((NUM_GPUS * 2)) - -CFG_LIST=( "${@:1}" ) -if [ ${#CFG_LIST[@]} -eq 0 ]; then - CFG_LIST=( ./configs/quick_schedules/*instant_test.yaml ) -fi - -echo "========================================================================" -echo "Configs to run:" -echo "${CFG_LIST[@]}" -echo "========================================================================" - -for cfg in "${CFG_LIST[@]}"; do - echo "========================================================================" - echo "Running $cfg ..." - echo "========================================================================" - $BIN --num-gpus $NUM_GPUS --config-file "$cfg" \ - SOLVER.IMS_PER_BATCH $SOLVER_IMS_PER_BATCH \ - OUTPUT_DIR "$OUTPUT" - rm -rf "$OUTPUT" -done - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/GETTING_STARTED.md b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/GETTING_STARTED.md deleted file mode 100644 index a6bcbed..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/GETTING_STARTED.md +++ /dev/null @@ -1,58 +0,0 @@ -# Getting Started with DensePose - -## Inference with Pre-trained Models - -1. Pick a model and its config file from [Model Zoo](MODEL_ZOO.md), for example [densepose_rcnn_R_50_FPN_s1x.yaml](../configs/densepose_rcnn_R_50_FPN_s1x.yaml) -2. Run the [Apply Net](TOOL_APPLY_NET.md) tool to visualize the results or save the to disk. For example, to use contour visualization for DensePose, one can run: -```bash -python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml densepose_rcnn_R_50_FPN_s1x.pkl image.jpg dp_contour,bbox --output image_densepose_contour.png -``` -Please see [Apply Net](TOOL_APPLY_NET.md) for more details on the tool. - -## Training - -First, prepare the [dataset](http://densepose.org/#dataset) into the following structure under the directory you'll run training scripts: -
-datasets/coco/
-  annotations/
-    densepose_{train,minival,valminusminival}2014.json
-    densepose_minival2014_100.json   (optional, for testing only)
-  {train,val}2014/
-    # image files that are mentioned in the corresponding json
-
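One possible way to satisfy this layout is to link an existing COCO download into place. The sketch below is only an illustration with placeholder paths (`/path/to/coco` and `/path/to/densepose_annotations` are assumptions), not part of the official instructions:

```python
# Sketch only: wire an existing COCO image download and the DensePose JSON files
# into the datasets/coco/ layout shown above. All source paths are placeholders.
from pathlib import Path

coco_src = Path("/path/to/coco")                  # contains train2014/ and val2014/
dp_anns = Path("/path/to/densepose_annotations")  # contains densepose_*.json
root = Path("datasets/coco")

(root / "annotations").mkdir(parents=True, exist_ok=True)
for split in ("train2014", "val2014"):
    link = root / split
    if not link.exists():
        link.symlink_to(coco_src / split)
for ann in dp_anns.glob("densepose_*.json"):
    target = root / "annotations" / ann.name
    if not target.exists():
        target.symlink_to(ann)
```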
- -To train a model one can use the [train_net.py](../train_net.py) script. -This script was used to train all DensePose models in [Model Zoo](MODEL_ZOO.md). -For example, to launch end-to-end DensePose-RCNN training with ResNet-50 FPN backbone -on 8 GPUs following the s1x schedule, one can run -```bash -python train_net.py --config-file configs/densepose_rcnn_R_50_FPN_s1x.yaml --num-gpus 8 -``` -The configs are made for 8-GPU training. To train on 1 GPU, one can apply the -[linear learning rate scaling rule](https://arxiv.org/abs/1706.02677): -```bash -python train_net.py --config-file configs/densepose_rcnn_R_50_FPN_s1x.yaml \ - SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025 -``` - -## Evaluation - -Model testing can be done in the same way as training, except for an additional flag `--eval-only` and -model location specification through `MODEL.WEIGHTS model.pth` in the command line -```bash -python train_net.py --config-file configs/densepose_rcnn_R_50_FPN_s1x.yaml \ - --eval-only MODEL.WEIGHTS model.pth -``` - -## Tools - -We provide tools which allow one to: - - easily view DensePose annotated data in a dataset; - - perform DensePose inference on a set of images; - - visualize DensePose model results; - -`query_db` is a tool to print or visualize DensePose data in a dataset. -Please refer to [Query DB](TOOL_QUERY_DB.md) for more details on this tool - -`apply_net` is a tool to print or visualize DensePose results. -Please refer to [Apply Net](TOOL_APPLY_NET.md) for more details on this tool diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/MODEL_ZOO.md b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/MODEL_ZOO.md deleted file mode 100644 index c263084..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/MODEL_ZOO.md +++ /dev/null @@ -1,277 +0,0 @@ -# Model Zoo and Baselines - -# Introduction - -We provide baselines trained with Detectron2 DensePose. The corresponding -configuration files can be found in the [configs](../configs) directory. -All models were trained on COCO `train2014` + `valminusminival2014` and -evaluated on COCO `minival2014`. For the details on common settings in which -baselines were trained, please check [Detectron 2 Model Zoo](../../../MODEL_ZOO.md). - -## License - -All models available for download through this document are licensed under the -[Creative Commons Attribution-ShareAlike 3.0 license](https://creativecommons.org/licenses/by-sa/3.0/) - -## COCO DensePose Baselines with DensePose-RCNN - -### Legacy Models - -Baselines trained using schedules from [Güler et al, 2018](https://arxiv.org/pdf/1802.00434.pdf) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | dp. AP GPS | dp. AP GPSm | model id | download |
-| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
-| R_50_FPN_s1x_legacy | s1x | 0.307 | 0.051 | 3.2 | 58.1 | 52.1 | 54.9 | 164832157 | model \| metrics |
-| R_101_FPN_s1x_legacy | s1x | 0.390 | 0.063 | 4.3 | 59.5 | 53.2 | 56.1 | 164832182 | model \| metrics |
-
-### Improved Baselines, Original Fully Convolutional Head
-
-These models use an improved training schedule and Panoptic FPN head from [Kirillov et al, 2019](https://arxiv.org/abs/1901.02446).
-
-| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | dp. AP GPS | dp. AP GPSm | model id | download |
-| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
-| R_50_FPN_s1x | s1x | 0.359 | 0.066 | 4.5 | 61.2 | 63.7 | 65.3 | 165712039 | model \| metrics |
-| R_101_FPN_s1x | s1x | 0.428 | 0.079 | 5.8 | 62.3 | 64.5 | 66.4 | 165712084 | model \| metrics |
-
-### Improved Baselines, DeepLabV3 Head
-
-These models use an improved training schedule, Panoptic FPN head from [Kirillov et al, 2019](https://arxiv.org/abs/1901.02446) and DeepLabV3 head from [Chen et al, 2017](https://arxiv.org/abs/1706.05587).
-
-| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | dp. AP GPS | dp. AP GPSm | model id | download |
-| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
-| R_50_FPN_DL_s1x | s1x | 0.392 | 0.070 | 6.7 | 61.1 | 65.6 | 66.8 | 165712097 | model \| metrics |
-| R_101_FPN_DL_s1x | s1x | 0.478 | 0.083 | 7.0 | 62.3 | 66.3 | 67.7 | 165712116 | model \| metrics |
-
-### Baselines with Confidence Estimation
-
-These models perform additional estimation of confidence in regressed UV coordinates, along the lines of [Neverova et al., 2019](https://papers.nips.cc/paper/8378-correlated-uncertainty-for-learning-dense-correspondences-from-noisy-labels).
-
-| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | dp. AP GPS | dp. AP GPSm | model id | download |
-| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
-| R_50_FPN_WC1_s1x | s1x | 0.353 | 0.064 | 4.6 | 60.5 | 64.2 | 65.6 | 173862049 | model \| metrics |
-| R_50_FPN_WC2_s1x | s1x | 0.364 | 0.066 | 4.8 | 60.7 | 64.2 | 65.7 | 173861455 | model \| metrics |
-| R_50_FPN_DL_WC1_s1x | s1x | 0.397 | 0.068 | 6.7 | 61.1 | 65.8 | 67.1 | 173067973 | model \| metrics |
-| R_50_FPN_DL_WC2_s1x | s1x | 0.410 | 0.070 | 6.8 | 60.8 | 65.6 | 66.7 | 173859335 | model \| metrics |
-| R_101_FPN_WC1_s1x | s1x | 0.435 | 0.076 | 5.7 | 62.5 | 64.9 | 66.5 | 171402969 | model \| metrics |
-| R_101_FPN_WC2_s1x | s1x | 0.450 | 0.078 | 5.7 | 62.3 | 64.8 | 66.6 | 173860702 | model \| metrics |
-| R_101_FPN_DL_WC1_s1x | s1x | 0.479 | 0.081 | 7.9 | 62.0 | 66.2 | 67.4 | 173858525 | model \| metrics |
-| R_101_FPN_DL_WC2_s1x | s1x | 0.491 | 0.082 | 7.6 | 61.7 | 65.9 | 67.3 | 173294801 | model \| metrics |
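To build one of the baselines above in Python and load a downloaded checkpoint, a rough sketch is given below. It mirrors the config-handling pattern used by `tests/common.py` and `train_net.py` later in this diff; the config file name and checkpoint path are placeholders, not prescribed by the model zoo itself:

```python
# Sketch: construct a DensePose baseline from its config and load a checkpoint
# downloaded from the model zoo table above. All paths are placeholders.
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.modeling import build_model

from densepose import add_dataset_category_config, add_densepose_config

cfg = get_cfg()
add_dataset_category_config(cfg)
add_densepose_config(cfg)
cfg.merge_from_file("configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml")  # placeholder config
cfg.MODEL.WEIGHTS = "/path/to/model_final.pkl"                       # downloaded checkpoint
model = build_model(cfg)
DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)
model.eval()
```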
- -## Old Baselines - -It is still possible to use some baselines from [DensePose 1](https://github.com/facebookresearch/DensePose). -Below are evaluation metrics for the baselines recomputed in the current framework: - -| Model | bbox AP | AP | AP50 | AP75 | APm |APl | -|-----|-----|-----|--- |--- |--- |--- | -| [`ResNet50_FPN_s1x-e2e`](https://dl.fbaipublicfiles.com/densepose/DensePose_ResNet50_FPN_s1x-e2e.pkl) | 54.673 | 48.894 | 84.963 | 50.717 | 43.132 | 50.433 | -| [`ResNet101_FPN_s1x-e2e`](https://dl.fbaipublicfiles.com/densepose/DensePose_ResNet101_FPN_s1x-e2e.pkl) | 56.032 | 51.088 | 86.250 | 55.057 | 46.542 | 52.563 | - -Note: these scores are close, but not strictly equal to the ones reported in the [DensePose 1 Model Zoo](https://github.com/facebookresearch/DensePose/blob/master/MODEL_ZOO.md), -which is due to small incompatibilities between the frameworks. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/TOOL_APPLY_NET.md b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/TOOL_APPLY_NET.md deleted file mode 100644 index f5cf257..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/TOOL_APPLY_NET.md +++ /dev/null @@ -1,130 +0,0 @@ -# Apply Net - -`apply_net` is a tool to print or visualize DensePose results on a set of images. -It has two modes: `dump` to save DensePose model results to a pickle file -and `show` to visualize them on images. - -## Dump Mode - -The general command form is: -```bash -python apply_net.py dump [-h] [-v] [--output ] -``` - -There are three mandatory arguments: - - ``, configuration file for a given model; - - ``, model file with trained parameters - - ``, input image file name, pattern or folder - -One can additionally provide `--output` argument to define the output file name, -which defaults to `output.pkl`. - - -Examples: - -1. Dump results of a DensePose model with ResNet-50 FPN backbone for images - in a folder `images` to file `dump.pkl`: -```bash -python apply_net.py dump configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl images --output dump.pkl -v -``` - -2. Dump results of a DensePose model with ResNet-50 FPN backbone for images - with file name matching a pattern `image*.jpg` to file `results.pkl`: -```bash -python apply_net.py dump configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl "image*.jpg" --output results.pkl -v -``` - -If you want to load the pickle file generated by the above command: -``` -# make sure DensePose is in your PYTHONPATH, or use the following line to add it: -sys.path.append("/your_detectron2_path/detectron2_repo/projects/DensePose/") - -f = open('/your_result_path/results.pkl', 'rb') -data = pickle.load(f) -``` - -The file `results.pkl` contains the list of results per image, for each image the result is a dictionary: -``` -data: [{'file_name': '/your_path/image1.jpg', - 'scores': tensor([0.9884]), - 'pred_boxes_XYXY': tensor([[ 69.6114, 0.0000, 706.9797, 706.0000]]), - 'pred_densepose': }, - {'file_name': '/your_path/image2.jpg', - 'scores': tensor([0.9999, 0.5373, 0.3991]), - 'pred_boxes_XYXY': tensor([[ 59.5734, 7.7535, 579.9311, 932.3619], - [612.9418, 686.1254, 612.9999, 704.6053], - [164.5081, 407.4034, 598.3944, 920.4266]]), - 'pred_densepose': }] -``` - -We can use the following code, to parse the outputs of the first -detected instance on the first image. 
-``` -img_id, instance_id = 0, 0 # Look at the first image and the first detected instance -bbox_xyxy = data[img_id]['pred_boxes_XYXY'][instance_id] -result_encoded = data[img_id]['pred_densepose'].results[instance_id] -iuv_arr = DensePoseResult.decode_png_data(*result_encoded) -``` -The array `bbox_xyxy` contains (x0, y0, x1, y1) of the bounding box. - -The shape of `iuv_arr` is `[3, H, W]`, where (H, W) is the shape of the bounding box. -- `iuv_arr[0,:,:]`: The patch index of image points, indicating which of the 24 surface patches the point is on. -- `iuv_arr[1,:,:]`: The U-coordinate value of image points. -- `iuv_arr[2,:,:]`: The V-coordinate value of image points. - - -## Visualization Mode - -The general command form is: -```bash -python apply_net.py show [-h] [-v] [--min_score ] [--nms_thresh ] [--output ] -``` - -There are four mandatory arguments: - - ``, configuration file for a given model; - - ``, model file with trained parameters - - ``, input image file name, pattern or folder - - ``, visualizations specifier; currently available visualizations are: - * `bbox` - bounding boxes of detected persons; - * `dp_segm` - segmentation masks for detected persons; - * `dp_u` - each body part is colored according to the estimated values of the - U coordinate in part parameterization; - * `dp_v` - each body part is colored according to the estimated values of the - V coordinate in part parameterization; - * `dp_contour` - plots contours with color-coded U and V coordinates - - -One can additionally provide the following optional arguments: - - `--min_score` to only show detections with sufficient scores that are not lower than provided value - - `--nms_thresh` to additionally apply non-maximum suppression to detections at a given threshold - - `--output` to define visualization file name template, which defaults to `output.png`. - To distinguish output file names for different images, the tool appends 1-based entry index, - e.g. output.0001.png, output.0002.png, etc... - - -The following examples show how to output results of a DensePose model -with ResNet-50 FPN backbone using different visualizations for image `image.jpg`: - -1. Show bounding box and segmentation: -```bash -python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl image.jpg bbox,dp_segm -v -``` -![Bounding Box + Segmentation Visualization](images/res_bbox_dp_segm.jpg) - -2. Show bounding box and estimated U coordinates for body parts: -```bash -python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl image.jpg bbox,dp_u -v -``` -![Bounding Box + U Coordinate Visualization](images/res_bbox_dp_u.jpg) - -3. Show bounding box and estimated V coordinates for body parts: -```bash -python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl image.jpg bbox,dp_v -v -``` -![Bounding Box + V Coordinate Visualization](images/res_bbox_dp_v.jpg) - -4. 
Show bounding box and estimated U and V coordinates via contour plots: -```bash -python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl image.jpg dp_contour,bbox -v -``` -![Bounding Box + Contour Visualization](images/res_bbox_dp_contour.jpg) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/TOOL_QUERY_DB.md b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/TOOL_QUERY_DB.md deleted file mode 100644 index b0a764b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/TOOL_QUERY_DB.md +++ /dev/null @@ -1,105 +0,0 @@ - -# Query Dataset - -`query_db` is a tool to print or visualize DensePose data from a dataset. -It has two modes: `print` and `show` to output dataset entries to standard -output or to visualize them on images. - -## Print Mode - -The general command form is: -```bash -python query_db.py print [-h] [-v] [--max-entries N] -``` - -There are two mandatory arguments: - - ``, DensePose dataset specification, from which to select - the entries (e.g. `densepose_coco_2014_train`). - - ``, dataset entry selector which can be a single specification, - or a comma-separated list of specifications of the form - `field[:type]=value` for exact match with the value - or `field[:type]=min-max` for a range of values - -One can additionally limit the maximum number of entries to output -by providing `--max-entries` argument. - -Examples: - -1. Output at most 10 first entries from the `densepose_coco_2014_train` dataset: -```bash -python query_db.py print densepose_coco_2014_train \* --max-entries 10 -v -``` - -2. Output all entries with `file_name` equal to `COCO_train2014_000000000036.jpg`: -```bash -python query_db.py print densepose_coco_2014_train file_name=COCO_train2014_000000000036.jpg -v -``` - -3. Output all entries with `image_id` between 36 and 156: -```bash -python query_db.py print densepose_coco_2014_train image_id:int=36-156 -v -``` - -## Visualization Mode - -The general command form is: -```bash -python query_db.py show [-h] [-v] [--max-entries N] [--output ] -``` - -There are three mandatory arguments: - - ``, DensePose dataset specification, from which to select - the entries (e.g. `densepose_coco_2014_train`). - - ``, dataset entry selector which can be a single specification, - or a comma-separated list of specifications of the form - `field[:type]=value` for exact match with the value - or `field[:type]=min-max` for a range of values - - ``, visualizations specifier; currently available visualizations are: - * `bbox` - bounding boxes of annotated persons; - * `dp_i` - annotated points colored according to the containing part; - * `dp_pts` - annotated points in green color; - * `dp_segm` - segmentation masks for annotated persons; - * `dp_u` - annotated points colored according to their U coordinate in part parameterization; - * `dp_v` - annotated points colored according to their V coordinate in part parameterization; - -One can additionally provide one of the two optional arguments: - - `--max_entries` to limit the maximum number of entries to visualize - - `--output` to provide visualization file name template, which defaults - to `output.png`. To distinguish file names for different dataset - entries, the tool appends 1-based entry index to the output file name, - e.g. output.0001.png, output.0002.png, etc. 
- -The following examples show how to output different visualizations for image with `id = 322` -from `densepose_coco_2014_train` dataset: - -1. Show bounding box and segmentation: -```bash -python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_segm -v -``` -![Bounding Box + Segmentation Visualization](images/vis_bbox_dp_segm.jpg) - -2. Show bounding box and points colored according to the containing part: -```bash -python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_i -v -``` -![Bounding Box + Point Label Visualization](images/vis_bbox_dp_i.jpg) - -3. Show bounding box and annotated points in green color: -```bash -python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_segm -v -``` -![Bounding Box + Point Visualization](images/vis_bbox_dp_pts.jpg) - -4. Show bounding box and annotated points colored according to their U coordinate in part parameterization: -```bash -python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_u -v -``` -![Bounding Box + Point U Visualization](images/vis_bbox_dp_u.jpg) - -5. Show bounding box and annotated points colored according to their V coordinate in part parameterization: -```bash -python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_v -v -``` -![Bounding Box + Point V Visualization](images/vis_bbox_dp_v.jpg) - - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/query_db.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/query_db.py deleted file mode 100644 index 6d3ea2f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/query_db.py +++ /dev/null @@ -1,250 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import argparse -import logging -import os -import sys -from timeit import default_timer as timer -from typing import Any, ClassVar, Dict, List -import torch -from fvcore.common.file_io import PathManager - -from detectron2.data.catalog import DatasetCatalog -from detectron2.utils.logger import setup_logger - -from densepose.data.structures import DensePoseDataRelative -from densepose.utils.dbhelper import EntrySelector -from densepose.utils.logger import verbosity_to_level -from densepose.vis.base import CompoundVisualizer -from densepose.vis.bounding_box import BoundingBoxVisualizer -from densepose.vis.densepose import ( - DensePoseDataCoarseSegmentationVisualizer, - DensePoseDataPointsIVisualizer, - DensePoseDataPointsUVisualizer, - DensePoseDataPointsVisualizer, - DensePoseDataPointsVVisualizer, -) - -DOC = """Query DB - a tool to print / visualize data from a database -""" - -LOGGER_NAME = "query_db" - -logger = logging.getLogger(LOGGER_NAME) - -_ACTION_REGISTRY: Dict[str, "Action"] = {} - - -class Action(object): - @classmethod - def add_arguments(cls: type, parser: argparse.ArgumentParser): - parser.add_argument( - "-v", - "--verbosity", - action="count", - help="Verbose mode. Multiple -v options increase the verbosity.", - ) - - -def register_action(cls: type): - """ - Decorator for action classes to automate action registration - """ - global _ACTION_REGISTRY - _ACTION_REGISTRY[cls.COMMAND] = cls - return cls - - -class EntrywiseAction(Action): - @classmethod - def add_arguments(cls: type, parser: argparse.ArgumentParser): - super(EntrywiseAction, cls).add_arguments(parser) - parser.add_argument( - "dataset", metavar="", help="Dataset name (e.g. 
densepose_coco_2014_train)" - ) - parser.add_argument( - "selector", - metavar="", - help="Dataset entry selector in the form field1[:type]=value1[," - "field2[:type]=value_min-value_max...] which selects all " - "entries from the dataset that satisfy the constraints", - ) - parser.add_argument( - "--max-entries", metavar="N", help="Maximum number of entries to process", type=int - ) - - @classmethod - def execute(cls: type, args: argparse.Namespace): - dataset = setup_dataset(args.dataset) - entry_selector = EntrySelector.from_string(args.selector) - context = cls.create_context(args) - if args.max_entries is not None: - for _, entry in zip(range(args.max_entries), dataset): - if entry_selector(entry): - cls.execute_on_entry(entry, context) - else: - for entry in dataset: - if entry_selector(entry): - cls.execute_on_entry(entry, context) - - @classmethod - def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]: - context = {} - return context - - -@register_action -class PrintAction(EntrywiseAction): - """ - Print action that outputs selected entries to stdout - """ - - COMMAND: ClassVar[str] = "print" - - @classmethod - def add_parser(cls: type, subparsers: argparse._SubParsersAction): - parser = subparsers.add_parser(cls.COMMAND, help="Output selected entries to stdout. ") - cls.add_arguments(parser) - parser.set_defaults(func=cls.execute) - - @classmethod - def add_arguments(cls: type, parser: argparse.ArgumentParser): - super(PrintAction, cls).add_arguments(parser) - - @classmethod - def execute_on_entry(cls: type, entry: Dict[str, Any], context: Dict[str, Any]): - import pprint - - printer = pprint.PrettyPrinter(indent=2, width=200, compact=True) - printer.pprint(entry) - - -@register_action -class ShowAction(EntrywiseAction): - """ - Show action that visualizes selected entries on an image - """ - - COMMAND: ClassVar[str] = "show" - VISUALIZERS: ClassVar[Dict[str, object]] = { - "dp_segm": DensePoseDataCoarseSegmentationVisualizer(), - "dp_i": DensePoseDataPointsIVisualizer(), - "dp_u": DensePoseDataPointsUVisualizer(), - "dp_v": DensePoseDataPointsVVisualizer(), - "dp_pts": DensePoseDataPointsVisualizer(), - "bbox": BoundingBoxVisualizer(), - } - - @classmethod - def add_parser(cls: type, subparsers: argparse._SubParsersAction): - parser = subparsers.add_parser(cls.COMMAND, help="Visualize selected entries") - cls.add_arguments(parser) - parser.set_defaults(func=cls.execute) - - @classmethod - def add_arguments(cls: type, parser: argparse.ArgumentParser): - super(ShowAction, cls).add_arguments(parser) - parser.add_argument( - "visualizations", - metavar="", - help="Comma separated list of visualizations, possible values: " - "[{}]".format(",".join(sorted(cls.VISUALIZERS.keys()))), - ) - parser.add_argument( - "--output", - metavar="", - default="output.png", - help="File name to save output to", - ) - - @classmethod - def execute_on_entry(cls: type, entry: Dict[str, Any], context: Dict[str, Any]): - import cv2 - import numpy as np - - image_fpath = PathManager.get_local_path(entry["file_name"]) - image = cv2.imread(image_fpath, cv2.IMREAD_GRAYSCALE) - image = np.tile(image[:, :, np.newaxis], [1, 1, 3]) - datas = cls._extract_data_for_visualizers_from_entry(context["vis_specs"], entry) - visualizer = context["visualizer"] - image_vis = visualizer.visualize(image, datas) - entry_idx = context["entry_idx"] + 1 - out_fname = cls._get_out_fname(entry_idx, context["out_fname"]) - cv2.imwrite(out_fname, image_vis) - logger.info(f"Output saved to {out_fname}") - 
context["entry_idx"] += 1 - - @classmethod - def _get_out_fname(cls: type, entry_idx: int, fname_base: str): - base, ext = os.path.splitext(fname_base) - return base + ".{0:04d}".format(entry_idx) + ext - - @classmethod - def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]: - vis_specs = args.visualizations.split(",") - visualizers = [] - for vis_spec in vis_specs: - vis = cls.VISUALIZERS[vis_spec] - visualizers.append(vis) - context = { - "vis_specs": vis_specs, - "visualizer": CompoundVisualizer(visualizers), - "out_fname": args.output, - "entry_idx": 0, - } - return context - - @classmethod - def _extract_data_for_visualizers_from_entry( - cls: type, vis_specs: List[str], entry: Dict[str, Any] - ): - dp_list = [] - bbox_list = [] - for annotation in entry["annotations"]: - is_valid, _ = DensePoseDataRelative.validate_annotation(annotation) - if not is_valid: - continue - bbox = torch.as_tensor(annotation["bbox"]) - bbox_list.append(bbox) - dp_data = DensePoseDataRelative(annotation) - dp_list.append(dp_data) - datas = [] - for vis_spec in vis_specs: - datas.append(bbox_list if "bbox" == vis_spec else (bbox_list, dp_list)) - return datas - - -def setup_dataset(dataset_name): - logger.info("Loading dataset {}".format(dataset_name)) - start = timer() - dataset = DatasetCatalog.get(dataset_name) - stop = timer() - logger.info("Loaded dataset {} in {:.3f}s".format(dataset_name, stop - start)) - return dataset - - -def create_argument_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser( - description=DOC, - formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=120), - ) - parser.set_defaults(func=lambda _: parser.print_help(sys.stdout)) - subparsers = parser.add_subparsers(title="Actions") - for _, action in _ACTION_REGISTRY.items(): - action.add_parser(subparsers) - return parser - - -def main(): - parser = create_argument_parser() - args = parser.parse_args() - verbosity = args.verbosity if hasattr(args, "verbosity") else None - global logger - logger = setup_logger(name=LOGGER_NAME) - logger.setLevel(verbosity_to_level(verbosity)) - args.func(args) - - -if __name__ == "__main__": - main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/common.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/common.py deleted file mode 100644 index 13bf0dd..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/common.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
- -import os -import torch - -from detectron2.config import get_cfg -from detectron2.engine import default_setup -from detectron2.modeling import build_model - -from densepose import add_dataset_category_config, add_densepose_config - -_BASE_CONFIG_DIR = "configs" -_EVOLUTION_CONFIG_SUB_DIR = "evolution" -_QUICK_SCHEDULES_CONFIG_SUB_DIR = "quick_schedules" -_BASE_CONFIG_FILE_PREFIX = "Base-" -_CONFIG_FILE_EXT = ".yaml" - - -def _get_base_config_dir(): - """ - Return the base directory for configurations - """ - return os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", _BASE_CONFIG_DIR) - - -def _get_evolution_config_dir(): - """ - Return the base directory for evolution configurations - """ - return os.path.join(_get_base_config_dir(), _EVOLUTION_CONFIG_SUB_DIR) - - -def _get_quick_schedules_config_dir(): - """ - Return the base directory for quick schedules configurations - """ - return os.path.join(_get_base_config_dir(), _QUICK_SCHEDULES_CONFIG_SUB_DIR) - - -def _collect_config_files(config_dir): - """ - Collect all configuration files (i.e. densepose_*.yaml) directly in the specified directory - """ - start = _get_base_config_dir() - results = [] - for entry in os.listdir(config_dir): - path = os.path.join(config_dir, entry) - if not os.path.isfile(path): - continue - _, ext = os.path.splitext(entry) - if ext != _CONFIG_FILE_EXT: - continue - if entry.startswith(_BASE_CONFIG_FILE_PREFIX): - continue - config_file = os.path.relpath(path, start) - results.append(config_file) - return results - - -def get_config_files(): - """ - Get all the configuration files (relative to the base configuration directory) - """ - return _collect_config_files(_get_base_config_dir()) - - -def get_evolution_config_files(): - """ - Get all the evolution configuration files (relative to the base configuration directory) - """ - return _collect_config_files(_get_evolution_config_dir()) - - -def get_quick_schedules_config_files(): - """ - Get all the quick schedules configuration files (relative to the base configuration directory) - """ - return _collect_config_files(_get_quick_schedules_config_dir()) - - -def _get_model_config(config_file): - """ - Load and return the configuration from the specified file (relative to the base configuration - directory) - """ - cfg = get_cfg() - add_dataset_category_config(cfg) - add_densepose_config(cfg) - path = os.path.join(_get_base_config_dir(), config_file) - cfg.merge_from_file(path) - if not torch.cuda.is_available(): - cfg.MODEL_DEVICE = "cpu" - return cfg - - -def get_model(config_file): - """ - Get the model from the specified file (relative to the base configuration directory) - """ - cfg = _get_model_config(config_file) - return build_model(cfg) - - -def setup(config_file): - """ - Setup the configuration from the specified file (relative to the base configuration directory) - """ - cfg = _get_model_config(config_file) - cfg.freeze() - default_setup(cfg, {}) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/test_model_e2e.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/test_model_e2e.py deleted file mode 100644 index eed1310..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/test_model_e2e.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
- -import unittest -import torch - -from detectron2.structures import BitMasks, Boxes, Instances - -from .common import get_model - - -# TODO(plabatut): Modularize detectron2 tests and re-use -def make_model_inputs(image, instances=None): - if instances is None: - return {"image": image} - - return {"image": image, "instances": instances} - - -def make_empty_instances(h, w): - instances = Instances((h, w)) - instances.gt_boxes = Boxes(torch.rand(0, 4)) - instances.gt_classes = torch.tensor([]).to(dtype=torch.int64) - instances.gt_masks = BitMasks(torch.rand(0, h, w)) - return instances - - -class ModelE2ETest(unittest.TestCase): - CONFIG_PATH = "" - - def setUp(self): - self.model = get_model(self.CONFIG_PATH) - - def _test_eval(self, sizes): - inputs = [make_model_inputs(torch.rand(3, size[0], size[1])) for size in sizes] - self.model.eval() - self.model(inputs) - - -class DensePoseRCNNE2ETest(ModelE2ETest): - CONFIG_PATH = "densepose_rcnn_R_101_FPN_s1x.yaml" - - def test_empty_data(self): - self._test_eval([(200, 250), (200, 249)]) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/test_setup.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/test_setup.py deleted file mode 100644 index 96827f1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/test_setup.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -import unittest - -from .common import ( - get_config_files, - get_evolution_config_files, - get_quick_schedules_config_files, - setup, -) - - -class TestSetup(unittest.TestCase): - def _test_setup(self, config_file): - setup(config_file) - - def test_setup_configs(self): - config_files = get_config_files() - for config_file in config_files: - self._test_setup(config_file) - - def test_setup_evolution_configs(self): - config_files = get_evolution_config_files() - for config_file in config_files: - self._test_setup(config_file) - - def test_setup_quick_schedules_configs(self): - config_files = get_quick_schedules_config_files() - for config_file in config_files: - self._test_setup(config_file) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/test_structures.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/test_structures.py deleted file mode 100644 index ad97c23..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/test_structures.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
- -import unittest - -from densepose.data.structures import normalized_coords_transform - - -class TestStructures(unittest.TestCase): - def test_normalized_coords_transform(self): - bbox = (32, 24, 288, 216) - x0, y0, w, h = bbox - xmin, ymin, xmax, ymax = x0, y0, x0 + w, y0 + h - f = normalized_coords_transform(*bbox) - # Top-left - expected_p, actual_p = (-1, -1), f((xmin, ymin)) - self.assertEqual(expected_p, actual_p) - # Top-right - expected_p, actual_p = (1, -1), f((xmax, ymin)) - self.assertEqual(expected_p, actual_p) - # Bottom-left - expected_p, actual_p = (-1, 1), f((xmin, ymax)) - self.assertEqual(expected_p, actual_p) - # Bottom-right - expected_p, actual_p = (1, 1), f((xmax, ymax)) - self.assertEqual(expected_p, actual_p) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/train_net.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/train_net.py deleted file mode 100644 index 9d2e7bd..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/train_net.py +++ /dev/null @@ -1,122 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -""" -DensePose Training Script. - -This script is similar to the training script in detectron2/tools. - -It is an example of how a user might use detectron2 for a new project. -""" - -import logging -import os -from collections import OrderedDict -from fvcore.common.file_io import PathManager - -import detectron2.utils.comm as comm -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import CfgNode, get_cfg -from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch -from detectron2.evaluation import COCOEvaluator, DatasetEvaluators, verify_results -from detectron2.modeling import DatasetMapperTTA -from detectron2.utils.logger import setup_logger - -from densepose import ( - DensePoseCOCOEvaluator, - DensePoseGeneralizedRCNNWithTTA, - add_dataset_category_config, - add_densepose_config, - load_from_cfg, -) -from densepose.data import DatasetMapper, build_detection_test_loader, build_detection_train_loader - - -class Trainer(DefaultTrainer): - @classmethod - def build_evaluator(cls, cfg: CfgNode, dataset_name, output_folder=None): - if output_folder is None: - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") - evaluators = [COCOEvaluator(dataset_name, cfg, True, output_folder)] - if cfg.MODEL.DENSEPOSE_ON: - evaluators.append(DensePoseCOCOEvaluator(dataset_name, True, output_folder)) - return DatasetEvaluators(evaluators) - - @classmethod - def build_test_loader(cls, cfg: CfgNode, dataset_name): - return build_detection_test_loader(cfg, dataset_name, mapper=DatasetMapper(cfg, False)) - - @classmethod - def build_train_loader(cls, cfg: CfgNode): - return build_detection_train_loader(cfg, mapper=DatasetMapper(cfg, True)) - - @classmethod - def test_with_TTA(cls, cfg: CfgNode, model): - logger = logging.getLogger("detectron2.trainer") - # In the end of training, run an evaluation with TTA - # Only support some R-CNN models. 
- logger.info("Running inference with test-time augmentation ...") - transform_data = load_from_cfg(cfg) - model = DensePoseGeneralizedRCNNWithTTA(cfg, model, transform_data, DatasetMapperTTA(cfg)) - evaluators = [ - cls.build_evaluator( - cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA") - ) - for name in cfg.DATASETS.TEST - ] - res = cls.test(cfg, model, evaluators) - res = OrderedDict({k + "_TTA": v for k, v in res.items()}) - return res - - -def setup(args): - cfg = get_cfg() - add_dataset_category_config(cfg) - add_densepose_config(cfg) - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup(cfg, args) - # Setup logger for "densepose" module - setup_logger(output=cfg.OUTPUT_DIR, distributed_rank=comm.get_rank(), name="densepose") - return cfg - - -def main(args): - cfg = setup(args) - # disable strict kwargs checking: allow one to specify path handle - # hints through kwargs, like timeout in DP evaluation - PathManager.set_strict_kwargs_checking(False) - - if args.eval_only: - model = Trainer.build_model(cfg) - DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( - cfg.MODEL.WEIGHTS, resume=args.resume - ) - res = Trainer.test(cfg, model) - if cfg.TEST.AUG.ENABLED: - res.update(Trainer.test_with_TTA(cfg, model)) - if comm.is_main_process(): - verify_results(cfg, res) - return res - - trainer = Trainer(cfg) - trainer.resume_or_load(resume=args.resume) - if cfg.TEST.AUG.ENABLED: - trainer.register_hooks( - [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))] - ) - return trainer.train() - - -if __name__ == "__main__": - args = default_argument_parser().parse_args() - print("Command Line Args:", args) - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/README.md b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/README.md deleted file mode 100644 index 443736f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/README.md +++ /dev/null @@ -1,135 +0,0 @@ -# PointRend: Image Segmentation as Rendering - -Alexander Kirillov, Yuxin Wu, Kaiming He, Ross Girshick - -[[`arXiv`](https://arxiv.org/abs/1912.08193)] [[`BibTeX`](#CitingPointRend)] - -
- -

- -In this repository, we release code for PointRend in Detectron2. PointRend can be flexibly applied to both instance and semantic segmentation tasks by building on top of existing state-of-the-art models. - -## Installation -Install Detectron 2 following [INSTALL.md](https://github.com/facebookresearch/detectron2/blob/master/INSTALL.md). You are ready to go! - -## Quick start and visualization - -This [Colab Notebook](https://colab.research.google.com/drive/1isGPL5h5_cKoPPhVL9XhMokRtHDvmMVL) tutorial contains examples of PointRend usage and visualizations of its point sampling stages. - -## Training - -To train a model with 8 GPUs run: -```bash -cd /path/to/detectron2/projects/PointRend -python train_net.py --config-file configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_coco.yaml --num-gpus 8 -``` - -## Evaluation - -Model evaluation can be done similarly: -```bash -cd /path/to/detectron2/projects/PointRend -python train_net.py --config-file configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_coco.yaml --eval-only MODEL.WEIGHTS /path/to/model_checkpoint -``` - -# Pretrained Models - -## Instance Segmentation -#### COCO - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-| Mask head | Backbone | lr sched | Output resolution | mask AP | mask AP* | model id | download |
-| --- | --- | --- | --- | --- | --- | --- | --- |
-| PointRend | R50-FPN | 1× | 224×224 | 36.2 | 39.7 | 164254221 | model \| metrics |
-| PointRend | R50-FPN | 3× | 224×224 | 38.3 | 41.6 | 164955410 | model \| metrics |
-
-AP* is COCO mask AP evaluated against the higher-quality LVIS annotations; see the paper for details. Run `python detectron2/datasets/prepare_cocofied_lvis.py` to prepare GT files for AP* evaluation. Since LVIS annotations are not exhaustive, `lvis-api` and not `cocoapi` should be used to evaluate AP*.
-
-#### Cityscapes
-Cityscapes model is trained with ImageNet pretraining.
-
-| Mask head | Backbone | lr sched | Output resolution | mask AP | model id | download |
-| --- | --- | --- | --- | --- | --- | --- |
-| PointRend | R50-FPN | 1× | 224×224 | 35.9 | 164255101 | model \| metrics |
-
-
-## Semantic Segmentation
-
-#### Cityscapes
-Cityscapes model is trained with ImageNet pretraining.
-
-| Method | Backbone | Output resolution | mIoU | model id | download |
-| --- | --- | --- | --- | --- | --- |
-| SemanticFPN + PointRend | R101-FPN | 1024×2048 | 78.6 | 186480235 | model \| metrics |
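For quick experimentation with the pretrained models listed above, a minimal inference sketch is shown below. It assumes detectron2 and this project's `point_rend` package are importable (as in `finetune_net.py` later in this diff); the checkpoint path and input image name are placeholders:

```python
# Sketch: run a pretrained PointRend instance-segmentation model on a single image.
import cv2

from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

from point_rend import add_pointrend_config

cfg = get_cfg()
add_pointrend_config(cfg)
cfg.merge_from_file("configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_3x_coco.yaml")
cfg.MODEL.WEIGHTS = "/path/to/model_checkpoint"  # e.g. a checkpoint from the COCO table
predictor = DefaultPredictor(cfg)

image = cv2.imread("input.jpg")                  # BGR image read with OpenCV
instances = predictor(image)["instances"]
print(instances.pred_masks.shape)                # PointRend-refined instance masks
```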
- -## Citing PointRend - -If you use PointRend, please use the following BibTeX entry. - -```BibTeX -@InProceedings{kirillov2019pointrend, - title={{PointRend}: Image Segmentation as Rendering}, - author={Alexander Kirillov and Yuxin Wu and Kaiming He and Ross Girshick}, - journal={ArXiv:1912.08193}, - year={2019} -} -``` diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/Base-PointRend-RCNN-FPN.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/Base-PointRend-RCNN-FPN.yaml deleted file mode 100644 index d391718..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/Base-PointRend-RCNN-FPN.yaml +++ /dev/null @@ -1,21 +0,0 @@ -_BASE_: "../../../../configs/Base-RCNN-FPN.yaml" -MODEL: - ROI_HEADS: - NAME: "PointRendROIHeads" - IN_FEATURES: ["p2", "p3", "p4", "p5"] - ROI_BOX_HEAD: - TRAIN_ON_PRED_BOXES: True - ROI_MASK_HEAD: - NAME: "CoarseMaskHead" - FC_DIM: 1024 - NUM_FC: 2 - OUTPUT_SIDE_RESOLUTION: 7 - IN_FEATURES: ["p2"] - POINT_HEAD_ON: True - POINT_HEAD: - FC_DIM: 256 - NUM_FC: 3 - IN_FEATURES: ["p2"] -INPUT: - # PointRend for instance segmenation does not work with "polygon" mask_format. - MASK_FORMAT: "bitmask" diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_cityscapes.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_cityscapes.yaml deleted file mode 100644 index c23dbe1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_cityscapes.yaml +++ /dev/null @@ -1,23 +0,0 @@ -_BASE_: Base-PointRend-RCNN-FPN.yaml -MODEL: - WEIGHTS: detectron2://ImageNetPretrained/MSRA/R-50.pkl - MASK_ON: true - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 8 - POINT_HEAD: - NUM_CLASSES: 8 -DATASETS: - TEST: ("cityscapes_fine_instance_seg_val",) - TRAIN: ("cityscapes_fine_instance_seg_train",) -SOLVER: - BASE_LR: 0.01 - IMS_PER_BATCH: 8 - MAX_ITER: 24000 - STEPS: (18000,) -INPUT: - MAX_SIZE_TEST: 2048 - MAX_SIZE_TRAIN: 2048 - MIN_SIZE_TEST: 1024 - MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_coco.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_coco.yaml deleted file mode 100644 index e9fc573..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_coco.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: Base-PointRend-RCNN-FPN.yaml -MODEL: - WEIGHTS: detectron2://ImageNetPretrained/MSRA/R-50.pkl - MASK_ON: true - RESNETS: - DEPTH: 50 -# To add COCO AP evaluation against the higher-quality LVIS annotations. 
-# DATASETS: -# TEST: ("coco_2017_val", "lvis_v0.5_val_cocofied") diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_3x_coco.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_3x_coco.yaml deleted file mode 100644 index 2f013f3..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_3x_coco.yaml +++ /dev/null @@ -1,13 +0,0 @@ -_BASE_: Base-PointRend-RCNN-FPN.yaml -MODEL: - WEIGHTS: detectron2://ImageNetPretrained/MSRA/R-50.pkl - MASK_ON: true - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 -# To add COCO AP evaluation against the higher-quality LVIS annotations. -# DATASETS: -# TEST: ("coco_2017_val", "lvis_v0.5_val_cocofied") - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_3x_parsing.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_3x_parsing.yaml deleted file mode 100644 index a4af81d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_3x_parsing.yaml +++ /dev/null @@ -1,20 +0,0 @@ -_BASE_: Base-PointRend-RCNN-FPN.yaml -MODEL: - WEIGHTS: detectron2://ImageNetPretrained/MSRA/R-50.pkl - MASK_ON: true - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 1 - POINT_HEAD: - NUM_CLASSES: 1 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 - IMS_PER_BATCH: 1 -# To add COCO AP evaluation against the higher-quality LVIS annotations. -# DATASETS: -# TEST: ("coco_2017_val", "lvis_v0.5_val_cocofied") -DATASETS: - TRAIN: ("CIHP_train",) - TEST: ("CIHP_val",) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_X_101_32x8d_FPN_3x_parsing.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_X_101_32x8d_FPN_3x_parsing.yaml deleted file mode 100644 index 8e52d82..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_X_101_32x8d_FPN_3x_parsing.yaml +++ /dev/null @@ -1,28 +0,0 @@ -_BASE_: Base-PointRend-RCNN-FPN.yaml -MODEL: - WEIGHTS: "./X-101-32x8d.pkl" - PIXEL_STD: [57.375, 57.120, 58.395] - MASK_ON: true - RESNETS: - STRIDE_IN_1X1: False # this is a C2 model - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 - DEPTH: 101 - ROI_HEADS: - NUM_CLASSES: 1 - POINT_HEAD: - NUM_CLASSES: 1 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 - IMS_PER_BATCH: 1 -# To add COCO AP evaluation against the higher-quality LVIS annotations. 
-# DATASETS: -# TEST: ("coco_2017_val", "lvis_v0.5_val_cocofied") -INPUT: - MIN_SIZE_TRAIN: (640, 864) - MIN_SIZE_TRAIN_SAMPLING: "range" - MAX_SIZE_TRAIN: 1440 -DATASETS: - TRAIN: ("CIHP_train",) - TEST: ("CIHP_val",) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/SemanticSegmentation/Base-PointRend-Semantic-FPN.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/SemanticSegmentation/Base-PointRend-Semantic-FPN.yaml deleted file mode 100644 index 00562a9..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/SemanticSegmentation/Base-PointRend-Semantic-FPN.yaml +++ /dev/null @@ -1,19 +0,0 @@ -_BASE_: "../../../../configs/Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "SemanticSegmentor" - BACKBONE: - FREEZE_AT: 0 - SEM_SEG_HEAD: - NAME: "PointRendSemSegHead" - POINT_HEAD: - NUM_CLASSES: 54 - FC_DIM: 256 - NUM_FC: 3 - IN_FEATURES: ["p2"] - TRAIN_NUM_POINTS: 1024 - SUBDIVISION_STEPS: 2 - SUBDIVISION_NUM_POINTS: 8192 - COARSE_SEM_SEG_HEAD_NAME: "SemSegFPNHead" -DATASETS: - TRAIN: ("coco_2017_train_panoptic_stuffonly",) - TEST: ("coco_2017_val_panoptic_stuffonly",) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/SemanticSegmentation/pointrend_semantic_R_101_FPN_1x_cityscapes.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/SemanticSegmentation/pointrend_semantic_R_101_FPN_1x_cityscapes.yaml deleted file mode 100644 index 4965b06..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/SemanticSegmentation/pointrend_semantic_R_101_FPN_1x_cityscapes.yaml +++ /dev/null @@ -1,33 +0,0 @@ -_BASE_: Base-PointRend-Semantic-FPN.yaml -MODEL: - WEIGHTS: detectron2://ImageNetPretrained/MSRA/R-101.pkl - RESNETS: - DEPTH: 101 - SEM_SEG_HEAD: - NUM_CLASSES: 19 - POINT_HEAD: - NUM_CLASSES: 19 - TRAIN_NUM_POINTS: 2048 - SUBDIVISION_NUM_POINTS: 8192 -DATASETS: - TRAIN: ("cityscapes_fine_sem_seg_train",) - TEST: ("cityscapes_fine_sem_seg_val",) -SOLVER: - BASE_LR: 0.01 - STEPS: (40000, 55000) - MAX_ITER: 65000 - IMS_PER_BATCH: 32 -INPUT: - MIN_SIZE_TRAIN: (512, 768, 1024, 1280, 1536, 1792, 2048) - MIN_SIZE_TRAIN_SAMPLING: "choice" - MIN_SIZE_TEST: 1024 - MAX_SIZE_TRAIN: 4096 - MAX_SIZE_TEST: 2048 - CROP: - ENABLED: True - TYPE: "absolute" - SIZE: (512, 1024) - SINGLE_CATEGORY_MAX_AREA: 0.75 - COLOR_AUG_SSD: True -DATALOADER: - NUM_WORKERS: 16 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/SemanticSegmentation/pointrend_semantic_R_50_FPN_1x_coco.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/SemanticSegmentation/pointrend_semantic_R_50_FPN_1x_coco.yaml deleted file mode 100644 index 7948bd8..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/SemanticSegmentation/pointrend_semantic_R_50_FPN_1x_coco.yaml +++ /dev/null @@ -1,5 +0,0 @@ -_BASE_: Base-PointRend-Semantic-FPN.yaml -MODEL: - WEIGHTS: detectron2://ImageNetPretrained/MSRA/R-50.pkl - RESNETS: - DEPTH: 50 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/finetune_net.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/finetune_net.py deleted file mode 100644 index b99baf9..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/finetune_net.py +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -""" -PointRend Training Script. - -This script is a simplified version of the training script in detectron2/tools. -""" - -import os -import torch - -import detectron2.utils.comm as comm -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.data import MetadataCatalog, build_detection_train_loader -from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch -from detectron2.evaluation import ( - CityscapesInstanceEvaluator, - CityscapesSemSegEvaluator, - COCOEvaluator, - DatasetEvaluators, - LVISEvaluator, - SemSegEvaluator, - verify_results, -) - -from point_rend import SemSegDatasetMapper, add_pointrend_config - -os.environ['CUDA_VISIBLE_DEVICES'] = '4' -# Register Custom Dataset -from detectron2.data.datasets import register_coco_instances -register_coco_instances("CIHP_train", {}, "/data03/v_xuyunqiu/multi_parsing/data/msrcnn_finetune_annotations/CIHP_train.json", "/data03/v_xuyunqiu/data/instance-level_human_parsing/Training/Images") -register_coco_instances("CIHP_val", {}, "/data03/v_xuyunqiu/multi_parsing/data/msrcnn_finetune_annotations/CIHP_val.json", "/data03/v_xuyunqiu/data/instance-level_human_parsing/Validation/Images") - - -class Trainer(DefaultTrainer): - """ - We use the "DefaultTrainer" which contains a number pre-defined logic for - standard training workflow. They may not work for you, especially if you - are working on a new research project. In that case you can use the cleaner - "SimpleTrainer", or write your own training loop. - """ - - @classmethod - def build_evaluator(cls, cfg, dataset_name, output_folder=None): - """ - Create evaluator(s) for a given dataset. - This uses the special metadata "evaluator_type" associated with each builtin dataset. - For your own dataset, you can simply create an evaluator manually in your - script and do not have to worry about the hacky if-else logic here. - """ - if output_folder is None: - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") - evaluator_list = [] - evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type - if evaluator_type == "lvis": - return LVISEvaluator(dataset_name, cfg, True, output_folder) - if evaluator_type == "coco": - return COCOEvaluator(dataset_name, cfg, True, output_folder) - if evaluator_type == "sem_seg": - return SemSegEvaluator( - dataset_name, - distributed=True, - num_classes=cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, - ignore_label=cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, - output_dir=output_folder, - ) - if evaluator_type == "cityscapes_instance": - assert ( - torch.cuda.device_count() >= comm.get_rank() - ), "CityscapesEvaluator currently do not work with multiple machines." - return CityscapesInstanceEvaluator(dataset_name) - if evaluator_type == "cityscapes_sem_seg": - assert ( - torch.cuda.device_count() >= comm.get_rank() - ), "CityscapesEvaluator currently do not work with multiple machines." 
- return CityscapesSemSegEvaluator(dataset_name) - if len(evaluator_list) == 0: - raise NotImplementedError( - "no Evaluator for the dataset {} with the type {}".format( - dataset_name, evaluator_type - ) - ) - if len(evaluator_list) == 1: - return evaluator_list[0] - return DatasetEvaluators(evaluator_list) - - @classmethod - def build_train_loader(cls, cfg): - if "SemanticSegmentor" in cfg.MODEL.META_ARCHITECTURE: - mapper = SemSegDatasetMapper(cfg, True) - else: - mapper = None - return build_detection_train_loader(cfg, mapper=mapper) - - -def setup(args): - """ - Create configs and perform basic setups. - """ - cfg = get_cfg() - add_pointrend_config(cfg) - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup(cfg, args) - return cfg - - -def main(args): - cfg = setup(args) - - if args.eval_only: - model = Trainer.build_model(cfg) - DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( - cfg.MODEL.WEIGHTS, resume=args.resume - ) - res = Trainer.test(cfg, model) - if comm.is_main_process(): - verify_results(cfg, res) - return res - - trainer = Trainer(cfg) - trainer.resume_or_load(resume=args.resume) - return trainer.train() - - -if __name__ == "__main__": - args = default_argument_parser().parse_args() - print("Command Line Args:", args) - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/logs/hadoop.kylin.libdfs.log b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/logs/hadoop.kylin.libdfs.log deleted file mode 100644 index e69de29..0000000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/__init__.py deleted file mode 100644 index 4020fe0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .config import add_pointrend_config -from .coarse_mask_head import CoarseMaskHead -from .roi_heads import PointRendROIHeads -from .dataset_mapper import SemSegDatasetMapper -from .semantic_seg import PointRendSemSegHead diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/coarse_mask_head.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/coarse_mask_head.py deleted file mode 100644 index 3f1cffb..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/coarse_mask_head.py +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import fvcore.nn.weight_init as weight_init -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.layers import Conv2d, ShapeSpec -from detectron2.modeling import ROI_MASK_HEAD_REGISTRY - - -@ROI_MASK_HEAD_REGISTRY.register() -class CoarseMaskHead(nn.Module): - """ - A mask head with fully connected layers. Given pooled features it first reduces channels and - spatial dimensions with conv layers and then uses FC layers to predict coarse masks analogously - to the standard box head. 
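Editor's note: the coarse mask head defined below first shrinks the pooled RoI features with a 1x1 conv (channels) and a stride-2 conv (spatial size), then runs FC layers and reshapes the final linear output into a per-class K x K coarse mask. A minimal shape-level sketch of that pattern in plain PyTorch, with made-up dimensions and without the config parsing, optional channel reduction, or weight initialization of the registered head:

```python
import torch
from torch import nn
import torch.nn.functional as F

class TinyCoarseHead(nn.Module):
    """Shape-level sketch: 1x1 conv shrinks channels, a stride-2 conv shrinks space,
    then FC layers emit a KxK coarse mask per class."""

    def __init__(self, in_ch=256, conv_dim=128, fc_dim=1024, num_classes=2, side=14, out_side=7):
        super().__init__()
        self.reduce_ch = nn.Conv2d(in_ch, conv_dim, kernel_size=1)
        self.reduce_sp = nn.Conv2d(conv_dim, conv_dim, kernel_size=2, stride=2)
        self.fc1 = nn.Linear(conv_dim * (side // 2) ** 2, fc_dim)
        self.fc2 = nn.Linear(fc_dim, fc_dim)
        self.predict = nn.Linear(fc_dim, num_classes * out_side * out_side)
        self.num_classes, self.out_side = num_classes, out_side

    def forward(self, x):                       # x: (R, in_ch, side, side) pooled RoI features
        x = F.relu(self.reduce_sp(F.relu(self.reduce_ch(x))))
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc2(F.relu(self.fc1(x))))
        return self.predict(x).view(-1, self.num_classes, self.out_side, self.out_side)

print(TinyCoarseHead()(torch.randn(3, 256, 14, 14)).shape)  # torch.Size([3, 2, 7, 7])
```

The `(3, 2, 7, 7)` output corresponds to 3 RoIs, 2 classes, and a 7 x 7 coarse mask per class, which the point head later refines.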
- """ - - def __init__(self, cfg, input_shape: ShapeSpec): - """ - The following attributes are parsed from config: - conv_dim: the output dimension of the conv layers - fc_dim: the feature dimenstion of the FC layers - num_fc: the number of FC layers - output_side_resolution: side resolution of the output square mask prediction - """ - super(CoarseMaskHead, self).__init__() - - # fmt: off - self.num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES - conv_dim = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM - self.fc_dim = cfg.MODEL.ROI_MASK_HEAD.FC_DIM - num_fc = cfg.MODEL.ROI_MASK_HEAD.NUM_FC - self.output_side_resolution = cfg.MODEL.ROI_MASK_HEAD.OUTPUT_SIDE_RESOLUTION - self.input_channels = input_shape.channels - self.input_h = input_shape.height - self.input_w = input_shape.width - # fmt: on - - self.conv_layers = [] - if self.input_channels > conv_dim: - self.reduce_channel_dim_conv = Conv2d( - self.input_channels, - conv_dim, - kernel_size=1, - stride=1, - padding=0, - bias=True, - activation=F.relu, - ) - self.conv_layers.append(self.reduce_channel_dim_conv) - - self.reduce_spatial_dim_conv = Conv2d( - conv_dim, conv_dim, kernel_size=2, stride=2, padding=0, bias=True, activation=F.relu - ) - self.conv_layers.append(self.reduce_spatial_dim_conv) - - input_dim = conv_dim * self.input_h * self.input_w - input_dim //= 4 - - self.fcs = [] - for k in range(num_fc): - fc = nn.Linear(input_dim, self.fc_dim) - self.add_module("coarse_mask_fc{}".format(k + 1), fc) - self.fcs.append(fc) - input_dim = self.fc_dim - - output_dim = self.num_classes * self.output_side_resolution * self.output_side_resolution - - self.prediction = nn.Linear(self.fc_dim, output_dim) - # use normal distribution initialization for mask prediction layer - nn.init.normal_(self.prediction.weight, std=0.001) - nn.init.constant_(self.prediction.bias, 0) - - for layer in self.conv_layers: - weight_init.c2_msra_fill(layer) - for layer in self.fcs: - weight_init.c2_xavier_fill(layer) - - def forward(self, x): - # unlike BaseMaskRCNNHead, this head only outputs intermediate - # features, because the features will be used later by PointHead. - N = x.shape[0] - x = x.view(N, self.input_channels, self.input_h, self.input_w) - for layer in self.conv_layers: - x = layer(x) - x = torch.flatten(x, start_dim=1) - for layer in self.fcs: - x = F.relu(layer(x)) - return self.prediction(x).view( - N, self.num_classes, self.output_side_resolution, self.output_side_resolution - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/color_augmentation.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/color_augmentation.py deleted file mode 100644 index 27344c4..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/color_augmentation.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import numpy as np -import random -import cv2 -from fvcore.transforms.transform import Transform - - -class ColorAugSSDTransform(Transform): - """ - A color related data augmentation used in Single Shot Multibox Detector (SSD). - - Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, - Scott Reed, Cheng-Yang Fu, Alexander C. Berg. - SSD: Single Shot MultiBox Detector. ECCV 2016. 
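Editor's note: the SSD-style transform implemented below jitters brightness, contrast, saturation, and hue, each with probability 0.5, and every channel-space operation reduces to the same affine remap clipped back to uint8. A tiny illustration of that `convert` step with made-up parameters:

```python
import numpy as np

def convert(img, alpha=1.0, beta=0.0):
    """img * alpha + beta, clipped back into uint8 range."""
    return np.clip(img.astype(np.float32) * alpha + beta, 0, 255).astype(np.uint8)

img = np.full((2, 2, 3), 200, dtype=np.uint8)
print(convert(img, alpha=1.3)[0, 0])   # [255 255 255]: contrast-style scaling saturates
print(convert(img, beta=-32)[0, 0])    # [168 168 168]: brightness shift
```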
- - Implementation based on: - - https://github.com/weiliu89/caffe/blob - /4817bf8b4200b35ada8ed0dc378dceaf38c539e4 - /src/caffe/util/im_transforms.cpp - - https://github.com/chainer/chainercv/blob - /7159616642e0be7c5b3ef380b848e16b7e99355b/chainercv - /links/model/ssd/transforms.py - """ - - def __init__( - self, - img_format, - brightness_delta=32, - contrast_low=0.5, - contrast_high=1.5, - saturation_low=0.5, - saturation_high=1.5, - hue_delta=18, - ): - super().__init__() - assert img_format in ["BGR", "RGB"] - self.is_rgb = img_format == "RGB" - del img_format - self._set_attributes(locals()) - - def apply_coords(self, coords): - return coords - - def apply_segmentation(self, segmentation): - return segmentation - - def apply_image(self, img, interp=None): - if self.is_rgb: - img = img[:, :, [2, 1, 0]] - img = self.brightness(img) - if random.randrange(2): - img = self.contrast(img) - img = self.saturation(img) - img = self.hue(img) - else: - img = self.saturation(img) - img = self.hue(img) - img = self.contrast(img) - if self.is_rgb: - img = img[:, :, [2, 1, 0]] - return img - - def convert(self, img, alpha=1, beta=0): - img = img.astype(np.float32) * alpha + beta - img = np.clip(img, 0, 255) - return img.astype(np.uint8) - - def brightness(self, img): - if random.randrange(2): - return self.convert( - img, beta=random.uniform(-self.brightness_delta, self.brightness_delta) - ) - return img - - def contrast(self, img): - if random.randrange(2): - return self.convert(img, alpha=random.uniform(self.contrast_low, self.contrast_high)) - return img - - def saturation(self, img): - if random.randrange(2): - img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) - img[:, :, 1] = self.convert( - img[:, :, 1], alpha=random.uniform(self.saturation_low, self.saturation_high) - ) - return cv2.cvtColor(img, cv2.COLOR_HSV2BGR) - return img - - def hue(self, img): - if random.randrange(2): - img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) - img[:, :, 0] = ( - img[:, :, 0].astype(int) + random.randint(-self.hue_delta, self.hue_delta) - ) % 180 - return cv2.cvtColor(img, cv2.COLOR_HSV2BGR) - return img diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/config.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/config.py deleted file mode 100644 index 74f6367..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/config.py +++ /dev/null @@ -1,48 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -from detectron2.config import CfgNode as CN - - -def add_pointrend_config(cfg): - """ - Add config for PointRend. - """ - # We retry random cropping until no single category in semantic segmentation GT occupies more - # than `SINGLE_CATEGORY_MAX_AREA` part of the crop. - cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA = 1.0 - # Color augmentatition from SSD paper for semantic segmentation model during training. - cfg.INPUT.COLOR_AUG_SSD = False - - # Names of the input feature maps to be used by a coarse mask head. - cfg.MODEL.ROI_MASK_HEAD.IN_FEATURES = ("p2",) - cfg.MODEL.ROI_MASK_HEAD.FC_DIM = 1024 - cfg.MODEL.ROI_MASK_HEAD.NUM_FC = 2 - # The side size of a coarse mask head prediction. - cfg.MODEL.ROI_MASK_HEAD.OUTPUT_SIDE_RESOLUTION = 7 - # True if point head is used. 
- cfg.MODEL.ROI_MASK_HEAD.POINT_HEAD_ON = False - - cfg.MODEL.POINT_HEAD = CN() - cfg.MODEL.POINT_HEAD.NAME = "StandardPointHead" - cfg.MODEL.POINT_HEAD.NUM_CLASSES = 80 - # Names of the input feature maps to be used by a mask point head. - cfg.MODEL.POINT_HEAD.IN_FEATURES = ("p2",) - # Number of points sampled during training for a mask point head. - cfg.MODEL.POINT_HEAD.TRAIN_NUM_POINTS = 14 * 14 - # Oversampling parameter for PointRend point sampling during training. Parameter `k` in the - # original paper. - cfg.MODEL.POINT_HEAD.OVERSAMPLE_RATIO = 3 - # Importance sampling parameter for PointRend point sampling during training. Parametr `beta` in - # the original paper. - cfg.MODEL.POINT_HEAD.IMPORTANCE_SAMPLE_RATIO = 0.75 - # Number of subdivision steps during inference. - cfg.MODEL.POINT_HEAD.SUBDIVISION_STEPS = 5 - # Maximum number of points selected at each subdivision step (N). - cfg.MODEL.POINT_HEAD.SUBDIVISION_NUM_POINTS = 28 * 28 - cfg.MODEL.POINT_HEAD.FC_DIM = 256 - cfg.MODEL.POINT_HEAD.NUM_FC = 3 - cfg.MODEL.POINT_HEAD.CLS_AGNOSTIC_MASK = False - # If True, then coarse prediction features are used as inout for each layer in PointRend's MLP. - cfg.MODEL.POINT_HEAD.COARSE_PRED_EACH_LAYER = True - cfg.MODEL.POINT_HEAD.COARSE_SEM_SEG_HEAD_NAME = "SemSegFPNHead" diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/dataset_mapper.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/dataset_mapper.py deleted file mode 100644 index 76b64ee..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/dataset_mapper.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import copy -import logging -import numpy as np -import torch -from fvcore.common.file_io import PathManager -from fvcore.transforms.transform import CropTransform -from PIL import Image - -from detectron2.data import detection_utils as utils -from detectron2.data import transforms as T - -from .color_augmentation import ColorAugSSDTransform - -""" -This file contains the mapping that's applied to "dataset dicts" for semantic segmentation models. -Unlike the default DatasetMapper this mapper uses cropping as the last transformation. -""" - -__all__ = ["SemSegDatasetMapper"] - - -class SemSegDatasetMapper: - """ - A callable which takes a dataset dict in Detectron2 Dataset format, - and map it into a format used by semantic segmentation models. - - The callable currently does the following: - - 1. Read the image from "file_name" - 2. Applies geometric transforms to the image and annotation - 3. Find and applies suitable cropping to the image and annotation - 4. 
Prepare image and annotation to Tensors - """ - - def __init__(self, cfg, is_train=True): - if cfg.INPUT.CROP.ENABLED and is_train: - self.crop_gen = T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE) - logging.getLogger(__name__).info("CropGen used in training: " + str(self.crop_gen)) - else: - self.crop_gen = None - - self.tfm_gens = utils.build_transform_gen(cfg, is_train) - - if cfg.INPUT.COLOR_AUG_SSD: - self.tfm_gens.append(ColorAugSSDTransform(img_format=cfg.INPUT.FORMAT)) - logging.getLogger(__name__).info( - "Color augmnetation used in training: " + str(self.tfm_gens[-1]) - ) - - # fmt: off - self.img_format = cfg.INPUT.FORMAT - self.single_category_max_area = cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA - self.ignore_value = cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE - # fmt: on - - self.is_train = is_train - - def __call__(self, dataset_dict): - """ - Args: - dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. - - Returns: - dict: a format that builtin models in detectron2 accept - """ - dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below - image = utils.read_image(dataset_dict["file_name"], format=self.img_format) - utils.check_image_size(dataset_dict, image) - assert "sem_seg_file_name" in dataset_dict - - image, transforms = T.apply_transform_gens(self.tfm_gens, image) - if self.is_train: - with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f: - sem_seg_gt = Image.open(f) - sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8") - sem_seg_gt = transforms.apply_segmentation(sem_seg_gt) - if self.crop_gen: - image, sem_seg_gt = crop_transform( - image, - sem_seg_gt, - self.crop_gen, - self.single_category_max_area, - self.ignore_value, - ) - dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long")) - - # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, - # but not efficient on large generic data structures due to the use of pickle & mp.Queue. - # Therefore it's important to use torch.Tensor. - dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) - - if not self.is_train: - dataset_dict.pop("sem_seg_file_name", None) - return dataset_dict - - return dataset_dict - - -def crop_transform(image, sem_seg, crop_gen, single_category_max_area, ignore_value): - """ - Find a cropping window such that no single category occupies more than - `single_category_max_area` in `sem_seg`. The function retries random cropping 10 times max. 
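Editor's note: the `crop_transform` helper whose docstring ends here keeps re-drawing random windows until no single category dominates the crop (`SINGLE_CATEGORY_MAX_AREA`). Restated as a standalone acceptance test, with numpy only and illustrative values:

```python
import numpy as np

def crop_is_balanced(sem_seg_crop, max_area=0.75, ignore_value=255):
    """Accept a candidate crop only if no single category covers more than
    `max_area` of its non-ignored pixels (illustrative threshold)."""
    labels, counts = np.unique(sem_seg_crop, return_counts=True)
    counts = counts[labels != ignore_value]
    return len(counts) > 1 and counts.max() / counts.sum() < max_area

crop = np.zeros((8, 8), dtype=np.uint8)
crop[:3] = 1                                   # category 1 covers ~37% of the crop
print(crop_is_balanced(crop))                  # True  (largest class is 62.5%)
print(crop_is_balanced(np.zeros((8, 8))))      # False (one class fills the whole crop)
```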
- """ - if single_category_max_area >= 1.0: - crop_tfm = crop_gen.get_transform(image) - sem_seg_temp = crop_tfm.apply_segmentation(sem_seg) - else: - h, w = sem_seg.shape - crop_size = crop_gen.get_crop_size((h, w)) - for _ in range(10): - y0 = np.random.randint(h - crop_size[0] + 1) - x0 = np.random.randint(w - crop_size[1] + 1) - sem_seg_temp = sem_seg[y0 : y0 + crop_size[0], x0 : x0 + crop_size[1]] - labels, cnt = np.unique(sem_seg_temp, return_counts=True) - cnt = cnt[labels != ignore_value] - if len(cnt) > 1 and np.max(cnt) / np.sum(cnt) < single_category_max_area: - break - crop_tfm = CropTransform(x0, y0, crop_size[1], crop_size[0]) - image = crop_tfm.apply_image(image) - return image, sem_seg_temp diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/point_features.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/point_features.py deleted file mode 100644 index 320a33d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/point_features.py +++ /dev/null @@ -1,216 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import torch -from torch.nn import functional as F - -from detectron2.layers import cat -from detectron2.structures import Boxes - - -""" -Shape shorthand in this module: - - N: minibatch dimension size, i.e. the number of RoIs for instance segmenation or the - number of images for semantic segmenation. - R: number of ROIs, combined over all images, in the minibatch - P: number of points -""" - - -def point_sample(input, point_coords, **kwargs): - """ - A wrapper around :function:`torch.nn.functional.grid_sample` to support 3D point_coords tensors. - Unlike :function:`torch.nn.functional.grid_sample` it assumes `point_coords` to lie inside - [0, 1] x [0, 1] square. - - Args: - input (Tensor): A tensor of shape (N, C, H, W) that contains features map on a H x W grid. - point_coords (Tensor): A tensor of shape (N, P, 2) or (N, Hgrid, Wgrid, 2) that contains - [0, 1] x [0, 1] normalized point coordinates. - - Returns: - output (Tensor): A tensor of shape (N, C, P) or (N, C, Hgrid, Wgrid) that contains - features for points in `point_coords`. The features are obtained via bilinear - interplation from `input` the same way as :function:`torch.nn.functional.grid_sample`. - """ - add_dim = False - if point_coords.dim() == 3: - add_dim = True - point_coords = point_coords.unsqueeze(2) - output = F.grid_sample(input, 2.0 * point_coords - 1.0, **kwargs) - if add_dim: - output = output.squeeze(3) - return output - - -def generate_regular_grid_point_coords(R, side_size, device): - """ - Generate regular square grid of points in [0, 1] x [0, 1] coordinate space. - - Args: - R (int): The number of grids to sample, one for each region. - side_size (int): The side size of the regular grid. - device (torch.device): Desired device of returned tensor. - - Returns: - (Tensor): A tensor of shape (R, side_size^2, 2) that contains coordinates - for the regular grids. - """ - aff = torch.tensor([[[0.5, 0, 0.5], [0, 0.5, 0.5]]], device=device) - r = F.affine_grid(aff, torch.Size((1, 1, side_size, side_size)), align_corners=False) - return r.view(1, -1, 2).expand(R, -1, -1) - - -def get_uncertain_point_coords_with_randomness( - coarse_logits, uncertainty_func, num_points, oversample_ratio, importance_sample_ratio -): - """ - Sample points in [0, 1] x [0, 1] coordinate space based on their uncertainty. 
The unceratinties - are calculated for each point using 'uncertainty_func' function that takes point's logit - prediction as input. - See PointRend paper for details. - - Args: - coarse_logits (Tensor): A tensor of shape (N, C, Hmask, Wmask) or (N, 1, Hmask, Wmask) for - class-specific or class-agnostic prediction. - uncertainty_func: A function that takes a Tensor of shape (N, C, P) or (N, 1, P) that - contains logit predictions for P points and returns their uncertainties as a Tensor of - shape (N, 1, P). - num_points (int): The number of points P to sample. - oversample_ratio (int): Oversampling parameter. - importance_sample_ratio (float): Ratio of points that are sampled via importnace sampling. - - Returns: - point_coords (Tensor): A tensor of shape (N, P, 2) that contains the coordinates of P - sampled points. - """ - assert oversample_ratio >= 1 - assert importance_sample_ratio <= 1 and importance_sample_ratio >= 0 - num_boxes = coarse_logits.shape[0] - num_sampled = int(num_points * oversample_ratio) - point_coords = torch.rand(num_boxes, num_sampled, 2, device=coarse_logits.device) - point_logits = point_sample(coarse_logits, point_coords, align_corners=False) - # It is crucial to calculate uncertainty based on the sampled prediction value for the points. - # Calculating uncertainties of the coarse predictions first and sampling them for points leads - # to incorrect results. - # To illustrate this: assume uncertainty_func(logits)=-abs(logits), a sampled point between - # two coarse predictions with -1 and 1 logits has 0 logits, and therefore 0 uncertainty value. - # However, if we calculate uncertainties for the coarse predictions first, - # both will have -1 uncertainty, and the sampled point will get -1 uncertainty. - point_uncertainties = uncertainty_func(point_logits) - num_uncertain_points = int(importance_sample_ratio * num_points) - num_random_points = num_points - num_uncertain_points - idx = torch.topk(point_uncertainties[:, 0, :], k=num_uncertain_points, dim=1)[1] - shift = num_sampled * torch.arange(num_boxes, dtype=torch.long, device=coarse_logits.device) - idx += shift[:, None] - point_coords = point_coords.view(-1, 2)[idx.view(-1), :].view( - num_boxes, num_uncertain_points, 2 - ) - if num_random_points > 0: - point_coords = cat( - [ - point_coords, - torch.rand(num_boxes, num_random_points, 2, device=coarse_logits.device), - ], - dim=1, - ) - return point_coords - - -def get_uncertain_point_coords_on_grid(uncertainty_map, num_points): - """ - Find `num_points` most uncertain points from `uncertainty_map` grid. - - Args: - uncertainty_map (Tensor): A tensor of shape (N, 1, H, W) that contains uncertainty - values for a set of points on a regular H x W grid. - num_points (int): The number of points P to select. - - Returns: - point_indices (Tensor): A tensor of shape (N, P) that contains indices from - [0, H x W) of the most uncertain points. - point_coords (Tensor): A tensor of shape (N, P, 2) that contains [0, 1] x [0, 1] normalized - coordinates of the most uncertain points from the H x W grid. 
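Editor's note: as described above, training points are drawn in three steps: oversample `k*N` random points, score them with `uncertainty_func` on the logits sampled at those points, keep the `beta*N` most uncertain, and top up with uniformly random points. A minimal standalone sketch of that recipe, where a hypothetical lambda stands in for `point_sample` on the coarse prediction and `-|logit|` is used as in the instance-segmentation case:

```python
import torch

def sample_uncertain_points(logits_at, num_points, k=3, beta=0.75):
    """Toy version of the biased sampling: oversample, keep the most uncertain
    beta-fraction, fill the remainder uniformly at random.

    logits_at: callable taking coords of shape (N, P, 2) in [0, 1]^2 and returning
               per-point logits of shape (N, P); it stands in for point_sample on
               the coarse prediction.
    """
    N = 1                                                  # one image/box for the demo
    coords = torch.rand(N, num_points * k, 2)              # step 1: oversample k*P points
    uncertainty = -logits_at(coords).abs()                 # step 2: -|logit| at the sampled points
    num_uncertain = int(beta * num_points)
    top = uncertainty.topk(num_uncertain, dim=1).indices
    picked = torch.gather(coords, 1, top.unsqueeze(-1).expand(-1, -1, 2))
    random_fill = torch.rand(N, num_points - num_uncertain, 2)  # step 3: uniform remainder
    return torch.cat([picked, random_fill], dim=1)

# A made-up "mask boundary" at x = 0.5: the picked points should cluster around it.
pts = sample_uncertain_points(lambda c: 10.0 * (c[..., 0] - 0.5), num_points=8)
print(pts.shape)    # torch.Size([1, 8, 2])
```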
- """ - R, _, H, W = uncertainty_map.shape - h_step = 1.0 / float(H) - w_step = 1.0 / float(W) - - num_points = min(H * W, num_points) - point_indices = torch.topk(uncertainty_map.view(R, H * W), k=num_points, dim=1)[1] - point_coords = torch.zeros(R, num_points, 2, dtype=torch.float, device=uncertainty_map.device) - point_coords[:, :, 0] = w_step / 2.0 + (point_indices % W).to(torch.float) * w_step - point_coords[:, :, 1] = h_step / 2.0 + (point_indices // W).to(torch.float) * h_step - return point_indices, point_coords - - -def point_sample_fine_grained_features(features_list, feature_scales, boxes, point_coords): - """ - Get features from feature maps in `features_list` that correspond to specific point coordinates - inside each bounding box from `boxes`. - - Args: - features_list (list[Tensor]): A list of feature map tensors to get features from. - feature_scales (list[float]): A list of scales for tensors in `features_list`. - boxes (list[Boxes]): A list of I Boxes objects that contain R_1 + ... + R_I = R boxes all - together. - point_coords (Tensor): A tensor of shape (R, P, 2) that contains - [0, 1] x [0, 1] box-normalized coordinates of the P sampled points. - - Returns: - point_features (Tensor): A tensor of shape (R, C, P) that contains features sampled - from all features maps in feature_list for P sampled points for all R boxes in `boxes`. - point_coords_wrt_image (Tensor): A tensor of shape (R, P, 2) that contains image-level - coordinates of P points. - """ - cat_boxes = Boxes.cat(boxes) - num_boxes = [len(b) for b in boxes] - - point_coords_wrt_image = get_point_coords_wrt_image(cat_boxes.tensor, point_coords) - split_point_coords_wrt_image = torch.split(point_coords_wrt_image, num_boxes) - - point_features = [] - for idx_img, point_coords_wrt_image_per_image in enumerate(split_point_coords_wrt_image): - point_features_per_image = [] - for idx_feature, feature_map in enumerate(features_list): - h, w = feature_map.shape[-2:] - scale = torch.tensor([w, h], device=feature_map.device) / feature_scales[idx_feature] - point_coords_scaled = point_coords_wrt_image_per_image / scale - point_features_per_image.append( - point_sample( - feature_map[idx_img].unsqueeze(0), - point_coords_scaled.unsqueeze(0), - align_corners=False, - ) - .squeeze(0) - .transpose(1, 0) - ) - point_features.append(cat(point_features_per_image, dim=1)) - - return cat(point_features, dim=0), point_coords_wrt_image - - -def get_point_coords_wrt_image(boxes_coords, point_coords): - """ - Convert box-normalized [0, 1] x [0, 1] point cooordinates to image-level coordinates. - - Args: - boxes_coords (Tensor): A tensor of shape (R, 4) that contains bounding boxes. - coordinates. - point_coords (Tensor): A tensor of shape (R, P, 2) that contains - [0, 1] x [0, 1] box-normalized coordinates of the P sampled points. - - Returns: - point_coords_wrt_image (Tensor): A tensor of shape (R, P, 2) that contains - image-normalized coordinates of P sampled points. 
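Editor's note: a quick numeric check of the coordinate convention used throughout this file. `point_sample` takes `[0, 1] x [0, 1]` coordinates and maps them to `grid_sample`'s `[-1, 1]` range via `2 * coords - 1`; with `align_corners=False`, the point `(0.75, 0.25)` lands exactly on the center of the top-right cell of a 2 x 2 feature map:

```python
import torch
import torch.nn.functional as F

# A 1x1x2x2 feature map with a distinct value in each cell.
feat = torch.tensor([[[[1., 2.],
                       [3., 4.]]]])

pts = torch.tensor([[[0.75, 0.25]]])                   # (N, P, 2), (x, y) in [0, 1]
grid = (2.0 * pts - 1.0).unsqueeze(2)                  # (N, P, 1, 2) in [-1, 1]
out = F.grid_sample(feat, grid, align_corners=False)   # (N, C, P, 1)
print(out.squeeze())                                   # tensor(2.), the center of the top-right cell
```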
- """ - with torch.no_grad(): - point_coords_wrt_image = point_coords.clone() - point_coords_wrt_image[:, :, 0] = point_coords_wrt_image[:, :, 0] * ( - boxes_coords[:, None, 2] - boxes_coords[:, None, 0] - ) - point_coords_wrt_image[:, :, 1] = point_coords_wrt_image[:, :, 1] * ( - boxes_coords[:, None, 3] - boxes_coords[:, None, 1] - ) - point_coords_wrt_image[:, :, 0] += boxes_coords[:, None, 0] - point_coords_wrt_image[:, :, 1] += boxes_coords[:, None, 1] - return point_coords_wrt_image diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/point_head.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/point_head.py deleted file mode 100644 index 6f35bae..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/point_head.py +++ /dev/null @@ -1,154 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import fvcore.nn.weight_init as weight_init -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.layers import ShapeSpec, cat -from detectron2.structures import BitMasks -from detectron2.utils.events import get_event_storage -from detectron2.utils.registry import Registry - -from .point_features import point_sample - -POINT_HEAD_REGISTRY = Registry("POINT_HEAD") -POINT_HEAD_REGISTRY.__doc__ = """ -Registry for point heads, which makes prediction for a given set of per-point features. - -The registered object will be called with `obj(cfg, input_shape)`. -""" - - -def roi_mask_point_loss(mask_logits, instances, points_coord): - """ - Compute the point-based loss for instance segmentation mask predictions. - - Args: - mask_logits (Tensor): A tensor of shape (R, C, P) or (R, 1, P) for class-specific or - class-agnostic, where R is the total number of predicted masks in all images, C is the - number of foreground classes, and P is the number of points sampled for each mask. - The values are logits. - instances (list[Instances]): A list of N Instances, where N is the number of images - in the batch. These instances are in 1:1 correspondence with the `mask_logits`. So, i_th - elememt of the list contains R_i objects and R_1 + ... + R_N is equal to R. - The ground-truth labels (class, box, mask, ...) associated with each instance are stored - in fields. - points_coords (Tensor): A tensor of shape (R, P, 2), where R is the total number of - predicted masks and P is the number of points for each mask. The coordinates are in - the image pixel coordinate space, i.e. [0, H] x [0, W]. - Returns: - point_loss (Tensor): A scalar tensor containing the loss. - """ - assert len(instances) == 0 or isinstance( - instances[0].gt_masks, BitMasks - ), "Point head works with GT in 'bitmask' format only. Set INPUT.MASK_FORMAT to 'bitmask'." 
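Editor's note: two coordinate frames appear in the loss whose docstring ends above: points are sampled in box-normalized `[0, 1]^2` coordinates, `get_point_coords_wrt_image` maps them into image pixels, and the loss then rescales by the full image size before sampling the ground-truth bit mask. A small numeric example with made-up box and image sizes:

```python
import torch

# Hypothetical box (x0, y0, x1, y1) and two box-normalized points: a corner and the center.
box = torch.tensor([100.0, 50.0, 300.0, 250.0])
pts = torch.tensor([[0.0, 0.0], [0.5, 0.5]])

w, h = (box[2] - box[0]).item(), (box[3] - box[1]).item()
pts_img = pts * torch.tensor([w, h]) + box[:2]         # image pixel coordinates
print(pts_img)                                         # tensor([[100.,  50.], [200., 150.]])

# Before sampling a (1, 1, H, W) ground-truth bit mask, the loss divides by the
# full image size so the points are back in [0, 1]^2 (x, y) for point_sample:
H, W = 480, 640
pts_unit = pts_img / torch.tensor([W, H], dtype=torch.float)
```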
- with torch.no_grad(): - cls_agnostic_mask = mask_logits.size(1) == 1 - total_num_masks = mask_logits.size(0) - - gt_classes = [] - gt_mask_logits = [] - idx = 0 - for instances_per_image in instances: - if not cls_agnostic_mask: - gt_classes_per_image = instances_per_image.gt_classes.to(dtype=torch.int64) - gt_classes.append(gt_classes_per_image) - - gt_bit_masks = instances_per_image.gt_masks.tensor - h, w = instances_per_image.gt_masks.image_size - scale = torch.tensor([w, h], dtype=torch.float, device=gt_bit_masks.device) - points_coord_grid_sample_format = ( - points_coord[idx : idx + len(instances_per_image)] / scale - ) - idx += len(instances_per_image) - gt_mask_logits.append( - point_sample( - gt_bit_masks.to(torch.float32).unsqueeze(1), - points_coord_grid_sample_format, - align_corners=False, - ).squeeze(1) - ) - gt_mask_logits = cat(gt_mask_logits) - - # torch.mean (in binary_cross_entropy_with_logits) doesn't - # accept empty tensors, so handle it separately - if gt_mask_logits.numel() == 0: - return mask_logits.sum() * 0 - - if cls_agnostic_mask: - mask_logits = mask_logits[:, 0] - else: - indices = torch.arange(total_num_masks) - gt_classes = cat(gt_classes, dim=0) - mask_logits = mask_logits[indices, gt_classes] - - # Log the training accuracy (using gt classes and 0.0 threshold for the logits) - mask_accurate = (mask_logits > 0.0) == gt_mask_logits.to(dtype=torch.uint8) - mask_accuracy = mask_accurate.nonzero().size(0) / mask_accurate.numel() - get_event_storage().put_scalar("point_rend/accuracy", mask_accuracy) - - point_loss = F.binary_cross_entropy_with_logits( - mask_logits, gt_mask_logits.to(dtype=torch.float32), reduction="mean" - ) - return point_loss - - -@POINT_HEAD_REGISTRY.register() -class StandardPointHead(nn.Module): - """ - A point head multi-layer perceptron which we model with conv1d layers with kernel 1. The head - takes both fine-grained and coarse prediction features as its input. 
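Editor's note: the point head implemented below is an MLP over per-point feature vectors, written with `nn.Conv1d(kernel_size=1)` so a whole `(N, C, P)` batch of points is processed at once. A 1x1 Conv1d is exactly a linear layer applied independently to every point, as this small check shows:

```python
import torch
from torch import nn

torch.manual_seed(0)
conv = nn.Conv1d(8, 4, kernel_size=1)
lin = nn.Linear(8, 4)
lin.weight.data.copy_(conv.weight.data.squeeze(-1))    # share parameters for the comparison
lin.bias.data.copy_(conv.bias.data)

x = torch.randn(2, 8, 100)                             # (N, C, P) per-point features
y_conv = conv(x)                                       # (N, 4, P)
y_lin = lin(x.transpose(1, 2)).transpose(1, 2)         # same thing, point by point
print(torch.allclose(y_conv, y_lin, atol=1e-6))        # True
```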
- """ - - def __init__(self, cfg, input_shape: ShapeSpec): - """ - The following attributes are parsed from config: - fc_dim: the output dimension of each FC layers - num_fc: the number of FC layers - coarse_pred_each_layer: if True, coarse prediction features are concatenated to each - layer's input - """ - super(StandardPointHead, self).__init__() - # fmt: off - num_classes = cfg.MODEL.POINT_HEAD.NUM_CLASSES - fc_dim = cfg.MODEL.POINT_HEAD.FC_DIM - num_fc = cfg.MODEL.POINT_HEAD.NUM_FC - cls_agnostic_mask = cfg.MODEL.POINT_HEAD.CLS_AGNOSTIC_MASK - self.coarse_pred_each_layer = cfg.MODEL.POINT_HEAD.COARSE_PRED_EACH_LAYER - input_channels = input_shape.channels - # fmt: on - - fc_dim_in = input_channels + num_classes - self.fc_layers = [] - for k in range(num_fc): - fc = nn.Conv1d(fc_dim_in, fc_dim, kernel_size=1, stride=1, padding=0, bias=True) - self.add_module("fc{}".format(k + 1), fc) - self.fc_layers.append(fc) - fc_dim_in = fc_dim - fc_dim_in += num_classes if self.coarse_pred_each_layer else 0 - - num_mask_classes = 1 if cls_agnostic_mask else num_classes - self.predictor = nn.Conv1d(fc_dim_in, num_mask_classes, kernel_size=1, stride=1, padding=0) - - for layer in self.fc_layers: - weight_init.c2_msra_fill(layer) - # use normal distribution initialization for mask prediction layer - nn.init.normal_(self.predictor.weight, std=0.001) - if self.predictor.bias is not None: - nn.init.constant_(self.predictor.bias, 0) - - def forward(self, fine_grained_features, coarse_features): - x = torch.cat((fine_grained_features, coarse_features), dim=1) - for layer in self.fc_layers: - x = F.relu(layer(x)) - if self.coarse_pred_each_layer: - x = cat((x, coarse_features), dim=1) - return self.predictor(x) - - -def build_point_head(cfg, input_channels): - """ - Build a point head defined by `cfg.MODEL.POINT_HEAD.NAME`. - """ - head_name = cfg.MODEL.POINT_HEAD.NAME - return POINT_HEAD_REGISTRY.get(head_name)(cfg, input_channels) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/roi_heads.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/roi_heads.py deleted file mode 100644 index 4f7225b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/roi_heads.py +++ /dev/null @@ -1,227 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import numpy as np -import torch - -from detectron2.layers import ShapeSpec, cat, interpolate -from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads -from detectron2.modeling.roi_heads.mask_head import ( - build_mask_head, - mask_rcnn_inference, - mask_rcnn_loss, -) -from detectron2.modeling.roi_heads.roi_heads import select_foreground_proposals - -from .point_features import ( - generate_regular_grid_point_coords, - get_uncertain_point_coords_on_grid, - get_uncertain_point_coords_with_randomness, - point_sample, - point_sample_fine_grained_features, -) -from .point_head import build_point_head, roi_mask_point_loss - - -def calculate_uncertainty(logits, classes): - """ - We estimate uncerainty as L1 distance between 0.0 and the logit prediction in 'logits' for the - foreground class in `classes`. - - Args: - logits (Tensor): A tensor of shape (R, C, ...) or (R, 1, ...) for class-specific or - class-agnostic, where R is the total number of predicted masks in all images and C is - the number of foreground classes. The values are logits. 
- classes (list): A list of length R that contains either predicted of ground truth class - for eash predicted mask. - - Returns: - scores (Tensor): A tensor of shape (R, 1, ...) that contains uncertainty scores with - the most uncertain locations having the highest uncertainty score. - """ - if logits.shape[1] == 1: - gt_class_logits = logits.clone() - else: - gt_class_logits = logits[ - torch.arange(logits.shape[0], device=logits.device), classes - ].unsqueeze(1) - return -(torch.abs(gt_class_logits)) - - -@ROI_HEADS_REGISTRY.register() -class PointRendROIHeads(StandardROIHeads): - """ - The RoI heads class for PointRend instance segmentation models. - - In this class we redefine the mask head of `StandardROIHeads` leaving all other heads intact. - To avoid namespace conflict with other heads we use names starting from `mask_` for all - variables that correspond to the mask head in the class's namespace. - """ - - def __init__(self, cfg, input_shape): - # TODO use explicit args style - super().__init__(cfg, input_shape) - self._init_mask_head(cfg, input_shape) - - def _init_mask_head(self, cfg, input_shape): - # fmt: off - self.mask_on = cfg.MODEL.MASK_ON - if not self.mask_on: - return - self.mask_coarse_in_features = cfg.MODEL.ROI_MASK_HEAD.IN_FEATURES - self.mask_coarse_side_size = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION - self._feature_scales = {k: 1.0 / v.stride for k, v in input_shape.items()} - # fmt: on - - in_channels = np.sum([input_shape[f].channels for f in self.mask_coarse_in_features]) - self.mask_coarse_head = build_mask_head( - cfg, - ShapeSpec( - channels=in_channels, - width=self.mask_coarse_side_size, - height=self.mask_coarse_side_size, - ), - ) - self._init_point_head(cfg, input_shape) - - def _init_point_head(self, cfg, input_shape): - # fmt: off - self.mask_point_on = cfg.MODEL.ROI_MASK_HEAD.POINT_HEAD_ON - if not self.mask_point_on: - return - assert cfg.MODEL.ROI_HEADS.NUM_CLASSES == cfg.MODEL.POINT_HEAD.NUM_CLASSES - self.mask_point_in_features = cfg.MODEL.POINT_HEAD.IN_FEATURES - self.mask_point_train_num_points = cfg.MODEL.POINT_HEAD.TRAIN_NUM_POINTS - self.mask_point_oversample_ratio = cfg.MODEL.POINT_HEAD.OVERSAMPLE_RATIO - self.mask_point_importance_sample_ratio = cfg.MODEL.POINT_HEAD.IMPORTANCE_SAMPLE_RATIO - # next two parameters are use in the adaptive subdivions inference procedure - self.mask_point_subdivision_steps = cfg.MODEL.POINT_HEAD.SUBDIVISION_STEPS - self.mask_point_subdivision_num_points = cfg.MODEL.POINT_HEAD.SUBDIVISION_NUM_POINTS - # fmt: on - - in_channels = np.sum([input_shape[f].channels for f in self.mask_point_in_features]) - self.mask_point_head = build_point_head( - cfg, ShapeSpec(channels=in_channels, width=1, height=1) - ) - - def _forward_mask(self, features, instances): - """ - Forward logic of the mask prediction branch. - - Args: - features (dict[str, Tensor]): #level input features for mask prediction - instances (list[Instances]): the per-image instances to train/predict masks. - In training, they can be the proposals. - In inference, they can be the predicted boxes. - - Returns: - In training, a dict of losses. - In inference, update `instances` with new fields "pred_masks" and return it. 
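Editor's note: for intuition on the `-|logit|` score defined above, logits near zero correspond to probabilities near 0.5, i.e. locations the coarse mask is least sure about, so they receive the highest (least negative) uncertainty and are refined first. A three-point toy example:

```python
import torch

logits = torch.tensor([-4.0, 0.2, 3.0])        # foreground-class logits at three points
uncertainty = -logits.abs()
print(uncertainty)                             # tensor([-4.0000, -0.2000, -3.0000])
print(int(uncertainty.argmax()))               # 1: the point with p ~ 0.55 is refined first
```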
- """ - if not self.mask_on: - return {} if self.training else instances - - if self.training: - proposals, _ = select_foreground_proposals(instances, self.num_classes) - proposal_boxes = [x.proposal_boxes for x in proposals] - mask_coarse_logits = self._forward_mask_coarse(features, proposal_boxes) - - losses = {"loss_mask": mask_rcnn_loss(mask_coarse_logits, proposals)} - losses.update(self._forward_mask_point(features, mask_coarse_logits, proposals)) - return losses - else: - pred_boxes = [x.pred_boxes for x in instances] - mask_coarse_logits = self._forward_mask_coarse(features, pred_boxes) - - mask_logits = self._forward_mask_point(features, mask_coarse_logits, instances) - mask_rcnn_inference(mask_logits, instances) - return instances - - def _forward_mask_coarse(self, features, boxes): - """ - Forward logic of the coarse mask head. - """ - point_coords = generate_regular_grid_point_coords( - np.sum(len(x) for x in boxes), self.mask_coarse_side_size, boxes[0].device - ) - mask_coarse_features_list = [features[k] for k in self.mask_coarse_in_features] - features_scales = [self._feature_scales[k] for k in self.mask_coarse_in_features] - # For regular grids of points, this function is equivalent to `len(features_list)' calls - # of `ROIAlign` (with `SAMPLING_RATIO=2`), and concat the results. - mask_features, _ = point_sample_fine_grained_features( - mask_coarse_features_list, features_scales, boxes, point_coords - ) - return self.mask_coarse_head(mask_features) - - def _forward_mask_point(self, features, mask_coarse_logits, instances): - """ - Forward logic of the mask point head. - """ - if not self.mask_point_on: - return {} if self.training else mask_coarse_logits - - mask_features_list = [features[k] for k in self.mask_point_in_features] - features_scales = [self._feature_scales[k] for k in self.mask_point_in_features] - - if self.training: - proposal_boxes = [x.proposal_boxes for x in instances] - gt_classes = cat([x.gt_classes for x in instances]) - with torch.no_grad(): - point_coords = get_uncertain_point_coords_with_randomness( - mask_coarse_logits, - lambda logits: calculate_uncertainty(logits, gt_classes), - self.mask_point_train_num_points, - self.mask_point_oversample_ratio, - self.mask_point_importance_sample_ratio, - ) - - fine_grained_features, point_coords_wrt_image = point_sample_fine_grained_features( - mask_features_list, features_scales, proposal_boxes, point_coords - ) - coarse_features = point_sample(mask_coarse_logits, point_coords, align_corners=False) - point_logits = self.mask_point_head(fine_grained_features, coarse_features) - return { - "loss_mask_point": roi_mask_point_loss( - point_logits, instances, point_coords_wrt_image - ) - } - else: - pred_boxes = [x.pred_boxes for x in instances] - pred_classes = cat([x.pred_classes for x in instances]) - # The subdivision code will fail with the empty list of boxes - if len(pred_classes) == 0: - return mask_coarse_logits - - mask_logits = mask_coarse_logits.clone() - for subdivions_step in range(self.mask_point_subdivision_steps): - mask_logits = interpolate( - mask_logits, scale_factor=2, mode="bilinear", align_corners=False - ) - # If `mask_point_subdivision_num_points` is larger or equal to the - # resolution of the next step, then we can skip this step - H, W = mask_logits.shape[-2:] - if ( - self.mask_point_subdivision_num_points >= 4 * H * W - and subdivions_step < self.mask_point_subdivision_steps - 1 - ): - continue - uncertainty_map = calculate_uncertainty(mask_logits, pred_classes) - point_indices, 
point_coords = get_uncertain_point_coords_on_grid( - uncertainty_map, self.mask_point_subdivision_num_points - ) - fine_grained_features, _ = point_sample_fine_grained_features( - mask_features_list, features_scales, pred_boxes, point_coords - ) - coarse_features = point_sample( - mask_coarse_logits, point_coords, align_corners=False - ) - point_logits = self.mask_point_head(fine_grained_features, coarse_features) - - # put mask point predictions to the right places on the upsampled grid. - R, C, H, W = mask_logits.shape - point_indices = point_indices.unsqueeze(1).expand(-1, C, -1) - mask_logits = ( - mask_logits.reshape(R, C, H * W) - .scatter_(2, point_indices, point_logits) - .view(R, C, H, W) - ) - return mask_logits diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/semantic_seg.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/semantic_seg.py deleted file mode 100644 index 670a0ea..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/semantic_seg.py +++ /dev/null @@ -1,134 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import numpy as np -from typing import Dict -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.layers import ShapeSpec, cat -from detectron2.modeling import SEM_SEG_HEADS_REGISTRY - -from .point_features import ( - get_uncertain_point_coords_on_grid, - get_uncertain_point_coords_with_randomness, - point_sample, -) -from .point_head import build_point_head - - -def calculate_uncertainty(sem_seg_logits): - """ - For each location of the prediction `sem_seg_logits` we estimate uncerainty as the - difference between top first and top second predicted logits. - - Args: - mask_logits (Tensor): A tensor of shape (N, C, ...), where N is the minibatch size and - C is the number of foreground classes. The values are logits. - - Returns: - scores (Tensor): A tensor of shape (N, 1, ...) that contains uncertainty scores with - the most uncertain locations having the highest uncertainty score. - """ - top2_scores = torch.topk(sem_seg_logits, k=2, dim=1)[0] - return (top2_scores[:, 1] - top2_scores[:, 0]).unsqueeze(1) - - -@SEM_SEG_HEADS_REGISTRY.register() -class PointRendSemSegHead(nn.Module): - """ - A semantic segmentation head that combines a head set in `POINT_HEAD.COARSE_SEM_SEG_HEAD_NAME` - and a point head set in `MODEL.POINT_HEAD.NAME`. 
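Editor's note: the inference branch that just ended repeats one pattern at growing resolution: bilinearly upsample the mask logits, rank locations by uncertainty, re-predict only the top-N points with the point head, and scatter those refined logits back into the flattened grid. A toy, class-agnostic version of that loop, where the `point_fn` argument stands in for sampling fine-grained features and running the point head:

```python
import torch
import torch.nn.functional as F

def subdivision_refine(coarse_logits, point_fn, steps=2, num_points=16):
    """Toy PointRend-style inference for a single-channel (class-agnostic) mask.

    coarse_logits: (N, 1, H, W) low-resolution logits.
    point_fn: maps (logits, flat_indices) -> refined logits of shape (N, 1, P).
    """
    logits = coarse_logits
    for _ in range(steps):
        logits = F.interpolate(logits, scale_factor=2, mode="bilinear", align_corners=False)
        N, C, H, W = logits.shape
        uncertainty = -logits.abs().view(N, H * W)                 # near-zero logits first
        idx = uncertainty.topk(min(num_points, H * W), dim=1).indices
        refined = point_fn(logits, idx)                            # (N, 1, P)
        logits = (logits.view(N, C, H * W)
                        .scatter_(2, idx.unsqueeze(1), refined)
                        .view(N, C, H, W))
    return logits

# Dummy "point head": just sharpen whatever logit is already there.
sharpen = lambda lg, idx: 4.0 * torch.gather(lg.flatten(2), 2, idx.unsqueeze(1))
out = subdivision_refine(torch.randn(1, 1, 7, 7), sharpen)
print(out.shape)   # torch.Size([1, 1, 28, 28])
```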
- """ - - def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): - super().__init__() - - self.ignore_value = cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE - - self.coarse_sem_seg_head = SEM_SEG_HEADS_REGISTRY.get( - cfg.MODEL.POINT_HEAD.COARSE_SEM_SEG_HEAD_NAME - )(cfg, input_shape) - self._init_point_head(cfg, input_shape) - - def _init_point_head(self, cfg, input_shape: Dict[str, ShapeSpec]): - # fmt: off - assert cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES == cfg.MODEL.POINT_HEAD.NUM_CLASSES - feature_channels = {k: v.channels for k, v in input_shape.items()} - self.in_features = cfg.MODEL.POINT_HEAD.IN_FEATURES - self.train_num_points = cfg.MODEL.POINT_HEAD.TRAIN_NUM_POINTS - self.oversample_ratio = cfg.MODEL.POINT_HEAD.OVERSAMPLE_RATIO - self.importance_sample_ratio = cfg.MODEL.POINT_HEAD.IMPORTANCE_SAMPLE_RATIO - self.subdivision_steps = cfg.MODEL.POINT_HEAD.SUBDIVISION_STEPS - self.subdivision_num_points = cfg.MODEL.POINT_HEAD.SUBDIVISION_NUM_POINTS - # fmt: on - - in_channels = np.sum([feature_channels[f] for f in self.in_features]) - self.point_head = build_point_head(cfg, ShapeSpec(channels=in_channels, width=1, height=1)) - - def forward(self, features, targets=None): - coarse_sem_seg_logits = self.coarse_sem_seg_head.layers(features) - - if self.training: - losses = self.coarse_sem_seg_head.losses(coarse_sem_seg_logits, targets) - - with torch.no_grad(): - point_coords = get_uncertain_point_coords_with_randomness( - coarse_sem_seg_logits, - calculate_uncertainty, - self.train_num_points, - self.oversample_ratio, - self.importance_sample_ratio, - ) - coarse_features = point_sample(coarse_sem_seg_logits, point_coords, align_corners=False) - - fine_grained_features = cat( - [ - point_sample(features[in_feature], point_coords, align_corners=False) - for in_feature in self.in_features - ] - ) - point_logits = self.point_head(fine_grained_features, coarse_features) - point_targets = ( - point_sample( - targets.unsqueeze(1).to(torch.float), - point_coords, - mode="nearest", - align_corners=False, - ) - .squeeze(1) - .to(torch.long) - ) - losses["loss_sem_seg_point"] = F.cross_entropy( - point_logits, point_targets, reduction="mean", ignore_index=self.ignore_value - ) - return None, losses - else: - sem_seg_logits = coarse_sem_seg_logits.clone() - for _ in range(self.subdivision_steps): - sem_seg_logits = F.interpolate( - sem_seg_logits, scale_factor=2, mode="bilinear", align_corners=False - ) - uncertainty_map = calculate_uncertainty(sem_seg_logits) - point_indices, point_coords = get_uncertain_point_coords_on_grid( - uncertainty_map, self.subdivision_num_points - ) - fine_grained_features = cat( - [ - point_sample(features[in_feature], point_coords, align_corners=False) - for in_feature in self.in_features - ] - ) - coarse_features = point_sample( - coarse_sem_seg_logits, point_coords, align_corners=False - ) - point_logits = self.point_head(fine_grained_features, coarse_features) - - # put sem seg point predictions to the right places on the upsampled grid. 
- N, C, H, W = sem_seg_logits.shape - point_indices = point_indices.unsqueeze(1).expand(-1, C, -1) - sem_seg_logits = ( - sem_seg_logits.reshape(N, C, H * W) - .scatter_(2, point_indices, point_logits) - .view(N, C, H, W) - ) - return sem_seg_logits, {} diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/run.sh b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/run.sh deleted file mode 100644 index 4ee1614..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/run.sh +++ /dev/null @@ -1,2 +0,0 @@ -python finetune_net.py --config-file configs/InstanceSegmentation/pointrend_rcnn_X_101_32x8d_FPN_3x_parsing.yaml --num-gpus 1 -#python finetune_net.py --config-file configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_3x_parsing.yaml --num-gpus 1 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/train_net.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/train_net.py deleted file mode 100644 index 7832867..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/train_net.py +++ /dev/null @@ -1,133 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -""" -PointRend Training Script. - -This script is a simplified version of the training script in detectron2/tools. -""" - -import os -import torch - -import detectron2.utils.comm as comm -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.data import MetadataCatalog, build_detection_train_loader -from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch -from detectron2.evaluation import ( - CityscapesInstanceEvaluator, - CityscapesSemSegEvaluator, - COCOEvaluator, - DatasetEvaluators, - LVISEvaluator, - SemSegEvaluator, - verify_results, -) - -from point_rend import SemSegDatasetMapper, add_pointrend_config - - -class Trainer(DefaultTrainer): - """ - We use the "DefaultTrainer" which contains a number pre-defined logic for - standard training workflow. They may not work for you, especially if you - are working on a new research project. In that case you can use the cleaner - "SimpleTrainer", or write your own training loop. - """ - - @classmethod - def build_evaluator(cls, cfg, dataset_name, output_folder=None): - """ - Create evaluator(s) for a given dataset. - This uses the special metadata "evaluator_type" associated with each builtin dataset. - For your own dataset, you can simply create an evaluator manually in your - script and do not have to worry about the hacky if-else logic here. - """ - if output_folder is None: - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") - evaluator_list = [] - evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type - if evaluator_type == "lvis": - return LVISEvaluator(dataset_name, cfg, True, output_folder) - if evaluator_type == "coco": - return COCOEvaluator(dataset_name, cfg, True, output_folder) - if evaluator_type == "sem_seg": - return SemSegEvaluator( - dataset_name, - distributed=True, - num_classes=cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, - ignore_label=cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, - output_dir=output_folder, - ) - if evaluator_type == "cityscapes_instance": - assert ( - torch.cuda.device_count() >= comm.get_rank() - ), "CityscapesEvaluator currently do not work with multiple machines." 
- return CityscapesInstanceEvaluator(dataset_name) - if evaluator_type == "cityscapes_sem_seg": - assert ( - torch.cuda.device_count() >= comm.get_rank() - ), "CityscapesEvaluator currently do not work with multiple machines." - return CityscapesSemSegEvaluator(dataset_name) - if len(evaluator_list) == 0: - raise NotImplementedError( - "no Evaluator for the dataset {} with the type {}".format( - dataset_name, evaluator_type - ) - ) - if len(evaluator_list) == 1: - return evaluator_list[0] - return DatasetEvaluators(evaluator_list) - - @classmethod - def build_train_loader(cls, cfg): - if "SemanticSegmentor" in cfg.MODEL.META_ARCHITECTURE: - mapper = SemSegDatasetMapper(cfg, True) - else: - mapper = None - return build_detection_train_loader(cfg, mapper=mapper) - - -def setup(args): - """ - Create configs and perform basic setups. - """ - cfg = get_cfg() - add_pointrend_config(cfg) - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup(cfg, args) - return cfg - - -def main(args): - cfg = setup(args) - - if args.eval_only: - model = Trainer.build_model(cfg) - DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( - cfg.MODEL.WEIGHTS, resume=args.resume - ) - res = Trainer.test(cfg, model) - if comm.is_main_process(): - verify_results(cfg, res) - return res - - trainer = Trainer(cfg) - trainer.resume_or_load(resume=args.resume) - return trainer.train() - - -if __name__ == "__main__": - args = default_argument_parser().parse_args() - print("Command Line Args:", args) - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/README.md b/preprocess/humanparsing/mhp_extension/detectron2/projects/README.md deleted file mode 100644 index 36263bd..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/README.md +++ /dev/null @@ -1,31 +0,0 @@ - -Here are a few projects that are built on detectron2. -They are examples of how to use detectron2 as a library, to make your projects more -maintainable. - -## Projects by Facebook - -Note that these are research projects, and therefore may not have the same level -of support or stability of detectron2. - -+ [DensePose: Dense Human Pose Estimation In The Wild](DensePose) -+ [Scale-Aware Trident Networks for Object Detection](TridentNet) -+ [TensorMask: A Foundation for Dense Object Segmentation](TensorMask) -+ [Mesh R-CNN](https://github.com/facebookresearch/meshrcnn) -+ [PointRend: Image Segmentation as Rendering](PointRend) -+ [Momentum Contrast for Unsupervised Visual Representation Learning](https://github.com/facebookresearch/moco/tree/master/detection) - - -## External Projects - -External projects in the community that use detectron2: - - - -+ [VoVNet backbones](https://github.com/youngwanLEE/vovnet-detectron2). -+ [AdelaiDet](https://github.com/aim-uofa/adet), a detection toolbox from the Universtiy of Adelaide. 
-+ [CenterMask : Real-Time Anchor-Free Instance Segmentation](https://github.com/youngwanLEE/centermask2) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/README.md b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/README.md deleted file mode 100644 index 6831508..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/README.md +++ /dev/null @@ -1,64 +0,0 @@ - -# TensorMask in Detectron2 -**A Foundation for Dense Object Segmentation** - -Xinlei Chen, Ross Girshick, Kaiming He, Piotr Dollár - -[[`arXiv`](https://arxiv.org/abs/1903.12174)] [[`BibTeX`](#CitingTensorMask)] - -
- -
- -In this repository, we release code for TensorMask in Detectron2. -TensorMask is a dense sliding-window instance segmentation framework that, for the first time, achieves results close to the well-developed Mask R-CNN framework -- both qualitatively and quantitatively. It establishes a conceptually complementary direction for object instance segmentation research. - -## Installation -First install Detectron2 following the [documentation](https://detectron2.readthedocs.io/tutorials/install.html) and -[setup the dataset](../../datasets). Then compile the TensorMask-specific op (`swap_align2nat`): -```bash -cd /path/to/detectron2/projects/TensorMask -python setup.py build develop -``` - -## Training - -To train a model, run: -```bash -python /path/to/detectron2/projects/TensorMask/train_net.py --config-file -``` - -For example, to launch TensorMask BiPyramid training (1x schedule) with ResNet-50 backbone on 8 GPUs, -one should execute: -```bash -python /path/to/detectron2/projects/TensorMask/train_net.py --config-file configs/tensormask_R_50_FPN_1x.yaml --num-gpus 8 -``` - -## Evaluation - -Model evaluation can be done similarly (6x schedule with scale augmentation): -```bash -python /path/to/detectron2/projects/TensorMask/train_net.py --config-file configs/tensormask_R_50_FPN_6x.yaml --eval-only MODEL.WEIGHTS /path/to/model_checkpoint -``` - -# Pretrained Models - -| Backbone | lr sched | AP box | AP mask | download | -| -------- | -------- | -- | --- | -------- | -| R50 | 1x | 37.6 | 32.4 | model \|  metrics | -| R50 | 6x | 41.4 | 35.8 | model \|  metrics | - - -## Citing TensorMask - -If you use TensorMask, please use the following BibTeX entry. - -``` -@InProceedings{chen2019tensormask, - title={Tensormask: A Foundation for Dense Object Segmentation}, - author={Chen, Xinlei and Girshick, Ross and He, Kaiming and Doll{\'a}r, Piotr}, - journal={The International Conference on Computer Vision (ICCV)}, - year={2019} -} -``` - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/configs/Base-TensorMask.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/configs/Base-TensorMask.yaml deleted file mode 100644 index a724534..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/configs/Base-TensorMask.yaml +++ /dev/null @@ -1,25 +0,0 @@ -MODEL: - META_ARCHITECTURE: "TensorMask" - MASK_ON: True - BACKBONE: - NAME: "build_retinanet_resnet_fpn_backbone" - RESNETS: - OUT_FEATURES: ["res2", "res3", "res4", "res5"] - ANCHOR_GENERATOR: - SIZES: [[44, 60], [88, 120], [176, 240], [352, 480], [704, 960], [1408, 1920]] - ASPECT_RATIOS: [[1.0]] - FPN: - IN_FEATURES: ["res2", "res3", "res4", "res5"] - FUSE_TYPE: "avg" - TENSOR_MASK: - ALIGNED_ON: True - BIPYRAMID_ON: True -DATASETS: - TRAIN: ("coco_2017_train",) - TEST: ("coco_2017_val",) -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.02 - STEPS: (60000, 80000) - MAX_ITER: 90000 -VERSION: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/configs/tensormask_R_50_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/configs/tensormask_R_50_FPN_1x.yaml deleted file mode 100644 index 5d5eee1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/configs/tensormask_R_50_FPN_1x.yaml +++ /dev/null @@ -1,5 +0,0 @@ -_BASE_: "Base-TensorMask.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 diff --git 
a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/configs/tensormask_R_50_FPN_6x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/configs/tensormask_R_50_FPN_6x.yaml deleted file mode 100644 index 366a965..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/configs/tensormask_R_50_FPN_6x.yaml +++ /dev/null @@ -1,11 +0,0 @@ -_BASE_: "Base-TensorMask.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (480000, 520000) - MAX_ITER: 540000 -INPUT: - MIN_SIZE_TRAIN_SAMPLING: "range" - MIN_SIZE_TRAIN: (640, 800) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/setup.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/setup.py deleted file mode 100644 index 0194e76..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/setup.py +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import glob -import os -from setuptools import find_packages, setup -import torch -from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension - - -def get_extensions(): - this_dir = os.path.dirname(os.path.abspath(__file__)) - extensions_dir = os.path.join(this_dir, "tensormask", "layers", "csrc") - - main_source = os.path.join(extensions_dir, "vision.cpp") - sources = glob.glob(os.path.join(extensions_dir, "**", "*.cpp")) - source_cuda = glob.glob(os.path.join(extensions_dir, "**", "*.cu")) + glob.glob( - os.path.join(extensions_dir, "*.cu") - ) - - sources = [main_source] + sources - - extension = CppExtension - - extra_compile_args = {"cxx": []} - define_macros = [] - - if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1": - extension = CUDAExtension - sources += source_cuda - define_macros += [("WITH_CUDA", None)] - extra_compile_args["nvcc"] = [ - "-DCUDA_HAS_FP16=1", - "-D__CUDA_NO_HALF_OPERATORS__", - "-D__CUDA_NO_HALF_CONVERSIONS__", - "-D__CUDA_NO_HALF2_OPERATORS__", - ] - - # It's better if pytorch can do this by default .. - CC = os.environ.get("CC", None) - if CC is not None: - extra_compile_args["nvcc"].append("-ccbin={}".format(CC)) - - sources = [os.path.join(extensions_dir, s) for s in sources] - - include_dirs = [extensions_dir] - - ext_modules = [ - extension( - "tensormask._C", - sources, - include_dirs=include_dirs, - define_macros=define_macros, - extra_compile_args=extra_compile_args, - ) - ] - - return ext_modules - - -setup( - name="tensormask", - version="0.1", - author="FAIR", - packages=find_packages(exclude=("configs", "tests")), - python_requires=">=3.6", - ext_modules=get_extensions(), - cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, -) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/__init__.py deleted file mode 100644 index e3b642a..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -from .config import add_tensormask_config -from .arch import TensorMask diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/arch.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/arch.py deleted file mode 100644 index a3e89c6..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/arch.py +++ /dev/null @@ -1,904 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import copy -import logging -import math -from typing import List -import torch -import torch.nn.functional as F -from fvcore.nn import sigmoid_focal_loss_star_jit, smooth_l1_loss -from torch import nn - -from detectron2.layers import ShapeSpec, batched_nms, cat, paste_masks_in_image -from detectron2.modeling.anchor_generator import DefaultAnchorGenerator -from detectron2.modeling.backbone import build_backbone -from detectron2.modeling.box_regression import Box2BoxTransform -from detectron2.modeling.meta_arch.build import META_ARCH_REGISTRY -from detectron2.modeling.meta_arch.retinanet import ( - permute_all_cls_and_box_to_N_HWA_K_and_concat, - permute_to_N_HWA_K, -) -from detectron2.structures import Boxes, ImageList, Instances -from detectron2.utils.logger import log_first_n - -from tensormask.layers import SwapAlign2Nat - -__all__ = ["TensorMask"] - - -def _assignment_rule( - gt_boxes, - anchor_boxes, - unit_lengths, - min_anchor_size, - scale_thresh=2.0, - spatial_thresh=1.0, - uniqueness_on=True, -): - """ - Given two lists of boxes of N ground truth boxes and M anchor boxes, - compute the assignment between the two, following the assignment rules in - https://arxiv.org/abs/1903.12174. - The box order must be (xmin, ymin, xmax, ymax), so please make sure to convert - to BoxMode.XYXY_ABS before calling this function. - - Args: - gt_boxes, anchor_boxes (Boxes): two Boxes. Contains N & M boxes/anchors, respectively. - unit_lengths (Tensor): Contains the unit lengths of M anchor boxes. - min_anchor_size (float): Minimum size of the anchor, in pixels - scale_thresh (float): The `scale` threshold: the maximum size of the anchor - should not be greater than scale_thresh x max(h, w) of - the ground truth box. - spatial_thresh (float): The `spatial` threshold: the l2 distance between the - center of the anchor and the ground truth box should not - be greater than spatial_thresh x u where u is the unit length. 
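As an illustrative aside (not part of the file above): the three thresholds in the docstring combine as a logical AND. An anchor is a candidate match only if it fully contains the ground-truth box (containment), its size minus one sampling stride does not exceed scale_thresh x max(h, w) of the ground truth (scale centrality), and the two centers lie within spatial_thresh unit lengths of each other (spatial centrality). A minimal PyTorch sketch of those checks with made-up names and toy boxes, assuming the (xmin, ymin, xmax, ymax) box order used above:

```python
import torch

def toy_assignment_checks(gt, anchors, unit_lengths, min_anchor_size,
                          scale_thresh=2.0, spatial_thresh=1.0):
    # Containment: the anchor must fully enclose the ground-truth box.
    contain = (
        (anchors[:, 0] <= gt[0]) & (anchors[:, 1] <= gt[1])
        & (anchors[:, 2] >= gt[2]) & (anchors[:, 3] >= gt[3])
    )
    # Scale centrality: anchor size (minus one sampling stride) must not exceed
    # scale_thresh * max(h, w) of the ground truth, with a floor for small objects.
    gt_size_upper = torch.clamp(torch.max(gt[2:] - gt[:2]) * scale_thresh, min=min_anchor_size)
    anchor_size = torch.max(anchors[:, 2:] - anchors[:, :2], dim=1).values - unit_lengths
    scale_ok = anchor_size <= gt_size_upper
    # Spatial centrality: centers within spatial_thresh unit lengths of each other.
    offset = ((gt[:2] + gt[2:]) / 2 - (anchors[:, :2] + anchors[:, 2:]) / 2) / unit_lengths[:, None]
    spatial_ok = offset.pow(2).sum(dim=1) <= spatial_thresh ** 2
    return contain & scale_ok & spatial_ok

gt = torch.tensor([10.0, 10.0, 50.0, 50.0])            # one ground-truth box
anchors = torch.tensor([[0.0, 0.0, 60.0, 60.0],        # contains gt, center nearby
                        [30.0, 30.0, 90.0, 90.0]])     # does not contain gt
print(toy_assignment_checks(gt, anchors, torch.tensor([8.0, 8.0]), min_anchor_size=44.0))
# expected: tensor([ True, False])
```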
- - Returns: - matches (Tensor[int64]): a vector of length M, where matches[i] is a matched - ground-truth index in [0, N) - match_labels (Tensor[int8]): a vector of length M, where pred_labels[i] indicates - whether a prediction is a true or false positive or ignored - """ - gt_boxes, anchor_boxes = gt_boxes.tensor, anchor_boxes.tensor - N = gt_boxes.shape[0] - M = anchor_boxes.shape[0] - if N == 0 or M == 0: - return ( - gt_boxes.new_full((N,), 0, dtype=torch.int64), - gt_boxes.new_full((N,), -1, dtype=torch.int8), - ) - - # Containment rule - lt = torch.min(gt_boxes[:, None, :2], anchor_boxes[:, :2]) # [N,M,2] - rb = torch.max(gt_boxes[:, None, 2:], anchor_boxes[:, 2:]) # [N,M,2] - union = cat([lt, rb], dim=2) # [N,M,4] - - dummy_gt_boxes = torch.zeros_like(gt_boxes) - anchor = dummy_gt_boxes[:, None, :] + anchor_boxes[:, :] # [N,M,4] - - contain_matrix = torch.all(union == anchor, dim=2) # [N,M] - - # Centrality rule, scale - gt_size_lower = torch.max(gt_boxes[:, 2:] - gt_boxes[:, :2], dim=1)[0] # [N] - gt_size_upper = gt_size_lower * scale_thresh # [N] - # Fall back for small objects - gt_size_upper[gt_size_upper < min_anchor_size] = min_anchor_size - # Due to sampling of locations, the anchor sizes are deducted with sampling strides - anchor_size = ( - torch.max(anchor_boxes[:, 2:] - anchor_boxes[:, :2], dim=1)[0] - unit_lengths - ) # [M] - - size_diff_upper = gt_size_upper[:, None] - anchor_size # [N,M] - scale_matrix = size_diff_upper >= 0 # [N,M] - - # Centrality rule, spatial - gt_center = (gt_boxes[:, 2:] + gt_boxes[:, :2]) / 2 # [N,2] - anchor_center = (anchor_boxes[:, 2:] + anchor_boxes[:, :2]) / 2 # [M,2] - offset_center = gt_center[:, None, :] - anchor_center[:, :] # [N,M,2] - offset_center /= unit_lengths[:, None] # [N,M,2] - spatial_square = spatial_thresh * spatial_thresh - spatial_matrix = torch.sum(offset_center * offset_center, dim=2) <= spatial_square - - assign_matrix = (contain_matrix & scale_matrix & spatial_matrix).int() - - # assign_matrix is N (gt) x M (predicted) - # Max over gt elements (dim 0) to find best gt candidate for each prediction - matched_vals, matches = assign_matrix.max(dim=0) - match_labels = matches.new_full(matches.size(), 1, dtype=torch.int8) - - match_labels[matched_vals == 0] = 0 - match_labels[matched_vals == 1] = 1 - - # find all the elements that match to ground truths multiple times - not_unique_idxs = assign_matrix.sum(dim=0) > 1 - if uniqueness_on: - match_labels[not_unique_idxs] = 0 - else: - match_labels[not_unique_idxs] = -1 - - return matches, match_labels - - -# TODO make the paste_mask function in d2 core support mask list -def _paste_mask_lists_in_image(masks, boxes, image_shape, threshold=0.5): - """ - Paste a list of masks that are of various resolutions (e.g., 28 x 28) into an image. - The location, height, and width for pasting each mask is determined by their - corresponding bounding boxes in boxes. - - Args: - masks (list(Tensor)): A list of Tensor of shape (1, Hmask_i, Wmask_i). - Values are in [0, 1]. The list length, Bimg, is the - number of detected object instances in the image. - boxes (Boxes): A Boxes of length Bimg. boxes.tensor[i] and masks[i] correspond - to the same object instance. - image_shape (tuple): height, width - threshold (float): A threshold in [0, 1] for converting the (soft) masks to - binary masks. - - Returns: - img_masks (Tensor): A tensor of shape (Bimg, Himage, Wimage), where Bimg is the - number of detected object instances and Himage, Wimage are the image width - and height. 
img_masks[i] is a binary mask for object instance i. - """ - if len(masks) == 0: - return torch.empty((0, 1) + image_shape, dtype=torch.uint8) - - # Loop over masks groups. Each group has the same mask prediction size. - img_masks = [] - ind_masks = [] - mask_sizes = torch.tensor([m.shape[-1] for m in masks]) - unique_sizes = torch.unique(mask_sizes) - for msize in unique_sizes.tolist(): - cur_ind = torch.where(mask_sizes == msize)[0] - ind_masks.append(cur_ind) - - cur_masks = cat([masks[i] for i in cur_ind]) - cur_boxes = boxes[cur_ind] - img_masks.append(paste_masks_in_image(cur_masks, cur_boxes, image_shape, threshold)) - - img_masks = cat(img_masks) - ind_masks = cat(ind_masks) - - img_masks_out = torch.empty_like(img_masks) - img_masks_out[ind_masks, :, :] = img_masks - - return img_masks_out - - -def _postprocess(results, result_mask_info, output_height, output_width, mask_threshold=0.5): - """ - Post-process the output boxes for TensorMask. - The input images are often resized when entering an object detector. - As a result, we often need the outputs of the detector in a different - resolution from its inputs. - - This function will postprocess the raw outputs of TensorMask - to produce outputs according to the desired output resolution. - - Args: - results (Instances): the raw outputs from the detector. - `results.image_size` contains the input image resolution the detector sees. - This object might be modified in-place. Note that it does not contain the field - `pred_masks`, which is provided by another input `result_masks`. - result_mask_info (list[Tensor], Boxes): a pair of two items for mask related results. - The first item is a list of #detection tensors, each is the predicted masks. - The second item is the anchors corresponding to the predicted masks. - output_height, output_width: the desired output resolution. - - Returns: - Instances: the postprocessed output from the model, based on the output resolution - """ - scale_x, scale_y = (output_width / results.image_size[1], output_height / results.image_size[0]) - results = Instances((output_height, output_width), **results.get_fields()) - - output_boxes = results.pred_boxes - output_boxes.tensor[:, 0::2] *= scale_x - output_boxes.tensor[:, 1::2] *= scale_y - output_boxes.clip(results.image_size) - - inds_nonempty = output_boxes.nonempty() - results = results[inds_nonempty] - result_masks, result_anchors = result_mask_info - if result_masks: - result_anchors.tensor[:, 0::2] *= scale_x - result_anchors.tensor[:, 1::2] *= scale_y - result_masks = [x for (i, x) in zip(inds_nonempty.tolist(), result_masks) if i] - results.pred_masks = _paste_mask_lists_in_image( - result_masks, - result_anchors[inds_nonempty], - results.image_size, - threshold=mask_threshold, - ) - return results - - -class TensorMaskAnchorGenerator(DefaultAnchorGenerator): - """ - For a set of image sizes and feature maps, computes a set of anchors for TensorMask. - It also computes the unit lengths and indexes for each anchor box. 
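As a rough illustrative aside (the numbers here are assumed for illustration, not taken from the configs), each feature level of the anchor generator described here contributes one anchor per cell anchor per spatial location, each tagged with the level's stride as its unit length and a 5-tuple index:

```python
# Back-of-the-envelope sketch for one feature level (assumed sizes).
level, stride, grid_h, grid_w, num_cell_anchors = 0, 8, 100, 152, 2
num_anchors = grid_h * grid_w * num_cell_anchors   # anchors produced on this level
unit_length = float(stride)                        # same unit length for all of them
example_index = (level, 0, 17, 23, 1)              # (L, I, H, W, A): level, image, y, x, cell anchor
print(num_anchors, unit_length, example_index)     # 30400 8.0 (0, 0, 17, 23, 1)
```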
- """ - - def grid_anchors_with_unit_lengths_and_indexes(self, grid_sizes): - anchors = [] - unit_lengths = [] - indexes = [] - for lvl, (size, stride, base_anchors) in enumerate( - zip(grid_sizes, self.strides, self.cell_anchors) - ): - grid_height, grid_width = size - device = base_anchors.device - shifts_x = torch.arange( - 0, grid_width * stride, step=stride, dtype=torch.float32, device=device - ) - shifts_y = torch.arange( - 0, grid_height * stride, step=stride, dtype=torch.float32, device=device - ) - shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) - shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=2) - # Stack anchors in shapes of (HWA, 4) - cur_anchor = (shifts[:, :, None, :] + base_anchors.view(1, 1, -1, 4)).view(-1, 4) - anchors.append(cur_anchor) - unit_lengths.append( - torch.full((cur_anchor.shape[0],), stride, dtype=torch.float32, device=device) - ) - # create mask indexes using mesh grid - shifts_l = torch.full((1,), lvl, dtype=torch.int64, device=device) - shifts_i = torch.zeros((1,), dtype=torch.int64, device=device) - shifts_h = torch.arange(0, grid_height, dtype=torch.int64, device=device) - shifts_w = torch.arange(0, grid_width, dtype=torch.int64, device=device) - shifts_a = torch.arange(0, base_anchors.shape[0], dtype=torch.int64, device=device) - grids = torch.meshgrid(shifts_l, shifts_i, shifts_h, shifts_w, shifts_a) - - indexes.append(torch.stack(grids, dim=5).view(-1, 5)) - - return anchors, unit_lengths, indexes - - def forward(self, features): - """ - Returns: - list[list[Boxes]]: a list of #image elements. Each is a list of #feature level Boxes. - The Boxes contains anchors of this image on the specific feature level. - list[list[Tensor]]: a list of #image elements. Each is a list of #feature level tensors. - The tensor contains strides, or unit lengths for the anchors. - list[list[Tensor]]: a list of #image elements. Each is a list of #feature level tensors. - The Tensor contains indexes for the anchors, with the last dimension meaning - (L, N, H, W, A), where L is level, I is image (not set yet), H is height, - W is width, and A is anchor. - """ - num_images = len(features[0]) - grid_sizes = [feature_map.shape[-2:] for feature_map in features] - anchors_list, lengths_list, indexes_list = self.grid_anchors_with_unit_lengths_and_indexes( - grid_sizes - ) - - # Convert anchors from Tensor to Boxes - anchors_per_im = [Boxes(x) for x in anchors_list] - - # TODO it can be simplified to not return duplicated information for - # each image, just like detectron2's own AnchorGenerator - anchors = [copy.deepcopy(anchors_per_im) for _ in range(num_images)] - unit_lengths = [copy.deepcopy(lengths_list) for _ in range(num_images)] - indexes = [copy.deepcopy(indexes_list) for _ in range(num_images)] - - return anchors, unit_lengths, indexes - - -@META_ARCH_REGISTRY.register() -class TensorMask(nn.Module): - """ - TensorMask model. Creates FPN backbone, anchors and a head for classification - and box regression. Calculates and applies proper losses to class, box, and - masks. 
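Because the class above is registered in detectron2's META_ARCH_REGISTRY, it can be constructed through the standard config path once the TensorMask keys have been added. A minimal usage sketch, assuming the project's `swap_align2nat` extension has been built as described in the README and that the command is run from the project directory so the config path below resolves:

```python
# Minimal sketch: build the registered TensorMask meta-architecture from a config.
from detectron2.config import get_cfg
from detectron2.modeling import build_model

from tensormask import add_tensormask_config  # adds the MODEL.TENSOR_MASK.* keys used above

cfg = get_cfg()
add_tensormask_config(cfg)
cfg.merge_from_file("configs/tensormask_R_50_FPN_1x.yaml")  # path relative to the project dir
cfg.MODEL.DEVICE = "cpu"        # keep the sketch CPU-only; drop this line to use the default device
model = build_model(cfg)        # looks up "TensorMask" in META_ARCH_REGISTRY
print(type(model).__name__)     # "TensorMask"
```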
- """ - - def __init__(self, cfg): - super().__init__() - - # fmt: off - self.num_classes = cfg.MODEL.TENSOR_MASK.NUM_CLASSES - self.in_features = cfg.MODEL.TENSOR_MASK.IN_FEATURES - self.anchor_sizes = cfg.MODEL.ANCHOR_GENERATOR.SIZES - self.num_levels = len(cfg.MODEL.ANCHOR_GENERATOR.SIZES) - # Loss parameters: - self.focal_loss_alpha = cfg.MODEL.TENSOR_MASK.FOCAL_LOSS_ALPHA - self.focal_loss_gamma = cfg.MODEL.TENSOR_MASK.FOCAL_LOSS_GAMMA - # Inference parameters: - self.score_threshold = cfg.MODEL.TENSOR_MASK.SCORE_THRESH_TEST - self.topk_candidates = cfg.MODEL.TENSOR_MASK.TOPK_CANDIDATES_TEST - self.nms_threshold = cfg.MODEL.TENSOR_MASK.NMS_THRESH_TEST - self.detections_im = cfg.TEST.DETECTIONS_PER_IMAGE - # Mask parameters: - self.mask_on = cfg.MODEL.MASK_ON - self.mask_loss_weight = cfg.MODEL.TENSOR_MASK.MASK_LOSS_WEIGHT - self.mask_pos_weight = torch.tensor(cfg.MODEL.TENSOR_MASK.POSITIVE_WEIGHT, - dtype=torch.float32) - self.bipyramid_on = cfg.MODEL.TENSOR_MASK.BIPYRAMID_ON - # fmt: on - - # build the backbone - self.backbone = build_backbone(cfg) - - backbone_shape = self.backbone.output_shape() - feature_shapes = [backbone_shape[f] for f in self.in_features] - feature_strides = [x.stride for x in feature_shapes] - # build anchors - self.anchor_generator = TensorMaskAnchorGenerator(cfg, feature_shapes) - self.num_anchors = self.anchor_generator.num_cell_anchors[0] - anchors_min_level = cfg.MODEL.ANCHOR_GENERATOR.SIZES[0] - self.mask_sizes = [size // feature_strides[0] for size in anchors_min_level] - self.min_anchor_size = min(anchors_min_level) - feature_strides[0] - - # head of the TensorMask - self.head = TensorMaskHead( - cfg, self.num_levels, self.num_anchors, self.mask_sizes, feature_shapes - ) - # box transform - self.box2box_transform = Box2BoxTransform(weights=cfg.MODEL.TENSOR_MASK.BBOX_REG_WEIGHTS) - self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)) - self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)) - - @property - def device(self): - return self.pixel_mean.device - - def forward(self, batched_inputs): - """ - Args: - batched_inputs: a list, batched outputs of :class:`DetectionTransform` . - Each item in the list contains the inputs for one image. - For now, each item in the list is a dict that contains: - image: Tensor, image in (C, H, W) format. - instances: Instances - Other information that's included in the original dicts, such as: - "height", "width" (int): the output resolution of the model, used in inference. - See :meth:`postprocess` for details. - Returns: - losses (dict[str: Tensor]): mapping from a named loss to a tensor - storing the loss. Used during training only. - """ - images = self.preprocess_image(batched_inputs) - if "instances" in batched_inputs[0]: - gt_instances = [x["instances"].to(self.device) for x in batched_inputs] - elif "targets" in batched_inputs[0]: - log_first_n( - logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10 - ) - gt_instances = [x["targets"].to(self.device) for x in batched_inputs] - else: - gt_instances = None - - features = self.backbone(images.tensor) - features = [features[f] for f in self.in_features] - # apply the TensorMask head - pred_logits, pred_deltas, pred_masks = self.head(features) - # generate anchors based on features, is it image specific? 
- anchors, unit_lengths, indexes = self.anchor_generator(features) - - if self.training: - # get ground truths for class labels and box targets, it will label each anchor - gt_class_info, gt_delta_info, gt_mask_info, num_fg = self.get_ground_truth( - anchors, unit_lengths, indexes, gt_instances - ) - # compute the loss - return self.losses( - gt_class_info, - gt_delta_info, - gt_mask_info, - num_fg, - pred_logits, - pred_deltas, - pred_masks, - ) - else: - # do inference to get the output - results = self.inference(pred_logits, pred_deltas, pred_masks, anchors, indexes, images) - processed_results = [] - for results_im, input_im, image_size in zip( - results, batched_inputs, images.image_sizes - ): - height = input_im.get("height", image_size[0]) - width = input_im.get("width", image_size[1]) - # this is to do post-processing with the image size - result_box, result_mask = results_im - r = _postprocess(result_box, result_mask, height, width) - processed_results.append({"instances": r}) - return processed_results - - def losses( - self, - gt_class_info, - gt_delta_info, - gt_mask_info, - num_fg, - pred_logits, - pred_deltas, - pred_masks, - ): - """ - Args: - For `gt_class_info`, `gt_delta_info`, `gt_mask_info` and `num_fg` parameters, see - :meth:`TensorMask.get_ground_truth`. - For `pred_logits`, `pred_deltas` and `pred_masks`, see - :meth:`TensorMaskHead.forward`. - - Returns: - losses (dict[str: Tensor]): mapping from a named loss to a scalar tensor - storing the loss. Used during training only. The potential dict keys are: - "loss_cls", "loss_box_reg" and "loss_mask". - """ - gt_classes_target, gt_valid_inds = gt_class_info - gt_deltas, gt_fg_inds = gt_delta_info - gt_masks, gt_mask_inds = gt_mask_info - loss_normalizer = torch.tensor(max(1, num_fg), dtype=torch.float32, device=self.device) - - # classification and regression - pred_logits, pred_deltas = permute_all_cls_and_box_to_N_HWA_K_and_concat( - pred_logits, pred_deltas, self.num_classes - ) - loss_cls = ( - sigmoid_focal_loss_star_jit( - pred_logits[gt_valid_inds], - gt_classes_target[gt_valid_inds], - alpha=self.focal_loss_alpha, - gamma=self.focal_loss_gamma, - reduction="sum", - ) - / loss_normalizer - ) - - if num_fg == 0: - loss_box_reg = pred_deltas.sum() * 0 - else: - loss_box_reg = ( - smooth_l1_loss(pred_deltas[gt_fg_inds], gt_deltas, beta=0.0, reduction="sum") - / loss_normalizer - ) - losses = {"loss_cls": loss_cls, "loss_box_reg": loss_box_reg} - - # mask prediction - if self.mask_on: - loss_mask = 0 - for lvl in range(self.num_levels): - cur_level_factor = 2 ** lvl if self.bipyramid_on else 1 - for anc in range(self.num_anchors): - cur_gt_mask_inds = gt_mask_inds[lvl][anc] - if cur_gt_mask_inds is None: - loss_mask += pred_masks[lvl][anc][0, 0, 0, 0] * 0 - else: - cur_mask_size = self.mask_sizes[anc] * cur_level_factor - # TODO maybe there are numerical issues when mask sizes are large - cur_size_divider = torch.tensor( - self.mask_loss_weight / (cur_mask_size ** 2), - dtype=torch.float32, - device=self.device, - ) - - cur_pred_masks = pred_masks[lvl][anc][ - cur_gt_mask_inds[:, 0], # N - :, # V x U - cur_gt_mask_inds[:, 1], # H - cur_gt_mask_inds[:, 2], # W - ] - - loss_mask += F.binary_cross_entropy_with_logits( - cur_pred_masks.view(-1, cur_mask_size, cur_mask_size), # V, U - gt_masks[lvl][anc].to(dtype=torch.float32), - reduction="sum", - weight=cur_size_divider, - pos_weight=self.mask_pos_weight, - ) - losses["loss_mask"] = loss_mask / loss_normalizer - return losses - - @torch.no_grad() - def 
get_ground_truth(self, anchors, unit_lengths, indexes, targets): - """ - Args: - anchors (list[list[Boxes]]): a list of N=#image elements. Each is a - list of #feature level Boxes. The Boxes contains anchors of - this image on the specific feature level. - unit_lengths (list[list[Tensor]]): a list of N=#image elements. Each is a - list of #feature level Tensor. The tensor contains unit lengths for anchors of - this image on the specific feature level. - indexes (list[list[Tensor]]): a list of N=#image elements. Each is a - list of #feature level Tensor. The tensor contains the 5D index of - each anchor, the second dimension means (L, N, H, W, A), where L - is level, I is image, H is height, W is width, and A is anchor. - targets (list[Instances]): a list of N `Instances`s. The i-th - `Instances` contains the ground-truth per-instance annotations - for the i-th input image. Specify `targets` during training only. - - Returns: - gt_class_info (Tensor, Tensor): A pair of two tensors for classification. - The first one is an integer tensor of shape (R, #classes) storing ground-truth - labels for each anchor. R is the total number of anchors in the batch. - The second one is an integer tensor of shape (R,), to indicate which - anchors are valid for loss computation, which anchors are not. - gt_delta_info (Tensor, Tensor): A pair of two tensors for boxes. - The first one, of shape (F, 4). F=#foreground anchors. - The last dimension represents ground-truth box2box transform - targets (dx, dy, dw, dh) that map each anchor to its matched ground-truth box. - Only foreground anchors have values in this tensor. Could be `None` if F=0. - The second one, of shape (R,), is an integer tensor indicating which anchors - are foreground ones used for box regression. Could be `None` if F=0. - gt_mask_info (list[list[Tensor]], list[list[Tensor]]): A pair of two lists for masks. - The first one is a list of P=#feature level elements. Each is a - list of A=#anchor tensors. Each tensor contains the ground truth - masks of the same size and for the same feature level. Could be `None`. - The second one is a list of P=#feature level elements. Each is a - list of A=#anchor tensors. Each tensor contains the location of the ground truth - masks of the same size and for the same feature level. The second dimension means - (N, H, W), where N is image, H is height, and W is width. Could be `None`. - num_fg (int): F=#foreground anchors, used later for loss normalization. 
- """ - gt_classes = [] - gt_deltas = [] - gt_masks = [[[] for _ in range(self.num_anchors)] for _ in range(self.num_levels)] - gt_mask_inds = [[[] for _ in range(self.num_anchors)] for _ in range(self.num_levels)] - - anchors = [Boxes.cat(anchors_i) for anchors_i in anchors] - unit_lengths = [cat(unit_lengths_i) for unit_lengths_i in unit_lengths] - indexes = [cat(indexes_i) for indexes_i in indexes] - - num_fg = 0 - for i, (anchors_im, unit_lengths_im, indexes_im, targets_im) in enumerate( - zip(anchors, unit_lengths, indexes, targets) - ): - # Initialize all - gt_classes_i = torch.full_like( - unit_lengths_im, self.num_classes, dtype=torch.int64, device=self.device - ) - # Ground truth classes - has_gt = len(targets_im) > 0 - if has_gt: - # Compute the pairwise matrix - gt_matched_inds, anchor_labels = _assignment_rule( - targets_im.gt_boxes, anchors_im, unit_lengths_im, self.min_anchor_size - ) - # Find the foreground instances - fg_inds = anchor_labels == 1 - fg_anchors = anchors_im[fg_inds] - num_fg += len(fg_anchors) - # Find the ground truths for foreground instances - gt_fg_matched_inds = gt_matched_inds[fg_inds] - # Assign labels for foreground instances - gt_classes_i[fg_inds] = targets_im.gt_classes[gt_fg_matched_inds] - # Anchors with label -1 are ignored, others are left as negative - gt_classes_i[anchor_labels == -1] = -1 - - # Boxes - # Ground truth box regression, only for foregrounds - matched_gt_boxes = targets_im[gt_fg_matched_inds].gt_boxes - # Compute box regression offsets for foregrounds only - gt_deltas_i = self.box2box_transform.get_deltas( - fg_anchors.tensor, matched_gt_boxes.tensor - ) - gt_deltas.append(gt_deltas_i) - - # Masks - if self.mask_on: - # Compute masks for each level and each anchor - matched_indexes = indexes_im[fg_inds, :] - for lvl in range(self.num_levels): - ids_lvl = matched_indexes[:, 0] == lvl - if torch.any(ids_lvl): - cur_level_factor = 2 ** lvl if self.bipyramid_on else 1 - for anc in range(self.num_anchors): - ids_lvl_anchor = ids_lvl & (matched_indexes[:, 4] == anc) - if torch.any(ids_lvl_anchor): - gt_masks[lvl][anc].append( - targets_im[ - gt_fg_matched_inds[ids_lvl_anchor] - ].gt_masks.crop_and_resize( - fg_anchors[ids_lvl_anchor].tensor, - self.mask_sizes[anc] * cur_level_factor, - ) - ) - # Select (N, H, W) dimensions - gt_mask_inds_lvl_anc = matched_indexes[ids_lvl_anchor, 1:4] - # Set the image index to the current image - gt_mask_inds_lvl_anc[:, 0] = i - gt_mask_inds[lvl][anc].append(gt_mask_inds_lvl_anc) - gt_classes.append(gt_classes_i) - - # Classes and boxes - gt_classes = cat(gt_classes) - gt_valid_inds = gt_classes >= 0 - gt_fg_inds = gt_valid_inds & (gt_classes < self.num_classes) - gt_classes_target = torch.zeros( - (gt_classes.shape[0], self.num_classes), dtype=torch.float32, device=self.device - ) - gt_classes_target[gt_fg_inds, gt_classes[gt_fg_inds]] = 1 - gt_deltas = cat(gt_deltas) if gt_deltas else None - - # Masks - gt_masks = [[cat(mla) if mla else None for mla in ml] for ml in gt_masks] - gt_mask_inds = [[cat(ila) if ila else None for ila in il] for il in gt_mask_inds] - return ( - (gt_classes_target, gt_valid_inds), - (gt_deltas, gt_fg_inds), - (gt_masks, gt_mask_inds), - num_fg, - ) - - def inference(self, pred_logits, pred_deltas, pred_masks, anchors, indexes, images): - """ - Arguments: - pred_logits, pred_deltas, pred_masks: Same as the output of: - meth:`TensorMaskHead.forward` - anchors, indexes: Same as the input of meth:`TensorMask.get_ground_truth` - images (ImageList): the input images - - Returns: - 
results (List[Instances]): a list of #images elements. - """ - assert len(anchors) == len(images) - results = [] - - pred_logits = [permute_to_N_HWA_K(x, self.num_classes) for x in pred_logits] - pred_deltas = [permute_to_N_HWA_K(x, 4) for x in pred_deltas] - - pred_logits = cat(pred_logits, dim=1) - pred_deltas = cat(pred_deltas, dim=1) - - for img_idx, (anchors_im, indexes_im) in enumerate(zip(anchors, indexes)): - # Get the size of the current image - image_size = images.image_sizes[img_idx] - - logits_im = pred_logits[img_idx] - deltas_im = pred_deltas[img_idx] - - if self.mask_on: - masks_im = [[mla[img_idx] for mla in ml] for ml in pred_masks] - else: - masks_im = [None] * self.num_levels - results_im = self.inference_single_image( - logits_im, - deltas_im, - masks_im, - Boxes.cat(anchors_im), - cat(indexes_im), - tuple(image_size), - ) - results.append(results_im) - return results - - def inference_single_image( - self, pred_logits, pred_deltas, pred_masks, anchors, indexes, image_size - ): - """ - Single-image inference. Return bounding-box detection results by thresholding - on scores and applying non-maximum suppression (NMS). - - Arguments: - pred_logits (list[Tensor]): list of #feature levels. Each entry contains - tensor of size (AxHxW, K) - pred_deltas (list[Tensor]): Same shape as 'pred_logits' except that K becomes 4. - pred_masks (list[list[Tensor]]): List of #feature levels, each is a list of #anchors. - Each entry contains tensor of size (M_i*M_i, H, W). `None` if mask_on=False. - anchors (list[Boxes]): list of #feature levels. Each entry contains - a Boxes object, which contains all the anchors for that - image in that feature level. - image_size (tuple(H, W)): a tuple of the image height and width. - - Returns: - Same as `inference`, but for only one image. - """ - pred_logits = pred_logits.flatten().sigmoid_() - # We get top locations across all levels to accelerate the inference speed, - # which does not seem to affect the accuracy. 
- # First select values above the threshold - logits_top_idxs = torch.where(pred_logits > self.score_threshold)[0] - # Then get the top values - num_topk = min(self.topk_candidates, logits_top_idxs.shape[0]) - pred_prob, topk_idxs = pred_logits[logits_top_idxs].sort(descending=True) - # Keep top k scoring values - pred_prob = pred_prob[:num_topk] - # Keep top k values - top_idxs = logits_top_idxs[topk_idxs[:num_topk]] - - # class index - cls_idxs = top_idxs % self.num_classes - # HWA index - top_idxs //= self.num_classes - # predict boxes - pred_boxes = self.box2box_transform.apply_deltas( - pred_deltas[top_idxs], anchors[top_idxs].tensor - ) - # apply nms - keep = batched_nms(pred_boxes, pred_prob, cls_idxs, self.nms_threshold) - # pick the top ones - keep = keep[: self.detections_im] - - results = Instances(image_size) - results.pred_boxes = Boxes(pred_boxes[keep]) - results.scores = pred_prob[keep] - results.pred_classes = cls_idxs[keep] - - # deal with masks - result_masks, result_anchors = [], None - if self.mask_on: - # index and anchors, useful for masks - top_indexes = indexes[top_idxs] - top_anchors = anchors[top_idxs] - result_indexes = top_indexes[keep] - result_anchors = top_anchors[keep] - # Get masks and do sigmoid - for lvl, _, h, w, anc in result_indexes.tolist(): - cur_size = self.mask_sizes[anc] * (2 ** lvl if self.bipyramid_on else 1) - result_masks.append( - torch.sigmoid(pred_masks[lvl][anc][:, h, w].view(1, cur_size, cur_size)) - ) - - return results, (result_masks, result_anchors) - - def preprocess_image(self, batched_inputs): - """ - Normalize, pad and batch the input images. - """ - images = [x["image"].to(self.device) for x in batched_inputs] - images = [(x - self.pixel_mean) / self.pixel_std for x in images] - images = ImageList.from_tensors(images, self.backbone.size_divisibility) - return images - - -class TensorMaskHead(nn.Module): - def __init__(self, cfg, num_levels, num_anchors, mask_sizes, input_shape: List[ShapeSpec]): - """ - TensorMask head. 
- """ - super().__init__() - # fmt: off - self.in_features = cfg.MODEL.TENSOR_MASK.IN_FEATURES - in_channels = input_shape[0].channels - num_classes = cfg.MODEL.TENSOR_MASK.NUM_CLASSES - cls_channels = cfg.MODEL.TENSOR_MASK.CLS_CHANNELS - num_convs = cfg.MODEL.TENSOR_MASK.NUM_CONVS - # box parameters - bbox_channels = cfg.MODEL.TENSOR_MASK.BBOX_CHANNELS - # mask parameters - self.mask_on = cfg.MODEL.MASK_ON - self.mask_sizes = mask_sizes - mask_channels = cfg.MODEL.TENSOR_MASK.MASK_CHANNELS - self.align_on = cfg.MODEL.TENSOR_MASK.ALIGNED_ON - self.bipyramid_on = cfg.MODEL.TENSOR_MASK.BIPYRAMID_ON - # fmt: on - - # class subnet - cls_subnet = [] - cur_channels = in_channels - for _ in range(num_convs): - cls_subnet.append( - nn.Conv2d(cur_channels, cls_channels, kernel_size=3, stride=1, padding=1) - ) - cur_channels = cls_channels - cls_subnet.append(nn.ReLU()) - - self.cls_subnet = nn.Sequential(*cls_subnet) - self.cls_score = nn.Conv2d( - cur_channels, num_anchors * num_classes, kernel_size=3, stride=1, padding=1 - ) - modules_list = [self.cls_subnet, self.cls_score] - - # box subnet - bbox_subnet = [] - cur_channels = in_channels - for _ in range(num_convs): - bbox_subnet.append( - nn.Conv2d(cur_channels, bbox_channels, kernel_size=3, stride=1, padding=1) - ) - cur_channels = bbox_channels - bbox_subnet.append(nn.ReLU()) - - self.bbox_subnet = nn.Sequential(*bbox_subnet) - self.bbox_pred = nn.Conv2d( - cur_channels, num_anchors * 4, kernel_size=3, stride=1, padding=1 - ) - modules_list.extend([self.bbox_subnet, self.bbox_pred]) - - # mask subnet - if self.mask_on: - mask_subnet = [] - cur_channels = in_channels - for _ in range(num_convs): - mask_subnet.append( - nn.Conv2d(cur_channels, mask_channels, kernel_size=3, stride=1, padding=1) - ) - cur_channels = mask_channels - mask_subnet.append(nn.ReLU()) - - self.mask_subnet = nn.Sequential(*mask_subnet) - modules_list.append(self.mask_subnet) - for mask_size in self.mask_sizes: - cur_mask_module = "mask_pred_%02d" % mask_size - self.add_module( - cur_mask_module, - nn.Conv2d( - cur_channels, mask_size * mask_size, kernel_size=1, stride=1, padding=0 - ), - ) - modules_list.append(getattr(self, cur_mask_module)) - if self.align_on: - if self.bipyramid_on: - for lvl in range(num_levels): - cur_mask_module = "align2nat_%02d" % lvl - lambda_val = 2 ** lvl - setattr(self, cur_mask_module, SwapAlign2Nat(lambda_val)) - # Also the fusing layer, stay at the same channel size - mask_fuse = [ - nn.Conv2d(cur_channels, cur_channels, kernel_size=3, stride=1, padding=1), - nn.ReLU(), - ] - self.mask_fuse = nn.Sequential(*mask_fuse) - modules_list.append(self.mask_fuse) - else: - self.align2nat = SwapAlign2Nat(1) - - # Initialization - for modules in modules_list: - for layer in modules.modules(): - if isinstance(layer, nn.Conv2d): - torch.nn.init.normal_(layer.weight, mean=0, std=0.01) - torch.nn.init.constant_(layer.bias, 0) - - # Use prior in model initialization to improve stability - bias_value = -(math.log((1 - 0.01) / 0.01)) - torch.nn.init.constant_(self.cls_score.bias, bias_value) - - def forward(self, features): - """ - Arguments: - features (list[Tensor]): FPN feature map tensors in high to low resolution. - Each tensor in the list correspond to different feature levels. - - Returns: - pred_logits (list[Tensor]): #lvl tensors, each has shape (N, AxK, Hi, Wi). - The tensor predicts the classification probability - at each spatial position for each of the A anchors and K object - classes. 
- pred_deltas (list[Tensor]): #lvl tensors, each has shape (N, Ax4, Hi, Wi). - The tensor predicts 4-vector (dx,dy,dw,dh) box - regression values for every anchor. These values are the - relative offset between the anchor and the ground truth box. - pred_masks (list(list[Tensor])): #lvl list of tensors, each is a list of - A tensors of shape (N, M_{i,a}, Hi, Wi). - The tensor predicts a dense set of M_ixM_i masks at every location. - """ - pred_logits = [self.cls_score(self.cls_subnet(x)) for x in features] - pred_deltas = [self.bbox_pred(self.bbox_subnet(x)) for x in features] - - pred_masks = None - if self.mask_on: - mask_feats = [self.mask_subnet(x) for x in features] - - if self.bipyramid_on: - mask_feat_high_res = mask_feats[0] - H, W = mask_feat_high_res.shape[-2:] - mask_feats_up = [] - for lvl, mask_feat in enumerate(mask_feats): - lambda_val = 2.0 ** lvl - mask_feat_up = mask_feat - if lvl > 0: - mask_feat_up = F.interpolate( - mask_feat, scale_factor=lambda_val, mode="bilinear", align_corners=False - ) - mask_feats_up.append( - self.mask_fuse(mask_feat_up[:, :, :H, :W] + mask_feat_high_res) - ) - mask_feats = mask_feats_up - - pred_masks = [] - for lvl, mask_feat in enumerate(mask_feats): - cur_masks = [] - for mask_size in self.mask_sizes: - cur_mask_module = getattr(self, "mask_pred_%02d" % mask_size) - cur_mask = cur_mask_module(mask_feat) - if self.align_on: - if self.bipyramid_on: - cur_mask_module = getattr(self, "align2nat_%02d" % lvl) - cur_mask = cur_mask_module(cur_mask) - else: - cur_mask = self.align2nat(cur_mask) - cur_masks.append(cur_mask) - pred_masks.append(cur_masks) - return pred_logits, pred_deltas, pred_masks diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/config.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/config.py deleted file mode 100644 index 44479f2..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/config.py +++ /dev/null @@ -1,50 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -from detectron2.config import CfgNode as CN - - -def add_tensormask_config(cfg): - """ - Add config for TensorMask. - """ - cfg.MODEL.TENSOR_MASK = CN() - - # Anchor parameters - cfg.MODEL.TENSOR_MASK.IN_FEATURES = ["p2", "p3", "p4", "p5", "p6", "p7"] - - # Convolutions to use in the towers - cfg.MODEL.TENSOR_MASK.NUM_CONVS = 4 - - # Number of foreground classes. 
- cfg.MODEL.TENSOR_MASK.NUM_CLASSES = 80 - # Channel size for the classification tower - cfg.MODEL.TENSOR_MASK.CLS_CHANNELS = 256 - - cfg.MODEL.TENSOR_MASK.SCORE_THRESH_TEST = 0.05 - # Only the top (1000 * #levels) candidate boxes across all levels are - # considered jointly during test (to improve speed) - cfg.MODEL.TENSOR_MASK.TOPK_CANDIDATES_TEST = 6000 - cfg.MODEL.TENSOR_MASK.NMS_THRESH_TEST = 0.5 - - # Box parameters - # Channel size for the box tower - cfg.MODEL.TENSOR_MASK.BBOX_CHANNELS = 128 - # Weights on (dx, dy, dw, dh) - cfg.MODEL.TENSOR_MASK.BBOX_REG_WEIGHTS = (1.5, 1.5, 0.75, 0.75) - - # Loss parameters - cfg.MODEL.TENSOR_MASK.FOCAL_LOSS_GAMMA = 3.0 - cfg.MODEL.TENSOR_MASK.FOCAL_LOSS_ALPHA = 0.3 - - # Mask parameters - # Channel size for the mask tower - cfg.MODEL.TENSOR_MASK.MASK_CHANNELS = 128 - # Mask loss weight - cfg.MODEL.TENSOR_MASK.MASK_LOSS_WEIGHT = 2.0 - # weight on positive pixels within the mask - cfg.MODEL.TENSOR_MASK.POSITIVE_WEIGHT = 1.5 - # Whether to predict in the aligned representation - cfg.MODEL.TENSOR_MASK.ALIGNED_ON = False - # Whether to use the bipyramid architecture - cfg.MODEL.TENSOR_MASK.BIPYRAMID_ON = False diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/__init__.py deleted file mode 100644 index cbbac42..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .swap_align2nat import SwapAlign2Nat, swap_align2nat - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/csrc/SwapAlign2Nat/SwapAlign2Nat.h b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/csrc/SwapAlign2Nat/SwapAlign2Nat.h deleted file mode 100644 index 2ec0373..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/csrc/SwapAlign2Nat/SwapAlign2Nat.h +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -#pragma once -#include - -namespace tensormask { - -#ifdef WITH_CUDA -at::Tensor SwapAlign2Nat_forward_cuda( - const at::Tensor& X, - const int lambda_val, - const float pad_val); - -at::Tensor SwapAlign2Nat_backward_cuda( - const at::Tensor& gY, - const int lambda_val, - const int batch_size, - const int channel, - const int height, - const int width); -#endif - -inline at::Tensor SwapAlign2Nat_forward( - const at::Tensor& X, - const int lambda_val, - const float pad_val) { - if (X.type().is_cuda()) { -#ifdef WITH_CUDA - return SwapAlign2Nat_forward_cuda(X, lambda_val, pad_val); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - AT_ERROR("Not implemented on the CPU"); -} - -inline at::Tensor SwapAlign2Nat_backward( - const at::Tensor& gY, - const int lambda_val, - const int batch_size, - const int channel, - const int height, - const int width) { - if (gY.type().is_cuda()) { -#ifdef WITH_CUDA - return SwapAlign2Nat_backward_cuda( - gY, lambda_val, batch_size, channel, height, width); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - AT_ERROR("Not implemented on the CPU"); -} - -} // namespace tensormask diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/csrc/SwapAlign2Nat/SwapAlign2Nat_cuda.cu b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/csrc/SwapAlign2Nat/SwapAlign2Nat_cuda.cu deleted file mode 100644 index 06de4a4..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/csrc/SwapAlign2Nat/SwapAlign2Nat_cuda.cu +++ /dev/null @@ -1,526 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -#include -#include -#include -#include - -// TODO make it in a common file -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ - i += blockDim.x * gridDim.x) - -template -__device__ inline T get_pixel_val( - const T* tensor, - const int idx, - const int H, - const int W, - const int y, - const int x, - const int V, - const int U, - const int v, - const int u, - const T pad_val) { - if ((y < 0) || (y >= H) || (x < 0) || (x >= W) || (v < 0) || (v >= V) || - (u < 0) || (u >= U)) { - return pad_val; - } else { - return tensor[(((idx * V + v) * U + u) * H + y) * W + x]; - } -} - -template -__device__ inline void add_pixel_val( - T* tensor, - const T val, - const int idx, - const int H, - const int W, - const int y, - const int x, - const int V, - const int U, - const int v, - const int u) { - if ((val == 0.) || (y < 0) || (y >= H) || (x < 0) || (x >= W) || (v < 0) || - (v >= V) || (u < 0) || (u >= U)) { - return; - } else { - atomicAdd(tensor + ((((idx * V + v) * U + u) * H + y) * W + x), val); - } -} - -template -__global__ void SwapAlign2NatForwardFeat( - const int nthreads, - const T* bottom_data, - const int Vout, - const int Uout, - const float hVout, - const float hUout, - const int Vin, - const int Uin, - const float lambda, - const int Hin, - const int Win, - const int Hout, - const int Wout, - const T pad_val, - T* top_data) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - int idx = index; - const int x = idx % Wout; - idx /= Wout; - const int y = idx % Hout; - idx /= Hout; - const int u = idx % Uout; - idx /= Uout; - const int v = idx % Vout; - idx /= Vout; - - const float ox = x * lambda + u - hUout + 0.5; - const int xf = static_cast(floor(ox)); - const int xc = static_cast(ceil(ox)); - const float xwc = ox - xf; - const float xwf = 1. 
- xwc; - - const float oy = y * lambda + v - hVout + 0.5; - const int yf = static_cast(floor(oy)); - const int yc = static_cast(ceil(oy)); - const float ywc = oy - yf; - const float ywf = 1. - ywc; - - const float ou = (u + 0.5) / lambda - 0.5; - const int uf = static_cast(floor(ou)); - const int uc = static_cast(ceil(ou)); - const float uwc = ou - uf; - const float uwf = 1. - uwc; - - const float ov = (v + 0.5) / lambda - 0.5; - const int vf = static_cast(floor(ov)); - const int vc = static_cast(ceil(ov)); - const float vwc = ov - vf; - const float vwf = 1. - vwc; - - T val = ywf * xwf * vwf * uwf * - get_pixel_val( - bottom_data, idx, Hin, Win, yf, xf, Vin, Uin, vf, uf, pad_val) + - ywf * xwf * vwf * uwc * - get_pixel_val( - bottom_data, idx, Hin, Win, yf, xf, Vin, Uin, vf, uc, pad_val) + - ywf * xwf * vwc * uwf * - get_pixel_val( - bottom_data, idx, Hin, Win, yf, xf, Vin, Uin, vc, uf, pad_val) + - ywf * xwf * vwc * uwc * - get_pixel_val( - bottom_data, idx, Hin, Win, yf, xf, Vin, Uin, vc, uc, pad_val) + - ywf * xwc * vwf * uwf * - get_pixel_val( - bottom_data, idx, Hin, Win, yf, xc, Vin, Uin, vf, uf, pad_val) + - ywf * xwc * vwf * uwc * - get_pixel_val( - bottom_data, idx, Hin, Win, yf, xc, Vin, Uin, vf, uc, pad_val) + - ywf * xwc * vwc * uwf * - get_pixel_val( - bottom_data, idx, Hin, Win, yf, xc, Vin, Uin, vc, uf, pad_val) + - ywf * xwc * vwc * uwc * - get_pixel_val( - bottom_data, idx, Hin, Win, yf, xc, Vin, Uin, vc, uc, pad_val) + - ywc * xwf * vwf * uwf * - get_pixel_val( - bottom_data, idx, Hin, Win, yc, xf, Vin, Uin, vf, uf, pad_val) + - ywc * xwf * vwf * uwc * - get_pixel_val( - bottom_data, idx, Hin, Win, yc, xf, Vin, Uin, vf, uc, pad_val) + - ywc * xwf * vwc * uwf * - get_pixel_val( - bottom_data, idx, Hin, Win, yc, xf, Vin, Uin, vc, uf, pad_val) + - ywc * xwf * vwc * uwc * - get_pixel_val( - bottom_data, idx, Hin, Win, yc, xf, Vin, Uin, vc, uc, pad_val) + - ywc * xwc * vwf * uwf * - get_pixel_val( - bottom_data, idx, Hin, Win, yc, xc, Vin, Uin, vf, uf, pad_val) + - ywc * xwc * vwf * uwc * - get_pixel_val( - bottom_data, idx, Hin, Win, yc, xc, Vin, Uin, vf, uc, pad_val) + - ywc * xwc * vwc * uwf * - get_pixel_val( - bottom_data, idx, Hin, Win, yc, xc, Vin, Uin, vc, uf, pad_val) + - ywc * xwc * vwc * uwc * - get_pixel_val( - bottom_data, idx, Hin, Win, yc, xc, Vin, Uin, vc, uc, pad_val); - - top_data[index] = val; - } -} - -template -__global__ void SwapAlign2NatBackwardFeat( - const int nthreads, - const T* top_diff, - const int Vout, - const int Uout, - const float hVout, - const float hUout, - const int Vin, - const int Uin, - const float lambda, - const int Hin, - const int Win, - const int Hout, - const int Wout, - T* bottom_diff) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - int idx = index; - const int x = idx % Wout; - idx /= Wout; - const int y = idx % Hout; - idx /= Hout; - const int u = idx % Uout; - idx /= Uout; - const int v = idx % Vout; - idx /= Vout; - - const float ox = x * lambda + u - hUout + 0.5; - const int xf = static_cast(floor(ox)); - const int xc = static_cast(ceil(ox)); - const float xwc = ox - xf; - const float xwf = 1. - xwc; - - const float oy = y * lambda + v - hVout + 0.5; - const int yf = static_cast(floor(oy)); - const int yc = static_cast(ceil(oy)); - const float ywc = oy - yf; - const float ywf = 1. - ywc; - - const float ou = (u + 0.5) / lambda - 0.5; - const int uf = static_cast(floor(ou)); - const int uc = static_cast(ceil(ou)); - const float uwc = ou - uf; - const float uwf = 1. 
- uwc; - - const float ov = (v + 0.5) / lambda - 0.5; - const int vf = static_cast(floor(ov)); - const int vc = static_cast(ceil(ov)); - const float vwc = ov - vf; - const float vwf = 1. - vwc; - - const T grad = top_diff[index]; - - add_pixel_val( - bottom_diff, - ywf * xwf * vwf * uwf * grad, - idx, - Hin, - Win, - yf, - xf, - Vin, - Uin, - vf, - uf); - add_pixel_val( - bottom_diff, - ywf * xwf * vwf * uwc * grad, - idx, - Hin, - Win, - yf, - xf, - Vin, - Uin, - vf, - uc); - add_pixel_val( - bottom_diff, - ywf * xwf * vwc * uwf * grad, - idx, - Hin, - Win, - yf, - xf, - Vin, - Uin, - vc, - uf); - add_pixel_val( - bottom_diff, - ywf * xwf * vwc * uwc * grad, - idx, - Hin, - Win, - yf, - xf, - Vin, - Uin, - vc, - uc); - add_pixel_val( - bottom_diff, - ywf * xwc * vwf * uwf * grad, - idx, - Hin, - Win, - yf, - xc, - Vin, - Uin, - vf, - uf); - add_pixel_val( - bottom_diff, - ywf * xwc * vwf * uwc * grad, - idx, - Hin, - Win, - yf, - xc, - Vin, - Uin, - vf, - uc); - add_pixel_val( - bottom_diff, - ywf * xwc * vwc * uwf * grad, - idx, - Hin, - Win, - yf, - xc, - Vin, - Uin, - vc, - uf); - add_pixel_val( - bottom_diff, - ywf * xwc * vwc * uwc * grad, - idx, - Hin, - Win, - yf, - xc, - Vin, - Uin, - vc, - uc); - add_pixel_val( - bottom_diff, - ywc * xwf * vwf * uwf * grad, - idx, - Hin, - Win, - yc, - xf, - Vin, - Uin, - vf, - uf); - add_pixel_val( - bottom_diff, - ywc * xwf * vwf * uwc * grad, - idx, - Hin, - Win, - yc, - xf, - Vin, - Uin, - vf, - uc); - add_pixel_val( - bottom_diff, - ywc * xwf * vwc * uwf * grad, - idx, - Hin, - Win, - yc, - xf, - Vin, - Uin, - vc, - uf); - add_pixel_val( - bottom_diff, - ywc * xwf * vwc * uwc * grad, - idx, - Hin, - Win, - yc, - xf, - Vin, - Uin, - vc, - uc); - add_pixel_val( - bottom_diff, - ywc * xwc * vwf * uwf * grad, - idx, - Hin, - Win, - yc, - xc, - Vin, - Uin, - vf, - uf); - add_pixel_val( - bottom_diff, - ywc * xwc * vwf * uwc * grad, - idx, - Hin, - Win, - yc, - xc, - Vin, - Uin, - vf, - uc); - add_pixel_val( - bottom_diff, - ywc * xwc * vwc * uwf * grad, - idx, - Hin, - Win, - yc, - xc, - Vin, - Uin, - vc, - uf); - add_pixel_val( - bottom_diff, - ywc * xwc * vwc * uwc * grad, - idx, - Hin, - Win, - yc, - xc, - Vin, - Uin, - vc, - uc); - } -} - -namespace tensormask { - -at::Tensor SwapAlign2Nat_forward_cuda( - const at::Tensor& X, - const int lambda_val, - const float pad_val) { - AT_ASSERTM(X.device().is_cuda(), "input must be a CUDA tensor"); - AT_ASSERTM(X.ndimension() == 4, "input must be a 4D tensor"); - AT_ASSERTM(lambda_val >= 1, "lambda should be greater or equal to 1"); - const int N = X.size(0); - const int C = X.size(1); - const int Vin = static_cast(sqrt(static_cast(C))); - const int Uin = C / Vin; - AT_ASSERTM( - C == Vin * Uin && Vin == Uin, "#channels should be a square number"); - const int Vout = lambda_val * Vin; - const int Uout = lambda_val * Uin; - const int Hin = X.size(2); - const int Win = X.size(3); - const float lambda = static_cast(lambda_val); - const int Hout = static_cast(ceil(Hin / lambda)); - const int Wout = static_cast(ceil(Win / lambda)); - const float hVout = Vout / 2.; - const float hUout = Uout / 2.; - - at::cuda::CUDAGuard device_guard(X.device()); - - at::Tensor Y = at::empty({N, Vout * Uout, Hout, Wout}, X.options()); - - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - dim3 grid(std::min(at::cuda::ATenCeilDiv(Y.numel(), 512L), 4096L)); - dim3 block(512); - - if (Y.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return Y; - } - - auto X_ = X.contiguous(); - 
AT_DISPATCH_FLOATING_TYPES(X.scalar_type(), "SwapAlign2Nat_forward", [&] { - SwapAlign2NatForwardFeat<<>>( - Y.numel(), - X_.data_ptr(), - Vout, - Uout, - hVout, - hUout, - Vin, - Uin, - lambda, - Hin, - Win, - Hout, - Wout, - pad_val, - Y.data_ptr()); - }); - cudaDeviceSynchronize(); - AT_CUDA_CHECK(cudaGetLastError()); - return Y; -} - -at::Tensor SwapAlign2Nat_backward_cuda( - const at::Tensor& gY, - const int lambda_val, - const int batch_size, - const int channel, - const int height, - const int width) { - AT_ASSERTM(gY.device().is_cuda(), "input gradient must be a CUDA tensor"); - AT_ASSERTM(gY.ndimension() == 4, "input gradient must be a 4D tensor"); - AT_ASSERTM(lambda_val >= 1, "lambda should be greater or equal to 1"); - const int Vin = static_cast(sqrt(static_cast(channel))); - const int Uin = channel / Vin; - const int Vout = lambda_val * Vin; - const int Uout = lambda_val * Uin; - const float hVout = Vout / 2.; - const float hUout = Uout / 2.; - const int Hout = gY.size(2); - const int Wout = gY.size(3); - - at::cuda::CUDAGuard device_guard(gY.device()); - - at::Tensor gX = at::zeros({batch_size, channel, height, width}, gY.options()); - - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - dim3 grid(std::min(at::cuda::ATenCeilDiv(gY.numel(), 512L), 4096L)); - dim3 block(512); - - // handle possibly empty gradients - if (gY.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return gX; - } - - auto gY_ = gY.contiguous(); - AT_DISPATCH_FLOATING_TYPES(gY.scalar_type(), "SwapAlign2Nat_backward", [&] { - SwapAlign2NatBackwardFeat<<>>( - gY.numel(), - gY_.data_ptr(), - Vout, - Uout, - hVout, - hUout, - Vin, - Uin, - static_cast(lambda_val), - height, - width, - Hout, - Wout, - gX.data_ptr()); - }); - AT_CUDA_CHECK(cudaGetLastError()); - return gX; -} - -} // namespace tensormask diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/csrc/vision.cpp b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/csrc/vision.cpp deleted file mode 100644 index ad8e472..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/csrc/vision.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -#include -#include "SwapAlign2Nat/SwapAlign2Nat.h" - -namespace tensormask { - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def( - "swap_align2nat_forward", - &SwapAlign2Nat_forward, - "SwapAlign2Nat_forward"); - m.def( - "swap_align2nat_backward", - &SwapAlign2Nat_backward, - "SwapAlign2Nat_backward"); -} - -} // namespace tensormask diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/swap_align2nat.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/swap_align2nat.py deleted file mode 100644 index a72c98a..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/swap_align2nat.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -from torch import nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable - -from tensormask import _C - - -class _SwapAlign2Nat(Function): - @staticmethod - def forward(ctx, X, lambda_val, pad_val): - ctx.lambda_val = lambda_val - ctx.input_shape = X.size() - - Y = _C.swap_align2nat_forward(X, lambda_val, pad_val) - return Y - - @staticmethod - @once_differentiable - def backward(ctx, gY): - lambda_val = ctx.lambda_val - bs, ch, h, w = ctx.input_shape - - gX = _C.swap_align2nat_backward(gY, lambda_val, bs, ch, h, w) - - return gX, None, None - - -swap_align2nat = _SwapAlign2Nat.apply - - -class SwapAlign2Nat(nn.Module): - """ - The op `SwapAlign2Nat` described in https://arxiv.org/abs/1903.12174. - Given an input tensor that predicts masks of shape (N, C=VxU, H, W), - apply the op, it will return masks of shape (N, V'xU', H', W') where - the unit lengths of (V, U) and (H, W) are swapped, and the mask representation - is transformed from aligned to natural. - Args: - lambda_val (int): the relative unit length ratio between (V, U) and (H, W), - as we always have larger unit lengths for (V, U) than (H, W), - lambda_val is always >= 1. - pad_val (float): padding value for the values falling outside of the input - tensor, default set to -6 as sigmoid(-6) is ~0, indicating - that is no masks outside of the tensor. - """ - - def __init__(self, lambda_val, pad_val=-6.0): - super(SwapAlign2Nat, self).__init__() - self.lambda_val = lambda_val - self.pad_val = pad_val - - def forward(self, X): - return swap_align2nat(X, self.lambda_val, self.pad_val) - - def __repr__(self): - tmpstr = self.__class__.__name__ + "(" - tmpstr += "lambda_val=" + str(self.lambda_val) - tmpstr += ", pad_val=" + str(self.pad_val) - tmpstr += ")" - return tmpstr diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tests/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tests/__init__.py deleted file mode 100644 index 168f997..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tests/test_swap_align2nat.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tests/test_swap_align2nat.py deleted file mode 100644 index b3d018c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tests/test_swap_align2nat.py +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -import unittest -import torch -from torch.autograd import gradcheck - -from tensormask.layers.swap_align2nat import SwapAlign2Nat - - -class SwapAlign2NatTest(unittest.TestCase): - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_swap_align2nat_gradcheck_cuda(self): - dtype = torch.float64 - device = torch.device("cuda") - m = SwapAlign2Nat(2).to(dtype=dtype, device=device) - x = torch.rand(2, 4, 10, 10, dtype=dtype, device=device, requires_grad=True) - - self.assertTrue(gradcheck(m, x), "gradcheck failed for SwapAlign2Nat CUDA") - - def _swap_align2nat(self, tensor, lambda_val): - """ - The basic setup for testing Swap_Align - """ - op = SwapAlign2Nat(lambda_val, pad_val=0.0) - input = torch.from_numpy(tensor[None, :, :, :].astype("float32")) - output = op.forward(input.cuda()).cpu().numpy() - return output[0] - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/train_net.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/train_net.py deleted file mode 100644 index b898fc7..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/train_net.py +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -""" -TensorMask Training Script. - -This script is a simplified version of the training script in detectron2/tools. -""" - -import os - -import detectron2.utils.comm as comm -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch -from detectron2.evaluation import COCOEvaluator, verify_results - -from tensormask import add_tensormask_config - - -class Trainer(DefaultTrainer): - @classmethod - def build_evaluator(cls, cfg, dataset_name, output_folder=None): - if output_folder is None: - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") - return COCOEvaluator(dataset_name, cfg, True, output_folder) - - -def setup(args): - """ - Create configs and perform basic setups. 
- """ - cfg = get_cfg() - add_tensormask_config(cfg) - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup(cfg, args) - return cfg - - -def main(args): - cfg = setup(args) - - if args.eval_only: - model = Trainer.build_model(cfg) - DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( - cfg.MODEL.WEIGHTS, resume=args.resume - ) - res = Trainer.test(cfg, model) - if comm.is_main_process(): - verify_results(cfg, res) - return res - - trainer = Trainer(cfg) - trainer.resume_or_load(resume=args.resume) - return trainer.train() - - -if __name__ == "__main__": - args = default_argument_parser().parse_args() - print("Command Line Args:", args) - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/README.md b/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/README.md deleted file mode 100644 index 4b7a901..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/README.md +++ /dev/null @@ -1,60 +0,0 @@ - -# TridentNet in Detectron2 -**Scale-Aware Trident Networks for Object Detection** - -Yanghao Li\*, Yuntao Chen\*, Naiyan Wang, Zhaoxiang Zhang - -[[`TridentNet`](https://github.com/TuSimple/simpledet/tree/master/models/tridentnet)] [[`arXiv`](https://arxiv.org/abs/1901.01892)] [[`BibTeX`](#CitingTridentNet)] - -
- -
- -In this repository, we implement TridentNet-Fast in Detectron2. -Trident Network (TridentNet) aims to generate scale-specific feature maps with a uniform representational power. We construct a parallel multi-branch architecture in which each branch shares the same transformation parameters but with different receptive fields. TridentNet-Fast is a fast approximation version of TridentNet that could achieve significant improvements without any additional parameters and computational cost. - -## Training - -To train a model, run -```bash -python /path/to/detectron2/projects/TridentNet/train_net.py --config-file -``` - -For example, to launch end-to-end TridentNet training with ResNet-50 backbone on 8 GPUs, -one should execute: -```bash -python /path/to/detectron2/projects/TridentNet/train_net.py --config-file configs/tridentnet_fast_R_50_C4_1x.yaml --num-gpus 8 -``` - -## Evaluation - -Model evaluation can be done similarly: -```bash -python /path/to/detectron2/projects/TridentNet/train_net.py --config-file configs/tridentnet_fast_R_50_C4_1x.yaml --eval-only MODEL.WEIGHTS model.pth -``` - -## Results on MS-COCO in Detectron2 - -|Model|Backbone|Head|lr sched|AP|AP50|AP75|APs|APm|APl|download| -|-----|--------|----|--------|--|----|----|---|---|---|--------| -|Faster|R50-C4|C5-512ROI|1X|35.7|56.1|38.0|19.2|40.9|48.7|model \| metrics| -|TridentFast|R50-C4|C5-128ROI|1X|38.0|58.1|40.8|19.5|42.2|54.6|model \| metrics| -|Faster|R50-C4|C5-512ROI|3X|38.4|58.7|41.3|20.7|42.7|53.1|model \| metrics| -|TridentFast|R50-C4|C5-128ROI|3X|40.6|60.8|43.6|23.4|44.7|57.1|model \| metrics| -|Faster|R101-C4|C5-512ROI|3X|41.1|61.4|44.0|22.2|45.5|55.9|model \| metrics| -|TridentFast|R101-C4|C5-128ROI|3X|43.6|63.4|47.0|24.3|47.8|60.0|model \| metrics| - - -## Citing TridentNet - -If you use TridentNet, please use the following BibTeX entry. 
- -``` -@InProceedings{li2019scale, - title={Scale-Aware Trident Networks for Object Detection}, - author={Li, Yanghao and Chen, Yuntao and Wang, Naiyan and Zhang, Zhaoxiang}, - journal={The International Conference on Computer Vision (ICCV)}, - year={2019} -} -``` - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/configs/Base-TridentNet-Fast-C4.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/configs/Base-TridentNet-Fast-C4.yaml deleted file mode 100644 index 8c3d807..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/configs/Base-TridentNet-Fast-C4.yaml +++ /dev/null @@ -1,29 +0,0 @@ -MODEL: - META_ARCHITECTURE: "GeneralizedRCNN" - BACKBONE: - NAME: "build_trident_resnet_backbone" - ROI_HEADS: - NAME: "TridentRes5ROIHeads" - POSITIVE_FRACTION: 0.5 - BATCH_SIZE_PER_IMAGE: 128 - PROPOSAL_APPEND_GT: False - PROPOSAL_GENERATOR: - NAME: "TridentRPN" - RPN: - POST_NMS_TOPK_TRAIN: 500 - TRIDENT: - NUM_BRANCH: 3 - BRANCH_DILATIONS: [1, 2, 3] - TEST_BRANCH_IDX: 1 - TRIDENT_STAGE: "res4" -DATASETS: - TRAIN: ("coco_2017_train",) - TEST: ("coco_2017_val",) -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.02 - STEPS: (60000, 80000) - MAX_ITER: 90000 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -VERSION: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/configs/tridentnet_fast_R_101_C4_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/configs/tridentnet_fast_R_101_C4_3x.yaml deleted file mode 100644 index bc83c2f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/configs/tridentnet_fast_R_101_C4_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "Base-TridentNet-Fast-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - MASK_ON: False - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/configs/tridentnet_fast_R_50_C4_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/configs/tridentnet_fast_R_50_C4_1x.yaml deleted file mode 100644 index fda2cb6..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/configs/tridentnet_fast_R_50_C4_1x.yaml +++ /dev/null @@ -1,6 +0,0 @@ -_BASE_: "Base-TridentNet-Fast-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/configs/tridentnet_fast_R_50_C4_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/configs/tridentnet_fast_R_50_C4_3x.yaml deleted file mode 100644 index ebf89d0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/configs/tridentnet_fast_R_50_C4_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "Base-TridentNet-Fast-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/train_net.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/train_net.py deleted file mode 100644 index eac2ec5..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/train_net.py +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved - -""" -TridentNet Training Script. - -This script is a simplified version of the training script in detectron2/tools. -""" - -import os - -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch -from detectron2.evaluation import COCOEvaluator - -from tridentnet import add_tridentnet_config - - -class Trainer(DefaultTrainer): - @classmethod - def build_evaluator(cls, cfg, dataset_name, output_folder=None): - if output_folder is None: - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") - return COCOEvaluator(dataset_name, cfg, True, output_folder) - - -def setup(args): - """ - Create configs and perform basic setups. - """ - cfg = get_cfg() - add_tridentnet_config(cfg) - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup(cfg, args) - return cfg - - -def main(args): - cfg = setup(args) - - if args.eval_only: - model = Trainer.build_model(cfg) - DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( - cfg.MODEL.WEIGHTS, resume=args.resume - ) - res = Trainer.test(cfg, model) - return res - - trainer = Trainer(cfg) - trainer.resume_or_load(resume=args.resume) - return trainer.train() - - -if __name__ == "__main__": - args = default_argument_parser().parse_args() - print("Command Line Args:", args) - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/__init__.py deleted file mode 100644 index 2fcdeb4..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .config import add_tridentnet_config -from .trident_backbone import ( - TridentBottleneckBlock, - build_trident_resnet_backbone, - make_trident_stage, -) -from .trident_rpn import TridentRPN -from .trident_rcnn import TridentRes5ROIHeads, TridentStandardROIHeads diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/config.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/config.py deleted file mode 100644 index f33f473..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/config.py +++ /dev/null @@ -1,26 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -from detectron2.config import CfgNode as CN - - -def add_tridentnet_config(cfg): - """ - Add config for tridentnet. - """ - _C = cfg - - _C.MODEL.TRIDENT = CN() - - # Number of branches for TridentNet. - _C.MODEL.TRIDENT.NUM_BRANCH = 3 - # Specify the dilations for each branch. - _C.MODEL.TRIDENT.BRANCH_DILATIONS = [1, 2, 3] - # Specify the stage for applying trident blocks. Default stage is Res4 according to the - # TridentNet paper. - _C.MODEL.TRIDENT.TRIDENT_STAGE = "res4" - # Specify the test branch index TridentNet Fast inference: - # - use -1 to aggregate results of all branches during inference. - # - otherwise, only using specified branch for fast inference. Recommended setting is - # to use the middle branch. 
- _C.MODEL.TRIDENT.TEST_BRANCH_IDX = 1 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_backbone.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_backbone.py deleted file mode 100644 index 232dfaf..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_backbone.py +++ /dev/null @@ -1,223 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import fvcore.nn.weight_init as weight_init -import torch -import torch.nn.functional as F - -from detectron2.layers import Conv2d, FrozenBatchNorm2d, get_norm -from detectron2.modeling import BACKBONE_REGISTRY, ResNet, ResNetBlockBase, make_stage -from detectron2.modeling.backbone.resnet import BasicStem, BottleneckBlock, DeformBottleneckBlock - -from .trident_conv import TridentConv - -__all__ = ["TridentBottleneckBlock", "make_trident_stage", "build_trident_resnet_backbone"] - - -class TridentBottleneckBlock(ResNetBlockBase): - def __init__( - self, - in_channels, - out_channels, - *, - bottleneck_channels, - stride=1, - num_groups=1, - norm="BN", - stride_in_1x1=False, - num_branch=3, - dilations=(1, 2, 3), - concat_output=False, - test_branch_idx=-1, - ): - """ - Args: - num_branch (int): the number of branches in TridentNet. - dilations (tuple): the dilations of multiple branches in TridentNet. - concat_output (bool): if concatenate outputs of multiple branches in TridentNet. - Use 'True' for the last trident block. - """ - super().__init__(in_channels, out_channels, stride) - - assert num_branch == len(dilations) - - self.num_branch = num_branch - self.concat_output = concat_output - self.test_branch_idx = test_branch_idx - - if in_channels != out_channels: - self.shortcut = Conv2d( - in_channels, - out_channels, - kernel_size=1, - stride=stride, - bias=False, - norm=get_norm(norm, out_channels), - ) - else: - self.shortcut = None - - stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride) - - self.conv1 = Conv2d( - in_channels, - bottleneck_channels, - kernel_size=1, - stride=stride_1x1, - bias=False, - norm=get_norm(norm, bottleneck_channels), - ) - - self.conv2 = TridentConv( - bottleneck_channels, - bottleneck_channels, - kernel_size=3, - stride=stride_3x3, - paddings=dilations, - bias=False, - groups=num_groups, - dilations=dilations, - num_branch=num_branch, - test_branch_idx=test_branch_idx, - norm=get_norm(norm, bottleneck_channels), - ) - - self.conv3 = Conv2d( - bottleneck_channels, - out_channels, - kernel_size=1, - bias=False, - norm=get_norm(norm, out_channels), - ) - - for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]: - if layer is not None: # shortcut can be None - weight_init.c2_msra_fill(layer) - - def forward(self, x): - num_branch = self.num_branch if self.training or self.test_branch_idx == -1 else 1 - if not isinstance(x, list): - x = [x] * num_branch - out = [self.conv1(b) for b in x] - out = [F.relu_(b) for b in out] - - out = self.conv2(out) - out = [F.relu_(b) for b in out] - - out = [self.conv3(b) for b in out] - - if self.shortcut is not None: - shortcut = [self.shortcut(b) for b in x] - else: - shortcut = x - - out = [out_b + shortcut_b for out_b, shortcut_b in zip(out, shortcut)] - out = [F.relu_(b) for b in out] - if self.concat_output: - out = torch.cat(out) - return out - - -def make_trident_stage(block_class, num_blocks, first_stride, **kwargs): - """ - Create a resnet stage by creating many blocks for TridentNet. 
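The `TridentBottleneckBlock` above routes its 3x3 convolution through `TridentConv`, so every branch reuses the same kernel but runs it at a different dilation (the `BRANCH_DILATIONS` default of `[1, 2, 3]` from the config shown earlier). A standalone sketch of that weight-sharing idea in plain PyTorch, independent of the `TridentConv` API:

```python
import torch
import torch.nn.functional as F

# One shared 3x3 kernel applied at several dilation rates: identical
# parameters, different receptive fields. padding=d keeps the spatial size
# unchanged for a 3x3 kernel with dilation d.
weight = torch.randn(8, 8, 3, 3)
x = torch.randn(1, 8, 32, 32)
branches = [F.conv2d(x, weight, padding=d, dilation=d) for d in (1, 2, 3)]
print([tuple(b.shape) for b in branches])  # three times (1, 8, 32, 32)
```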
- """ - blocks = [] - for i in range(num_blocks - 1): - blocks.append(block_class(stride=first_stride if i == 0 else 1, **kwargs)) - kwargs["in_channels"] = kwargs["out_channels"] - blocks.append(block_class(stride=1, concat_output=True, **kwargs)) - return blocks - - -@BACKBONE_REGISTRY.register() -def build_trident_resnet_backbone(cfg, input_shape): - """ - Create a ResNet instance from config for TridentNet. - - Returns: - ResNet: a :class:`ResNet` instance. - """ - # need registration of new blocks/stems? - norm = cfg.MODEL.RESNETS.NORM - stem = BasicStem( - in_channels=input_shape.channels, - out_channels=cfg.MODEL.RESNETS.STEM_OUT_CHANNELS, - norm=norm, - ) - freeze_at = cfg.MODEL.BACKBONE.FREEZE_AT - - if freeze_at >= 1: - for p in stem.parameters(): - p.requires_grad = False - stem = FrozenBatchNorm2d.convert_frozen_batchnorm(stem) - - # fmt: off - out_features = cfg.MODEL.RESNETS.OUT_FEATURES - depth = cfg.MODEL.RESNETS.DEPTH - num_groups = cfg.MODEL.RESNETS.NUM_GROUPS - width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP - bottleneck_channels = num_groups * width_per_group - in_channels = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS - out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS - stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1 - res5_dilation = cfg.MODEL.RESNETS.RES5_DILATION - deform_on_per_stage = cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE - deform_modulated = cfg.MODEL.RESNETS.DEFORM_MODULATED - deform_num_groups = cfg.MODEL.RESNETS.DEFORM_NUM_GROUPS - num_branch = cfg.MODEL.TRIDENT.NUM_BRANCH - branch_dilations = cfg.MODEL.TRIDENT.BRANCH_DILATIONS - trident_stage = cfg.MODEL.TRIDENT.TRIDENT_STAGE - test_branch_idx = cfg.MODEL.TRIDENT.TEST_BRANCH_IDX - # fmt: on - assert res5_dilation in {1, 2}, "res5_dilation cannot be {}.".format(res5_dilation) - - num_blocks_per_stage = {50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3]}[depth] - - stages = [] - - res_stage_idx = {"res2": 2, "res3": 3, "res4": 4, "res5": 5} - out_stage_idx = [res_stage_idx[f] for f in out_features] - trident_stage_idx = res_stage_idx[trident_stage] - max_stage_idx = max(out_stage_idx) - for idx, stage_idx in enumerate(range(2, max_stage_idx + 1)): - dilation = res5_dilation if stage_idx == 5 else 1 - first_stride = 1 if idx == 0 or (stage_idx == 5 and dilation == 2) else 2 - stage_kargs = { - "num_blocks": num_blocks_per_stage[idx], - "first_stride": first_stride, - "in_channels": in_channels, - "bottleneck_channels": bottleneck_channels, - "out_channels": out_channels, - "num_groups": num_groups, - "norm": norm, - "stride_in_1x1": stride_in_1x1, - "dilation": dilation, - } - if stage_idx == trident_stage_idx: - assert not deform_on_per_stage[ - idx - ], "Not support deformable conv in Trident blocks yet." 
- stage_kargs["block_class"] = TridentBottleneckBlock - stage_kargs["num_branch"] = num_branch - stage_kargs["dilations"] = branch_dilations - stage_kargs["test_branch_idx"] = test_branch_idx - stage_kargs.pop("dilation") - elif deform_on_per_stage[idx]: - stage_kargs["block_class"] = DeformBottleneckBlock - stage_kargs["deform_modulated"] = deform_modulated - stage_kargs["deform_num_groups"] = deform_num_groups - else: - stage_kargs["block_class"] = BottleneckBlock - blocks = ( - make_trident_stage(**stage_kargs) - if stage_idx == trident_stage_idx - else make_stage(**stage_kargs) - ) - in_channels = out_channels - out_channels *= 2 - bottleneck_channels *= 2 - - if freeze_at >= stage_idx: - for block in blocks: - block.freeze() - stages.append(blocks) - return ResNet(stem, stages, out_features=out_features) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_conv.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_conv.py deleted file mode 100644 index 7e2d525..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_conv.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import torch -from torch import nn -from torch.nn import functional as F -from torch.nn.modules.utils import _pair - -from detectron2.layers.wrappers import _NewEmptyTensorOp - - -class TridentConv(nn.Module): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - paddings=0, - dilations=1, - groups=1, - num_branch=1, - test_branch_idx=-1, - bias=False, - norm=None, - activation=None, - ): - super(TridentConv, self).__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = _pair(kernel_size) - self.num_branch = num_branch - self.stride = _pair(stride) - self.groups = groups - self.with_bias = bias - if isinstance(paddings, int): - paddings = [paddings] * self.num_branch - if isinstance(dilations, int): - dilations = [dilations] * self.num_branch - self.paddings = [_pair(padding) for padding in paddings] - self.dilations = [_pair(dilation) for dilation in dilations] - self.test_branch_idx = test_branch_idx - self.norm = norm - self.activation = activation - - assert len({self.num_branch, len(self.paddings), len(self.dilations)}) == 1 - - self.weight = nn.Parameter( - torch.Tensor(out_channels, in_channels // groups, *self.kernel_size) - ) - if bias: - self.bias = nn.Parameter(torch.Tensor(out_channels)) - else: - self.bias = None - - nn.init.kaiming_uniform_(self.weight, nonlinearity="relu") - if self.bias is not None: - nn.init.constant_(self.bias, 0) - - def forward(self, inputs): - num_branch = self.num_branch if self.training or self.test_branch_idx == -1 else 1 - assert len(inputs) == num_branch - - if inputs[0].numel() == 0: - output_shape = [ - (i + 2 * p - (di * (k - 1) + 1)) // s + 1 - for i, p, di, k, s in zip( - inputs[0].shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride - ) - ] - output_shape = [input[0].shape[0], self.weight.shape[0]] + output_shape - return [_NewEmptyTensorOp.apply(input, output_shape) for input in inputs] - - if self.training or self.test_branch_idx == -1: - outputs = [ - F.conv2d(input, self.weight, self.bias, self.stride, padding, dilation, self.groups) - for input, dilation, padding in zip(inputs, self.dilations, self.paddings) - ] - else: - outputs = [ - F.conv2d( - inputs[0], - self.weight, - self.bias, - 
self.stride, - self.paddings[self.test_branch_idx], - self.dilations[self.test_branch_idx], - self.groups, - ) - ] - - if self.norm is not None: - outputs = [self.norm(x) for x in outputs] - if self.activation is not None: - outputs = [self.activation(x) for x in outputs] - return outputs - - def extra_repr(self): - tmpstr = "in_channels=" + str(self.in_channels) - tmpstr += ", out_channels=" + str(self.out_channels) - tmpstr += ", kernel_size=" + str(self.kernel_size) - tmpstr += ", num_branch=" + str(self.num_branch) - tmpstr += ", test_branch_idx=" + str(self.test_branch_idx) - tmpstr += ", stride=" + str(self.stride) - tmpstr += ", paddings=" + str(self.paddings) - tmpstr += ", dilations=" + str(self.dilations) - tmpstr += ", groups=" + str(self.groups) - tmpstr += ", bias=" + str(self.with_bias) - return tmpstr diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_rcnn.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_rcnn.py deleted file mode 100644 index 65deb90..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_rcnn.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from detectron2.layers import batched_nms -from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads -from detectron2.modeling.roi_heads.roi_heads import Res5ROIHeads -from detectron2.structures import Instances - - -def merge_branch_instances(instances, num_branch, nms_thresh, topk_per_image): - """ - Merge detection results from different branches of TridentNet. - Return detection results by applying non-maximum suppression (NMS) on bounding boxes - and keep the unsuppressed boxes and other instances (e.g mask) if any. - - Args: - instances (list[Instances]): A list of N * num_branch instances that store detection - results. Contain N images and each image has num_branch instances. - num_branch (int): Number of branches used for merging detection results for each image. - nms_thresh (float): The threshold to use for box non-maximum suppression. Value in [0, 1]. - topk_per_image (int): The number of top scoring detections to return. Set < 0 to return - all detections. - - Returns: - results: (list[Instances]): A list of N instances, one for each image in the batch, - that stores the topk most confidence detections after merging results from multiple - branches. - """ - if num_branch == 1: - return instances - - batch_size = len(instances) // num_branch - results = [] - for i in range(batch_size): - instance = Instances.cat([instances[i + batch_size * j] for j in range(num_branch)]) - - # Apply per-class NMS - keep = batched_nms( - instance.pred_boxes.tensor, instance.scores, instance.pred_classes, nms_thresh - ) - keep = keep[:topk_per_image] - result = instance[keep] - - results.append(result) - - return results - - -@ROI_HEADS_REGISTRY.register() -class TridentRes5ROIHeads(Res5ROIHeads): - """ - The TridentNet ROIHeads in a typical "C4" R-CNN model. - See :class:`Res5ROIHeads`. - """ - - def __init__(self, cfg, input_shape): - super().__init__(cfg, input_shape) - - self.num_branch = cfg.MODEL.TRIDENT.NUM_BRANCH - self.trident_fast = cfg.MODEL.TRIDENT.TEST_BRANCH_IDX != -1 - - def forward(self, images, features, proposals, targets=None): - """ - See :class:`Res5ROIHeads.forward`. 
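`merge_branch_instances` above concatenates the per-branch detections of each image and prunes them with class-aware NMS. A simplified sketch of the same merge on raw tensors, using `torchvision.ops.batched_nms` instead of detectron2's `Instances` and `batched_nms` wrappers (the function and argument names here are illustrative only):

```python
import torch
from torchvision.ops import batched_nms

def merge_branches(boxes_list, scores_list, classes_list, nms_thresh=0.5, topk=100):
    # Concatenate detections from all branches of one image, run per-class
    # NMS, then keep at most `topk` survivors (indices come back sorted by
    # decreasing score).
    boxes = torch.cat(boxes_list)      # (sum_i N_i, 4) in xyxy format
    scores = torch.cat(scores_list)
    classes = torch.cat(classes_list)
    keep = batched_nms(boxes, scores, classes, nms_thresh)[:topk]
    return boxes[keep], scores[keep], classes[keep]
```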
- """ - num_branch = self.num_branch if self.training or not self.trident_fast else 1 - all_targets = targets * num_branch if targets is not None else None - pred_instances, losses = super().forward(images, features, proposals, all_targets) - del images, all_targets, targets - - if self.training: - return pred_instances, losses - else: - pred_instances = merge_branch_instances( - pred_instances, - num_branch, - self.box_predictor.test_nms_thresh, - self.box_predictor.test_topk_per_image, - ) - - return pred_instances, {} - - -@ROI_HEADS_REGISTRY.register() -class TridentStandardROIHeads(StandardROIHeads): - """ - The `StandardROIHeads` for TridentNet. - See :class:`StandardROIHeads`. - """ - - def __init__(self, cfg, input_shape): - super(TridentStandardROIHeads, self).__init__(cfg, input_shape) - - self.num_branch = cfg.MODEL.TRIDENT.NUM_BRANCH - self.trident_fast = cfg.MODEL.TRIDENT.TEST_BRANCH_IDX != -1 - - def forward(self, images, features, proposals, targets=None): - """ - See :class:`Res5ROIHeads.forward`. - """ - # Use 1 branch if using trident_fast during inference. - num_branch = self.num_branch if self.training or not self.trident_fast else 1 - # Duplicate targets for all branches in TridentNet. - all_targets = targets * num_branch if targets is not None else None - pred_instances, losses = super().forward(images, features, proposals, all_targets) - del images, all_targets, targets - - if self.training: - return pred_instances, losses - else: - pred_instances = merge_branch_instances( - pred_instances, - num_branch, - self.box_predictor.test_nms_thresh, - self.box_predictor.test_topk_per_image, - ) - - return pred_instances, {} diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_rpn.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_rpn.py deleted file mode 100644 index c30137f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_rpn.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import torch - -from detectron2.modeling import PROPOSAL_GENERATOR_REGISTRY -from detectron2.modeling.proposal_generator.rpn import RPN -from detectron2.structures import ImageList - - -@PROPOSAL_GENERATOR_REGISTRY.register() -class TridentRPN(RPN): - """ - Trident RPN subnetwork. - """ - - def __init__(self, cfg, input_shape): - super(TridentRPN, self).__init__(cfg, input_shape) - - self.num_branch = cfg.MODEL.TRIDENT.NUM_BRANCH - self.trident_fast = cfg.MODEL.TRIDENT.TEST_BRANCH_IDX != -1 - - def forward(self, images, features, gt_instances=None): - """ - See :class:`RPN.forward`. - """ - num_branch = self.num_branch if self.training or not self.trident_fast else 1 - # Duplicate images and gt_instances for all branches in TridentNet. 
- all_images = ImageList( - torch.cat([images.tensor] * num_branch), images.image_sizes * num_branch - ) - all_gt_instances = gt_instances * num_branch if gt_instances is not None else None - - return super(TridentRPN, self).forward(all_images, features, all_gt_instances) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/setup.cfg b/preprocess/humanparsing/mhp_extension/detectron2/setup.cfg deleted file mode 100644 index b09bba9..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/setup.cfg +++ /dev/null @@ -1,26 +0,0 @@ -[isort] -line_length=100 -multi_line_output=3 -include_trailing_comma=True -known_standard_library=numpy,setuptools,mock -skip=./datasets,docs -skip_glob=*/__init__.py -known_myself=detectron2 -known_third_party=fvcore,matplotlib,cv2,torch,torchvision,PIL,pycocotools,yacs,termcolor,cityscapesscripts,tabulate,tqdm,scipy,lvis,psutil,pkg_resources,caffe2,onnx -no_lines_before=STDLIB,THIRDPARTY -sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER -default_section=FIRSTPARTY - -[mypy] -python_version=3.6 -ignore_missing_imports = True -warn_unused_configs = True -disallow_untyped_defs = True -check_untyped_defs = True -warn_unused_ignores = True -warn_redundant_casts = True -show_column_numbers = True -follow_imports = silent -allow_redefinition = True -; Require all functions to be annotated -disallow_incomplete_defs = True diff --git a/preprocess/humanparsing/mhp_extension/detectron2/setup.py b/preprocess/humanparsing/mhp_extension/detectron2/setup.py deleted file mode 100644 index a863fab..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/setup.py +++ /dev/null @@ -1,156 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import glob -import os -import shutil -from os import path -from setuptools import find_packages, setup -from typing import List -import torch -from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension - -torch_ver = [int(x) for x in torch.__version__.split(".")[:2]] -assert torch_ver >= [1, 4], "Requires PyTorch >= 1.4" - - -def get_version(): - init_py_path = path.join(path.abspath(path.dirname(__file__)), "detectron2", "__init__.py") - init_py = open(init_py_path, "r").readlines() - version_line = [l.strip() for l in init_py if l.startswith("__version__")][0] - version = version_line.split("=")[-1].strip().strip("'\"") - - # The following is used to build release packages. - # Users should never use it. 
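`get_version()` above recovers the package version by scanning `detectron2/__init__.py` for the `__version__` line and stripping the quotes; the suffix and nightly handling continue in the file below. A tiny standalone version of just that parsing step, assuming the conventional `__version__ = "x.y.z"` form:

```python
def parse_version(init_py_text: str) -> str:
    # Find the __version__ assignment and strip whitespace and quote marks,
    # mirroring the split/strip chain used by get_version() above.
    line = next(l.strip() for l in init_py_text.splitlines()
                if l.startswith("__version__"))
    return line.split("=")[-1].strip().strip("'\"")

print(parse_version('__version__ = "0.1.3"\n'))  # -> 0.1.3
```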
- suffix = os.getenv("D2_VERSION_SUFFIX", "") - version = version + suffix - if os.getenv("BUILD_NIGHTLY", "0") == "1": - from datetime import datetime - - date_str = datetime.today().strftime("%y%m%d") - version = version + ".dev" + date_str - - new_init_py = [l for l in init_py if not l.startswith("__version__")] - new_init_py.append('__version__ = "{}"\n'.format(version)) - with open(init_py_path, "w") as f: - f.write("".join(new_init_py)) - return version - - -def get_extensions(): - this_dir = path.dirname(path.abspath(__file__)) - extensions_dir = path.join(this_dir, "detectron2", "layers", "csrc") - - main_source = path.join(extensions_dir, "vision.cpp") - sources = glob.glob(path.join(extensions_dir, "**", "*.cpp")) - source_cuda = glob.glob(path.join(extensions_dir, "**", "*.cu")) + glob.glob( - path.join(extensions_dir, "*.cu") - ) - - sources = [main_source] + sources - extension = CppExtension - - extra_compile_args = {"cxx": []} - define_macros = [] - - if ( - torch.cuda.is_available() and CUDA_HOME is not None and os.path.isdir(CUDA_HOME) - ) or os.getenv("FORCE_CUDA", "0") == "1": - extension = CUDAExtension - sources += source_cuda - define_macros += [("WITH_CUDA", None)] - extra_compile_args["nvcc"] = [ - "-DCUDA_HAS_FP16=1", - "-D__CUDA_NO_HALF_OPERATORS__", - "-D__CUDA_NO_HALF_CONVERSIONS__", - "-D__CUDA_NO_HALF2_OPERATORS__", - ] - - # It's better if pytorch can do this by default .. - CC = os.environ.get("CC", None) - if CC is not None: - extra_compile_args["nvcc"].append("-ccbin={}".format(CC)) - - include_dirs = [extensions_dir] - - ext_modules = [ - extension( - "detectron2._C", - sources, - include_dirs=include_dirs, - define_macros=define_macros, - extra_compile_args=extra_compile_args, - ) - ] - - return ext_modules - - -def get_model_zoo_configs() -> List[str]: - """ - Return a list of configs to include in package for model zoo. Copy over these configs inside - detectron2/model_zoo. - """ - - # Use absolute paths while symlinking. - source_configs_dir = path.join(path.dirname(path.realpath(__file__)), "configs") - destination = path.join( - path.dirname(path.realpath(__file__)), "detectron2", "model_zoo", "configs" - ) - # Symlink the config directory inside package to have a cleaner pip install. - - # Remove stale symlink/directory from a previous build. - if path.exists(source_configs_dir): - if path.islink(destination): - os.unlink(destination) - elif path.isdir(destination): - shutil.rmtree(destination) - - if not path.exists(destination): - try: - os.symlink(source_configs_dir, destination) - except OSError: - # Fall back to copying if symlink fails: ex. on Windows. 
- shutil.copytree(source_configs_dir, destination) - - config_paths = glob.glob("configs/**/*.yaml", recursive=True) - return config_paths - - -setup( - name="detectron2", - version=get_version(), - author="FAIR", - url="https://github.com/facebookresearch/detectron2", - description="Detectron2 is FAIR's next-generation research " - "platform for object detection and segmentation.", - packages=find_packages(exclude=("configs", "tests*")), - package_data={"detectron2.model_zoo": get_model_zoo_configs()}, - python_requires=">=3.6", - install_requires=[ - "termcolor>=1.1", - "Pillow", # you can also use pillow-simd for better performance - "yacs>=0.1.6", - "tabulate", - "cloudpickle", - "matplotlib", - "mock", - "tqdm>4.29.0", - "tensorboard", - "fvcore>=0.1.1", - "future", # used by caffe2 - "pydot", # used to save caffe2 SVGs - ], - extras_require={ - "all": ["shapely", "psutil"], - "dev": [ - "flake8==3.7.9", - "isort", - "black @ git+https://github.com/psf/black@673327449f86fce558adde153bb6cbe54bfebad2", - "flake8-bugbear", - "flake8-comprehensions", - ], - }, - ext_modules=get_extensions(), - cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, -) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/README.md b/preprocess/humanparsing/mhp_extension/detectron2/tests/README.md deleted file mode 100644 index f560384..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/README.md +++ /dev/null @@ -1,9 +0,0 @@ -## Unit Tests - -To run the unittests, do: -``` -cd detectron2 -python -m unittest discover -v -s ./tests -``` - -There are also end-to-end inference & training tests, in [dev/run_*_tests.sh](../dev). diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/__init__.py deleted file mode 100644 index 168f997..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/data/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/data/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_coco.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_coco.py deleted file mode 100644 index 2cd807d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_coco.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import json -import numpy as np -import os -import tempfile -import unittest -import pycocotools - -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.data.datasets.coco import convert_to_coco_dict, load_coco_json -from detectron2.structures import BoxMode - - -def make_mask(): - """ - Makes a donut shaped binary mask. - """ - H = 100 - W = 100 - mask = np.zeros([H, W], dtype=np.uint8) - for x in range(W): - for y in range(H): - d = np.linalg.norm(np.array([W, H]) / 2 - np.array([x, y])) - if d > 10 and d < 20: - mask[y, x] = 1 - return mask - - -def make_dataset_dicts(mask): - """ - Returns a list of dicts that represents a single COCO data point for - object detection. The single instance given by `mask` is represented by - RLE. 
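The test above encodes its donut-shaped mask as COCO run-length encoding (RLE) before registering the dummy dataset. A short round-trip sketch with `pycocotools` showing what that representation preserves (the mask contents here are arbitrary):

```python
import numpy as np
import pycocotools.mask as mask_util

# encode() expects a Fortran-ordered uint8 mask; decode() inverts it exactly,
# so the binary mask survives the JSON-friendly RLE representation unchanged.
mask = np.zeros((100, 100), dtype=np.uint8)
mask[20:40, 30:70] = 1
rle = mask_util.encode(np.asfortranarray(mask))
assert np.array_equal(mask_util.decode(rle), mask)
print(mask_util.area(rle))  # -> 800 (20 x 40 foreground pixels)
```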
- """ - record = {} - record["file_name"] = "test" - record["image_id"] = 0 - record["height"] = mask.shape[0] - record["width"] = mask.shape[1] - - y, x = np.nonzero(mask) - segmentation = pycocotools.mask.encode(np.asarray(mask, order="F")) - min_x = np.min(x) - max_x = np.max(x) - min_y = np.min(y) - max_y = np.max(y) - obj = { - "bbox": [min_x, min_y, max_x, max_y], - "bbox_mode": BoxMode.XYXY_ABS, - "category_id": 0, - "iscrowd": 0, - "segmentation": segmentation, - } - record["annotations"] = [obj] - return [record] - - -class TestRLEToJson(unittest.TestCase): - def test(self): - # Make a dummy dataset. - mask = make_mask() - DatasetCatalog.register("test_dataset", lambda: make_dataset_dicts(mask)) - MetadataCatalog.get("test_dataset").set(thing_classes=["test_label"]) - - # Dump to json. - json_dict = convert_to_coco_dict("test_dataset") - with tempfile.TemporaryDirectory() as tmpdir: - json_file_name = os.path.join(tmpdir, "test.json") - with open(json_file_name, "w") as f: - json.dump(json_dict, f) - # Load from json. - dicts = load_coco_json(json_file_name, "") - - # Check the loaded mask matches the original. - anno = dicts[0]["annotations"][0] - loaded_mask = pycocotools.mask.decode(anno["segmentation"]) - self.assertTrue(np.array_equal(loaded_mask, mask)) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_detection_utils.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_detection_utils.py deleted file mode 100644 index bdd94dd..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_detection_utils.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -import copy -import numpy as np -import unittest -import pycocotools.mask as mask_util - -from detectron2.data import detection_utils -from detectron2.data import transforms as T -from detectron2.structures import BitMasks, BoxMode - - -class TestTransformAnnotations(unittest.TestCase): - def test_transform_simple_annotation(self): - transforms = T.TransformList([T.HFlipTransform(400)]) - anno = { - "bbox": np.asarray([10, 10, 200, 300]), - "bbox_mode": BoxMode.XYXY_ABS, - "category_id": 3, - "segmentation": [[10, 10, 100, 100, 100, 10], [150, 150, 200, 150, 200, 200]], - } - - output = detection_utils.transform_instance_annotations(anno, transforms, (400, 400)) - self.assertTrue(np.allclose(output["bbox"], [200, 10, 390, 300])) - self.assertEqual(len(output["segmentation"]), len(anno["segmentation"])) - self.assertTrue(np.allclose(output["segmentation"][0], [390, 10, 300, 100, 300, 10])) - - detection_utils.annotations_to_instances([output, output], (400, 400)) - - def test_flip_keypoints(self): - transforms = T.TransformList([T.HFlipTransform(400)]) - anno = { - "bbox": np.asarray([10, 10, 200, 300]), - "bbox_mode": BoxMode.XYXY_ABS, - "keypoints": np.random.rand(17, 3) * 50 + 15, - } - - output = detection_utils.transform_instance_annotations( - copy.deepcopy(anno), - transforms, - (400, 400), - keypoint_hflip_indices=detection_utils.create_keypoint_hflip_indices( - ["keypoints_coco_2017_train"] - ), - ) - # The first keypoint is nose - self.assertTrue(np.allclose(output["keypoints"][0, 0], 400 - anno["keypoints"][0, 0])) - # The last 16 keypoints are 8 left-right pairs - self.assertTrue( - np.allclose( - output["keypoints"][1:, 0].reshape(-1, 2)[:, ::-1], - 400 - anno["keypoints"][1:, 0].reshape(-1, 2), - ) - ) - self.assertTrue( - np.allclose( - output["keypoints"][1:, 1:].reshape(-1, 2, 2)[:, ::-1, 
:], - anno["keypoints"][1:, 1:].reshape(-1, 2, 2), - ) - ) - - def test_transform_RLE(self): - transforms = T.TransformList([T.HFlipTransform(400)]) - mask = np.zeros((300, 400), order="F").astype("uint8") - mask[:, :200] = 1 - - anno = { - "bbox": np.asarray([10, 10, 200, 300]), - "bbox_mode": BoxMode.XYXY_ABS, - "segmentation": mask_util.encode(mask[:, :, None])[0], - "category_id": 3, - } - output = detection_utils.transform_instance_annotations( - copy.deepcopy(anno), transforms, (300, 400) - ) - mask = output["segmentation"] - self.assertTrue((mask[:, 200:] == 1).all()) - self.assertTrue((mask[:, :200] == 0).all()) - - inst = detection_utils.annotations_to_instances( - [output, output], (400, 400), mask_format="bitmask" - ) - self.assertTrue(isinstance(inst.gt_masks, BitMasks)) - - def test_transform_RLE_resize(self): - transforms = T.TransformList( - [T.HFlipTransform(400), T.ScaleTransform(300, 400, 400, 400, "bilinear")] - ) - mask = np.zeros((300, 400), order="F").astype("uint8") - mask[:, :200] = 1 - - anno = { - "bbox": np.asarray([10, 10, 200, 300]), - "bbox_mode": BoxMode.XYXY_ABS, - "segmentation": mask_util.encode(mask[:, :, None])[0], - "category_id": 3, - } - output = detection_utils.transform_instance_annotations( - copy.deepcopy(anno), transforms, (400, 400) - ) - - inst = detection_utils.annotations_to_instances( - [output, output], (400, 400), mask_format="bitmask" - ) - self.assertTrue(isinstance(inst.gt_masks, BitMasks)) - - def test_gen_crop(self): - instance = {"bbox": [10, 10, 100, 100], "bbox_mode": BoxMode.XYXY_ABS} - t = detection_utils.gen_crop_transform_with_instance((10, 10), (150, 150), instance) - # the box center must fall into the cropped region - self.assertTrue(t.x0 <= 55 <= t.x0 + t.w) - - def test_gen_crop_outside_boxes(self): - instance = {"bbox": [10, 10, 100, 100], "bbox_mode": BoxMode.XYXY_ABS} - with self.assertRaises(AssertionError): - detection_utils.gen_crop_transform_with_instance((10, 10), (15, 15), instance) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_rotation_transform.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_rotation_transform.py deleted file mode 100644 index 45faf7e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_rotation_transform.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import numpy as np -import unittest - -from detectron2.data.transforms.transform import RotationTransform - - -class TestRotationTransform(unittest.TestCase): - def assertEqualsArrays(self, a1, a2): - self.assertTrue(np.allclose(a1, a2)) - - def randomData(self, h=5, w=5): - image = np.random.rand(h, w) - coords = np.array([[i, j] for j in range(h + 1) for i in range(w + 1)], dtype=float) - return image, coords, h, w - - def test180(self): - image, coords, h, w = self.randomData(6, 6) - rot = RotationTransform(h, w, 180, expand=False, center=None) - self.assertEqualsArrays(rot.apply_image(image), image[::-1, ::-1]) - rotated_coords = [[w - c[0], h - c[1]] for c in coords] - self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords) - - def test45_coords(self): - _, coords, h, w = self.randomData(4, 6) - rot = RotationTransform(h, w, 45, expand=False, center=None) - rotated_coords = [ - [(x + y - (h + w) / 2) / np.sqrt(2) + w / 2, h / 2 + (y + (w - h) / 2 - x) / np.sqrt(2)] - for (x, y) in coords - ] - self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords) - - def test90(self): - image, coords, h, w = self.randomData() - rot = RotationTransform(h, w, 90, expand=False, center=None) - self.assertEqualsArrays(rot.apply_image(image), image.T[::-1]) - rotated_coords = [[c[1], w - c[0]] for c in coords] - self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords) - - def test90_expand(self): # non-square image - image, coords, h, w = self.randomData(h=5, w=8) - rot = RotationTransform(h, w, 90, expand=True, center=None) - self.assertEqualsArrays(rot.apply_image(image), image.T[::-1]) - rotated_coords = [[c[1], w - c[0]] for c in coords] - self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords) - - def test_center_expand(self): - # center has no effect if expand=True because it only affects shifting - image, coords, h, w = self.randomData(h=5, w=8) - angle = np.random.randint(360) - rot1 = RotationTransform(h, w, angle, expand=True, center=None) - rot2 = RotationTransform(h, w, angle, expand=True, center=(0, 0)) - rot3 = RotationTransform(h, w, angle, expand=True, center=(h, w)) - rot4 = RotationTransform(h, w, angle, expand=True, center=(2, 5)) - for r1 in [rot1, rot2, rot3, rot4]: - for r2 in [rot1, rot2, rot3, rot4]: - self.assertEqualsArrays(r1.apply_image(image), r2.apply_image(image)) - self.assertEqualsArrays(r1.apply_coords(coords), r2.apply_coords(coords)) - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_sampler.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_sampler.py deleted file mode 100644 index 1256a87..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_sampler.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
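The expectations hard-coded in the rotation tests above come from the closed-form effect of axis-aligned rotations: rotating an `h x w` image by 90 degrees sends a coordinate `(x, y)` to `(y, w - x)` and turns the image into `image.T[::-1]`. A quick numeric check of that relationship (shapes and points chosen arbitrarily):

```python
import numpy as np

h, w = 5, 8
image = np.random.rand(h, w)
coords = np.array([[1.0, 2.0], [7.0, 4.0]])        # (x, y) pairs

rotated_image = image.T[::-1]                      # 90-degree rotation
rotated_coords = np.stack([coords[:, 1], w - coords[:, 0]], axis=1)

print(rotated_image.shape)   # (8, 5): height and width swap
print(rotated_coords)        # [[2. 7.] [4. 1.]]
```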
-import unittest -from torch.utils.data.sampler import SequentialSampler - -from detectron2.data.samplers import GroupedBatchSampler - - -class TestGroupedBatchSampler(unittest.TestCase): - def test_missing_group_id(self): - sampler = SequentialSampler(list(range(100))) - group_ids = [1] * 100 - samples = GroupedBatchSampler(sampler, group_ids, 2) - - for mini_batch in samples: - self.assertEqual(len(mini_batch), 2) - - def test_groups(self): - sampler = SequentialSampler(list(range(100))) - group_ids = [1, 0] * 50 - samples = GroupedBatchSampler(sampler, group_ids, 2) - - for mini_batch in samples: - self.assertEqual((mini_batch[0] + mini_batch[1]) % 2, 0) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_transforms.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_transforms.py deleted file mode 100644 index 6d85518..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_transforms.py +++ /dev/null @@ -1,134 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import logging -import numpy as np -import unittest -from unittest import mock - -from detectron2.config import get_cfg -from detectron2.data import detection_utils -from detectron2.data import transforms as T -from detectron2.utils.logger import setup_logger - -logger = logging.getLogger(__name__) - - -class TestTransforms(unittest.TestCase): - def setUp(self): - setup_logger() - - def test_apply_rotated_boxes(self): - np.random.seed(125) - cfg = get_cfg() - is_train = True - transform_gen = detection_utils.build_transform_gen(cfg, is_train) - image = np.random.rand(200, 300) - image, transforms = T.apply_transform_gens(transform_gen, image) - image_shape = image.shape[:2] # h, w - assert image_shape == (800, 1200) - annotation = {"bbox": [179, 97, 62, 40, -56]} - - boxes = np.array([annotation["bbox"]], dtype=np.float64) # boxes.shape = (1, 5) - transformed_bbox = transforms.apply_rotated_box(boxes)[0] - - expected_bbox = np.array([484, 388, 248, 160, 56], dtype=np.float64) - err_msg = "transformed_bbox = {}, expected {}".format(transformed_bbox, expected_bbox) - assert np.allclose(transformed_bbox, expected_bbox), err_msg - - def test_apply_rotated_boxes_unequal_scaling_factor(self): - np.random.seed(125) - h, w = 400, 200 - newh, neww = 800, 800 - image = np.random.rand(h, w) - transform_gen = [] - transform_gen.append(T.Resize(shape=(newh, neww))) - image, transforms = T.apply_transform_gens(transform_gen, image) - image_shape = image.shape[:2] # h, w - assert image_shape == (newh, neww) - - boxes = np.array( - [ - [150, 100, 40, 20, 0], - [150, 100, 40, 20, 30], - [150, 100, 40, 20, 90], - [150, 100, 40, 20, -90], - ], - dtype=np.float64, - ) - transformed_boxes = transforms.apply_rotated_box(boxes) - - expected_bboxes = np.array( - [ - [600, 200, 160, 40, 0], - [600, 200, 144.22205102, 52.91502622, 49.10660535], - [600, 200, 80, 80, 90], - [600, 200, 80, 80, -90], - ], - dtype=np.float64, - ) - err_msg = "transformed_boxes = {}, expected {}".format(transformed_boxes, expected_bboxes) - assert np.allclose(transformed_boxes, expected_bboxes), err_msg - - def test_print_transform_gen(self): - t = T.RandomCrop("relative", (100, 100)) - self.assertTrue(str(t) == "RandomCrop(crop_type='relative', crop_size=(100, 100))") - - t = T.RandomFlip(prob=0.5) - self.assertTrue(str(t) == "RandomFlip(prob=0.5)") - - t = T.RandomFlip() - self.assertTrue(str(t) == "RandomFlip()") - - def 
test_random_apply_prob_out_of_range_check(self): - # GIVEN - test_probabilities = {0.0: True, 0.5: True, 1.0: True, -0.01: False, 1.01: False} - - # WHEN - for given_probability, is_valid in test_probabilities.items(): - # THEN - if not is_valid: - self.assertRaises(AssertionError, T.RandomApply, None, prob=given_probability) - else: - T.RandomApply(T.NoOpTransform(), prob=given_probability) - - def test_random_apply_wrapping_transform_gen_probability_occured_evaluation(self): - # GIVEN - transform_mock = mock.MagicMock(name="MockTransform", spec=T.TransformGen) - image_mock = mock.MagicMock(name="MockImage") - random_apply = T.RandomApply(transform_mock, prob=0.001) - - # WHEN - with mock.patch.object(random_apply, "_rand_range", return_value=0.0001): - transform = random_apply.get_transform(image_mock) - - # THEN - transform_mock.get_transform.assert_called_once_with(image_mock) - self.assertIsNot(transform, transform_mock) - - def test_random_apply_wrapping_std_transform_probability_occured_evaluation(self): - # GIVEN - transform_mock = mock.MagicMock(name="MockTransform", spec=T.Transform) - image_mock = mock.MagicMock(name="MockImage") - random_apply = T.RandomApply(transform_mock, prob=0.001) - - # WHEN - with mock.patch.object(random_apply, "_rand_range", return_value=0.0001): - transform = random_apply.get_transform(image_mock) - - # THEN - self.assertIs(transform, transform_mock) - - def test_random_apply_probability_not_occured_evaluation(self): - # GIVEN - transform_mock = mock.MagicMock(name="MockTransform", spec=T.TransformGen) - image_mock = mock.MagicMock(name="MockImage") - random_apply = T.RandomApply(transform_mock, prob=0.001) - - # WHEN - with mock.patch.object(random_apply, "_rand_range", return_value=0.9): - transform = random_apply.get_transform(image_mock) - - # THEN - transform_mock.get_transform.assert_not_called() - self.assertIsInstance(transform, T.NoOpTransform) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_mask_ops.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_mask_ops.py deleted file mode 100644 index d180627..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_mask_ops.py +++ /dev/null @@ -1,190 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -import contextlib -import io -import numpy as np -import unittest -from collections import defaultdict -import torch -import tqdm -from fvcore.common.benchmark import benchmark -from fvcore.common.file_io import PathManager -from pycocotools.coco import COCO -from tabulate import tabulate -from torch.nn import functional as F - -from detectron2.data import MetadataCatalog -from detectron2.layers.mask_ops import ( - pad_masks, - paste_mask_in_image_old, - paste_masks_in_image, - scale_boxes, -) -from detectron2.structures import BitMasks, Boxes, BoxMode, PolygonMasks -from detectron2.structures.masks import polygons_to_bitmask - - -def iou_between_full_image_bit_masks(a, b): - intersect = (a & b).sum() - union = (a | b).sum() - return intersect / union - - -def rasterize_polygons_with_grid_sample(full_image_bit_mask, box, mask_size, threshold=0.5): - x0, y0, x1, y1 = box[0], box[1], box[2], box[3] - - img_h, img_w = full_image_bit_mask.shape - - mask_y = np.arange(0.0, mask_size) + 0.5 # mask y sample coords in [0.5, mask_size - 0.5] - mask_x = np.arange(0.0, mask_size) + 0.5 # mask x sample coords in [0.5, mask_size - 0.5] - mask_y = mask_y / mask_size * (y1 - y0) + y0 - mask_x = mask_x / mask_size * (x1 - x0) + x0 - - mask_x = (mask_x - 0.5) / (img_w - 1) * 2 + -1 - mask_y = (mask_y - 0.5) / (img_h - 1) * 2 + -1 - gy, gx = torch.meshgrid(torch.from_numpy(mask_y), torch.from_numpy(mask_x)) - ind = torch.stack([gx, gy], dim=-1).to(dtype=torch.float32) - - full_image_bit_mask = torch.from_numpy(full_image_bit_mask) - mask = F.grid_sample( - full_image_bit_mask[None, None, :, :].to(dtype=torch.float32), - ind[None, :, :, :], - align_corners=True, - ) - - return mask[0, 0] >= threshold - - -class TestMaskCropPaste(unittest.TestCase): - def setUp(self): - json_file = MetadataCatalog.get("coco_2017_val_100").json_file - if not PathManager.isfile(json_file): - raise unittest.SkipTest("{} not found".format(json_file)) - with contextlib.redirect_stdout(io.StringIO()): - json_file = PathManager.get_local_path(json_file) - self.coco = COCO(json_file) - - def test_crop_paste_consistency(self): - """ - rasterize_polygons_within_box (used in training) - and - paste_masks_in_image (used in inference) - should be inverse operations to each other. - - This function runs several implementation of the above two operations and prints - the reconstruction error. 
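`iou_between_full_image_bit_masks` above reduces mask agreement to a single intersection-over-union number, which the crop/paste consistency test then averages over annotations. The computation itself is plain boolean arithmetic; a tiny worked example (mask placement chosen arbitrarily):

```python
import numpy as np

a = np.zeros((10, 10), dtype=bool); a[2:6, 2:6] = True   # 16 pixels
b = np.zeros((10, 10), dtype=bool); b[4:8, 4:8] = True   # 16 pixels

# The overlap is the 2x2 block at rows/cols 4-5, so IoU = 4 / (16 + 16 - 4).
iou = (a & b).sum() / (a | b).sum()
print(iou)  # 0.142857...
```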
- """ - - anns = self.coco.loadAnns(self.coco.getAnnIds(iscrowd=False)) # avoid crowd annotations - - selected_anns = anns[:100] - - ious = [] - for ann in tqdm.tqdm(selected_anns): - results = self.process_annotation(ann) - ious.append([k[2] for k in results]) - - ious = np.array(ious) - mean_ious = ious.mean(axis=0) - table = [] - res_dic = defaultdict(dict) - for row, iou in zip(results, mean_ious): - table.append((row[0], row[1], iou)) - res_dic[row[0]][row[1]] = iou - print(tabulate(table, headers=["rasterize", "paste", "iou"], tablefmt="simple")) - # assert that the reconstruction is good: - self.assertTrue(res_dic["polygon"]["aligned"] > 0.94) - self.assertTrue(res_dic["roialign"]["aligned"] > 0.95) - - def process_annotation(self, ann, mask_side_len=28): - # Parse annotation data - img_info = self.coco.loadImgs(ids=[ann["image_id"]])[0] - height, width = img_info["height"], img_info["width"] - gt_polygons = [np.array(p, dtype=np.float64) for p in ann["segmentation"]] - gt_bbox = BoxMode.convert(ann["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) - gt_bit_mask = polygons_to_bitmask(gt_polygons, height, width) - - # Run rasterize .. - torch_gt_bbox = torch.tensor(gt_bbox).to(dtype=torch.float32).reshape(-1, 4) - box_bitmasks = { - "polygon": PolygonMasks([gt_polygons]).crop_and_resize(torch_gt_bbox, mask_side_len)[0], - "gridsample": rasterize_polygons_with_grid_sample(gt_bit_mask, gt_bbox, mask_side_len), - "roialign": BitMasks(torch.from_numpy(gt_bit_mask[None, :, :])).crop_and_resize( - torch_gt_bbox, mask_side_len - )[0], - } - - # Run paste .. - results = defaultdict(dict) - for k, box_bitmask in box_bitmasks.items(): - padded_bitmask, scale = pad_masks(box_bitmask[None, :, :], 1) - scaled_boxes = scale_boxes(torch_gt_bbox, scale) - - r = results[k] - r["old"] = paste_mask_in_image_old( - padded_bitmask[0], scaled_boxes[0], height, width, threshold=0.5 - ) - r["aligned"] = paste_masks_in_image( - box_bitmask[None, :, :], Boxes(torch_gt_bbox), (height, width) - )[0] - - table = [] - for rasterize_method, r in results.items(): - for paste_method, mask in r.items(): - mask = np.asarray(mask) - iou = iou_between_full_image_bit_masks(gt_bit_mask.astype("uint8"), mask) - table.append((rasterize_method, paste_method, iou)) - return table - - def test_polygon_area(self): - # Draw polygon boxes - for d in [5.0, 10.0, 1000.0]: - polygon = PolygonMasks([[[0, 0, 0, d, d, d, d, 0]]]) - area = polygon.area()[0] - target = d ** 2 - self.assertEqual(area, target) - - # Draw polygon triangles - for d in [5.0, 10.0, 1000.0]: - polygon = PolygonMasks([[[0, 0, 0, d, d, d]]]) - area = polygon.area()[0] - target = d ** 2 / 2 - self.assertEqual(area, target) - - -def benchmark_paste(): - S = 800 - H, W = image_shape = (S, S) - N = 64 - torch.manual_seed(42) - masks = torch.rand(N, 28, 28) - - center = torch.rand(N, 2) * 600 + 100 - wh = torch.clamp(torch.randn(N, 2) * 40 + 200, min=50) - x0y0 = torch.clamp(center - wh * 0.5, min=0.0) - x1y1 = torch.clamp(center + wh * 0.5, max=S) - boxes = Boxes(torch.cat([x0y0, x1y1], axis=1)) - - def func(device, n=3): - m = masks.to(device=device) - b = boxes.to(device=device) - - def bench(): - for _ in range(n): - paste_masks_in_image(m, b, image_shape) - if device.type == "cuda": - torch.cuda.synchronize() - - return bench - - specs = [{"device": torch.device("cpu"), "n": 3}] - if torch.cuda.is_available(): - specs.append({"device": torch.device("cuda"), "n": 3}) - - benchmark(func, "paste_masks", specs, num_iters=10, warmup_iters=2) - - -if __name__ == "__main__": 
- benchmark_paste() - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_nms_rotated.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_nms_rotated.py deleted file mode 100644 index 94b346c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_nms_rotated.py +++ /dev/null @@ -1,188 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from __future__ import absolute_import, division, print_function, unicode_literals -import numpy as np -import unittest -import torch -from torchvision import ops - -from detectron2.layers import batched_nms, batched_nms_rotated, nms_rotated - - -def nms_edit_distance(keep1, keep2): - """ - Compare the "keep" result of two nms call. - They are allowed to be different in terms of edit distance - due to floating point precision issues, e.g., - if a box happen to have an IoU of 0.5 with another box, - one implentation may choose to keep it while another may discard it. - """ - if torch.equal(keep1, keep2): - # they should be equal most of the time - return 0 - keep1, keep2 = tuple(keep1.cpu()), tuple(keep2.cpu()) - m, n = len(keep1), len(keep2) - - # edit distance with DP - f = [np.arange(n + 1), np.arange(n + 1)] - for i in range(m): - cur_row = i % 2 - other_row = (i + 1) % 2 - f[other_row][0] = i + 1 - for j in range(n): - f[other_row][j + 1] = ( - f[cur_row][j] - if keep1[i] == keep2[j] - else min(min(f[cur_row][j], f[cur_row][j + 1]), f[other_row][j]) + 1 - ) - return f[m % 2][n] - - -class TestNMSRotated(unittest.TestCase): - def reference_horizontal_nms(self, boxes, scores, iou_threshold): - """ - Args: - box_scores (N, 5): boxes in corner-form and probabilities. - (Note here 5 == 4 + 1, i.e., 4-dim horizontal box + 1-dim prob) - iou_threshold: intersection over union threshold. 
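`nms_edit_distance` above tolerates off-by-one differences between NMS implementations by measuring the edit distance between their kept-index sequences. For reference, a compact standalone Levenshtein distance (this is the generic algorithm, not the exact two-row numpy variant used in the test):

```python
def edit_distance(a, b):
    # Classic dynamic-programming Levenshtein distance over two sequences.
    prev = list(range(len(b) + 1))
    for i, x in enumerate(a, 1):
        cur = [i] + [0] * len(b)
        for j, y in enumerate(b, 1):
            cur[j] = prev[j - 1] if x == y else 1 + min(prev[j - 1], prev[j], cur[j - 1])
        prev = cur
    return prev[-1]

print(edit_distance([3, 1, 4, 1, 5], [3, 4, 1, 5]))  # 1 (one kept index dropped)
```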
- Returns: - picked: a list of indexes of the kept boxes - """ - picked = [] - _, indexes = scores.sort(descending=True) - while len(indexes) > 0: - current = indexes[0] - picked.append(current.item()) - if len(indexes) == 1: - break - current_box = boxes[current, :] - indexes = indexes[1:] - rest_boxes = boxes[indexes, :] - iou = ops.box_iou(rest_boxes, current_box.unsqueeze(0)).squeeze(1) - indexes = indexes[iou <= iou_threshold] - - return torch.as_tensor(picked) - - def _create_tensors(self, N): - boxes = torch.rand(N, 4) * 100 - # Note: the implementation of this function in torchvision is: - # boxes[:, 2:] += torch.rand(N, 2) * 100 - # but it does not guarantee non-negative widths/heights constraints: - # boxes[:, 2] >= boxes[:, 0] and boxes[:, 3] >= boxes[:, 1]: - boxes[:, 2:] += boxes[:, :2] - scores = torch.rand(N) - return boxes, scores - - def test_batched_nms_rotated_0_degree_cpu(self): - N = 2000 - num_classes = 50 - boxes, scores = self._create_tensors(N) - idxs = torch.randint(0, num_classes, (N,)) - rotated_boxes = torch.zeros(N, 5) - rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 - rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 - rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] - rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] - err_msg = "Rotated NMS with 0 degree is incompatible with horizontal NMS for IoU={}" - for iou in [0.2, 0.5, 0.8]: - backup = boxes.clone() - keep_ref = batched_nms(boxes, scores, idxs, iou) - assert torch.allclose(boxes, backup), "boxes modified by batched_nms" - backup = rotated_boxes.clone() - keep = batched_nms_rotated(rotated_boxes, scores, idxs, iou) - assert torch.allclose( - rotated_boxes, backup - ), "rotated_boxes modified by batched_nms_rotated" - self.assertLessEqual(nms_edit_distance(keep, keep_ref), 1, err_msg.format(iou)) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_batched_nms_rotated_0_degree_cuda(self): - N = 2000 - num_classes = 50 - boxes, scores = self._create_tensors(N) - idxs = torch.randint(0, num_classes, (N,)) - rotated_boxes = torch.zeros(N, 5) - rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 - rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 - rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] - rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] - err_msg = "Rotated NMS with 0 degree is incompatible with horizontal NMS for IoU={}" - for iou in [0.2, 0.5, 0.8]: - backup = boxes.clone() - keep_ref = batched_nms(boxes.cuda(), scores.cuda(), idxs, iou) - self.assertTrue(torch.allclose(boxes, backup), "boxes modified by batched_nms") - backup = rotated_boxes.clone() - keep = batched_nms_rotated(rotated_boxes.cuda(), scores.cuda(), idxs, iou) - self.assertTrue( - torch.allclose(rotated_boxes, backup), - "rotated_boxes modified by batched_nms_rotated", - ) - self.assertLessEqual(nms_edit_distance(keep, keep_ref), 1, err_msg.format(iou)) - - def test_nms_rotated_0_degree_cpu(self): - N = 1000 - boxes, scores = self._create_tensors(N) - rotated_boxes = torch.zeros(N, 5) - rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 - rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 - rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] - rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] - err_msg = "Rotated NMS incompatible between CPU and reference implementation for IoU={}" - for iou in [0.5]: - keep_ref = self.reference_horizontal_nms(boxes, scores, iou) - keep = nms_rotated(rotated_boxes, scores, iou) - self.assertLessEqual(nms_edit_distance(keep, keep_ref), 1, 
err_msg.format(iou)) - - def test_nms_rotated_90_degrees_cpu(self): - N = 1000 - boxes, scores = self._create_tensors(N) - rotated_boxes = torch.zeros(N, 5) - rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 - rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 - # Note for rotated_boxes[:, 2] and rotated_boxes[:, 3]: - # widths and heights are intentionally swapped here for 90 degrees case - # so that the reference horizontal nms could be used - rotated_boxes[:, 2] = boxes[:, 3] - boxes[:, 1] - rotated_boxes[:, 3] = boxes[:, 2] - boxes[:, 0] - - rotated_boxes[:, 4] = torch.ones(N) * 90 - err_msg = "Rotated NMS incompatible between CPU and reference implementation for IoU={}" - for iou in [0.2, 0.5, 0.8]: - keep_ref = self.reference_horizontal_nms(boxes, scores, iou) - keep = nms_rotated(rotated_boxes, scores, iou) - assert torch.equal(keep, keep_ref), err_msg.format(iou) - - def test_nms_rotated_180_degrees_cpu(self): - N = 1000 - boxes, scores = self._create_tensors(N) - rotated_boxes = torch.zeros(N, 5) - rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 - rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 - rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] - rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] - rotated_boxes[:, 4] = torch.ones(N) * 180 - err_msg = "Rotated NMS incompatible between CPU and reference implementation for IoU={}" - for iou in [0.2, 0.5, 0.8]: - keep_ref = self.reference_horizontal_nms(boxes, scores, iou) - keep = nms_rotated(rotated_boxes, scores, iou) - assert torch.equal(keep, keep_ref), err_msg.format(iou) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_nms_rotated_0_degree_cuda(self): - N = 1000 - boxes, scores = self._create_tensors(N) - rotated_boxes = torch.zeros(N, 5) - rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 - rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 - rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] - rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] - err_msg = "Rotated NMS incompatible between CPU and CUDA for IoU={}" - - for iou in [0.2, 0.5, 0.8]: - r_cpu = nms_rotated(rotated_boxes, scores, iou) - r_cuda = nms_rotated(rotated_boxes.cuda(), scores.cuda(), iou) - - assert torch.equal(r_cpu, r_cuda.cpu()), err_msg.format(iou) - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_roi_align.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_roi_align.py deleted file mode 100644 index 633d7c2..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_roi_align.py +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import numpy as np -import unittest -import cv2 -import torch -from fvcore.common.benchmark import benchmark - -from detectron2.layers.roi_align import ROIAlign - - -class ROIAlignTest(unittest.TestCase): - def test_forward_output(self): - input = np.arange(25).reshape(5, 5).astype("float32") - """ - 0 1 2 3 4 - 5 6 7 8 9 - 10 11 12 13 14 - 15 16 17 18 19 - 20 21 22 23 24 - """ - - output = self._simple_roialign(input, [1, 1, 3, 3], (4, 4), aligned=False) - output_correct = self._simple_roialign(input, [1, 1, 3, 3], (4, 4), aligned=True) - - # without correction: - old_results = [ - [7.5, 8, 8.5, 9], - [10, 10.5, 11, 11.5], - [12.5, 13, 13.5, 14], - [15, 15.5, 16, 16.5], - ] - - # with 0.5 correction: - correct_results = [ - [4.5, 5.0, 5.5, 6.0], - [7.0, 7.5, 8.0, 8.5], - [9.5, 10.0, 10.5, 11.0], - [12.0, 12.5, 13.0, 13.5], - ] - # This is an upsampled version of [[6, 7], [11, 12]] - - self.assertTrue(np.allclose(output.flatten(), np.asarray(old_results).flatten())) - self.assertTrue( - np.allclose(output_correct.flatten(), np.asarray(correct_results).flatten()) - ) - - # Also see similar issues in tensorflow at - # https://github.com/tensorflow/tensorflow/issues/26278 - - def test_resize(self): - H, W = 30, 30 - input = np.random.rand(H, W).astype("float32") * 100 - box = [10, 10, 20, 20] - output = self._simple_roialign(input, box, (5, 5), aligned=True) - - input2x = cv2.resize(input, (W // 2, H // 2), interpolation=cv2.INTER_LINEAR) - box2x = [x / 2 for x in box] - output2x = self._simple_roialign(input2x, box2x, (5, 5), aligned=True) - diff = np.abs(output2x - output) - self.assertTrue(diff.max() < 1e-4) - - def _simple_roialign(self, img, box, resolution, aligned=True): - """ - RoiAlign with scale 1.0 and 0 sample ratio. - """ - if isinstance(resolution, int): - resolution = (resolution, resolution) - op = ROIAlign(resolution, 1.0, 0, aligned=aligned) - input = torch.from_numpy(img[None, None, :, :].astype("float32")) - - rois = [0] + list(box) - rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32")) - output = op.forward(input, rois) - if torch.cuda.is_available(): - output_cuda = op.forward(input.cuda(), rois.cuda()).cpu() - self.assertTrue(torch.allclose(output, output_cuda)) - return output[0, 0] - - def _simple_roialign_with_grad(self, img, box, resolution, device): - if isinstance(resolution, int): - resolution = (resolution, resolution) - - op = ROIAlign(resolution, 1.0, 0, aligned=True) - input = torch.from_numpy(img[None, None, :, :].astype("float32")) - - rois = [0] + list(box) - rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32")) - input = input.to(device=device) - rois = rois.to(device=device) - input.requires_grad = True - output = op.forward(input, rois) - return input, output - - def test_empty_box(self): - img = np.random.rand(5, 5) - box = [3, 4, 5, 4] - o = self._simple_roialign(img, box, 7) - self.assertTrue(o.shape == (7, 7)) - self.assertTrue((o == 0).all()) - - for dev in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: - input, output = self._simple_roialign_with_grad(img, box, 7, torch.device(dev)) - output.sum().backward() - self.assertTrue(torch.allclose(input.grad, torch.zeros_like(input))) - - def test_empty_batch(self): - input = torch.zeros(0, 3, 10, 10, dtype=torch.float32) - rois = torch.zeros(0, 5, dtype=torch.float32) - op = ROIAlign((7, 7), 1.0, 0, aligned=True) - output = op.forward(input, rois) - self.assertTrue(output.shape == (0, 3, 7, 7)) - - -def benchmark_roi_align(): - from 
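test_forward_output above pins down the effect of the aligned=True half-pixel correction: for the 5x5 arange input and the box [1, 1, 3, 3], the corrected output is an upsampled version of [[6, 7], [11, 12]] rather than the shifted values produced by aligned=False. A condensed usage sketch of the same call, assuming detectron2 is importable (the expected values are copied from the test):

import torch
from detectron2.layers.roi_align import ROIAlign

img = torch.arange(25, dtype=torch.float32).reshape(1, 1, 5, 5)   # NCHW
rois = torch.tensor([[0.0, 1.0, 1.0, 3.0, 3.0]])                  # (batch_idx, x1, y1, x2, y2)
op = ROIAlign((4, 4), 1.0, 0, aligned=True)                       # scale 1.0, sampling ratio 0
out = op.forward(img, rois)[0, 0]
expected = torch.tensor([[4.5, 5.0, 5.5, 6.0],
                         [7.0, 7.5, 8.0, 8.5],
                         [9.5, 10.0, 10.5, 11.0],
                         [12.0, 12.5, 13.0, 13.5]])
assert torch.allclose(out, expected)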
detectron2 import _C - - def random_boxes(mean_box, stdev, N, maxsize): - ret = torch.rand(N, 4) * stdev + torch.tensor(mean_box, dtype=torch.float) - ret.clamp_(min=0, max=maxsize) - return ret - - def func(N, C, H, W, nboxes_per_img): - input = torch.rand(N, C, H, W) - boxes = [] - batch_idx = [] - for k in range(N): - b = random_boxes([80, 80, 130, 130], 24, nboxes_per_img, H) - # try smaller boxes: - # b = random_boxes([100, 100, 110, 110], 4, nboxes_per_img, H) - boxes.append(b) - batch_idx.append(torch.zeros(nboxes_per_img, 1, dtype=torch.float32) + k) - boxes = torch.cat(boxes, axis=0) - batch_idx = torch.cat(batch_idx, axis=0) - boxes = torch.cat([batch_idx, boxes], axis=1) - - input = input.cuda() - boxes = boxes.cuda() - - def bench(): - _C.roi_align_forward(input, boxes, 1.0, 7, 7, 0, True) - torch.cuda.synchronize() - - return bench - - args = [dict(N=2, C=512, H=256, W=256, nboxes_per_img=500)] - benchmark(func, "cuda_roialign", args, num_iters=20, warmup_iters=1) - - -if __name__ == "__main__": - if torch.cuda.is_available(): - benchmark_roi_align() - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_roi_align_rotated.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_roi_align_rotated.py deleted file mode 100644 index 1915b59..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_roi_align_rotated.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -import unittest -import cv2 -import torch -from torch.autograd import Variable, gradcheck - -from detectron2.layers.roi_align import ROIAlign -from detectron2.layers.roi_align_rotated import ROIAlignRotated - -logger = logging.getLogger(__name__) - - -class ROIAlignRotatedTest(unittest.TestCase): - def _box_to_rotated_box(self, box, angle): - return [ - (box[0] + box[2]) / 2.0, - (box[1] + box[3]) / 2.0, - box[2] - box[0], - box[3] - box[1], - angle, - ] - - def _rot90(self, img, num): - num = num % 4 # note: -1 % 4 == 3 - for _ in range(num): - img = img.transpose(0, 1).flip(0) - return img - - def test_forward_output_0_90_180_270(self): - for i in range(4): - # i = 0, 1, 2, 3 corresponding to 0, 90, 180, 270 degrees - img = torch.arange(25, dtype=torch.float32).reshape(5, 5) - """ - 0 1 2 3 4 - 5 6 7 8 9 - 10 11 12 13 14 - 15 16 17 18 19 - 20 21 22 23 24 - """ - box = [1, 1, 3, 3] - rotated_box = self._box_to_rotated_box(box=box, angle=90 * i) - - result = self._simple_roi_align_rotated(img=img, box=rotated_box, resolution=(4, 4)) - - # Here's an explanation for 0 degree case: - # point 0 in the original input lies at [0.5, 0.5] - # (the center of bin [0, 1] x [0, 1]) - # point 1 in the original input lies at [1.5, 0.5], etc. 
- # since the resolution is (4, 4) that divides [1, 3] x [1, 3] - # into 4 x 4 equal bins, - # the top-left bin is [1, 1.5] x [1, 1.5], and its center - # (1.25, 1.25) lies at the 3/4 position - # between point 0 and point 1, point 5 and point 6, - # point 0 and point 5, point 1 and point 6, so it can be calculated as - # 0.25*(0*0.25+1*0.75)+(5*0.25+6*0.75)*0.75 = 4.5 - result_expected = torch.tensor( - [ - [4.5, 5.0, 5.5, 6.0], - [7.0, 7.5, 8.0, 8.5], - [9.5, 10.0, 10.5, 11.0], - [12.0, 12.5, 13.0, 13.5], - ] - ) - # This is also an upsampled version of [[6, 7], [11, 12]] - - # When the box is rotated by 90 degrees CCW, - # the result would be rotated by 90 degrees CW, thus it's -i here - result_expected = self._rot90(result_expected, -i) - - assert torch.allclose(result, result_expected) - - def test_resize(self): - H, W = 30, 30 - input = torch.rand(H, W) * 100 - box = [10, 10, 20, 20] - rotated_box = self._box_to_rotated_box(box, angle=0) - output = self._simple_roi_align_rotated(img=input, box=rotated_box, resolution=(5, 5)) - - input2x = cv2.resize(input.numpy(), (W // 2, H // 2), interpolation=cv2.INTER_LINEAR) - input2x = torch.from_numpy(input2x) - box2x = [x / 2 for x in box] - rotated_box2x = self._box_to_rotated_box(box2x, angle=0) - output2x = self._simple_roi_align_rotated(img=input2x, box=rotated_box2x, resolution=(5, 5)) - assert torch.allclose(output2x, output) - - def _simple_roi_align_rotated(self, img, box, resolution): - """ - RoiAlignRotated with scale 1.0 and 0 sample ratio. - """ - op = ROIAlignRotated(output_size=resolution, spatial_scale=1.0, sampling_ratio=0) - input = img[None, None, :, :] - - rois = [0] + list(box) - rois = torch.tensor(rois, dtype=torch.float32)[None, :] - result_cpu = op.forward(input, rois) - if torch.cuda.is_available(): - result_cuda = op.forward(input.cuda(), rois.cuda()) - assert torch.allclose(result_cpu, result_cuda.cpu()) - return result_cpu[0, 0] - - def test_empty_box(self): - img = torch.rand(5, 5) - out = self._simple_roi_align_rotated(img, [2, 3, 0, 0, 0], (7, 7)) - self.assertTrue((out == 0).all()) - - def test_roi_align_rotated_gradcheck_cpu(self): - dtype = torch.float64 - device = torch.device("cpu") - roi_align_rotated_op = ROIAlignRotated( - output_size=(5, 5), spatial_scale=0.5, sampling_ratio=1 - ).to(dtype=dtype, device=device) - x = torch.rand(1, 1, 10, 10, dtype=dtype, device=device, requires_grad=True) - # roi format is (batch index, x_center, y_center, width, height, angle) - rois = torch.tensor( - [[0, 4.5, 4.5, 9, 9, 0], [0, 2, 7, 4, 4, 0], [0, 7, 7, 4, 4, 0]], - dtype=dtype, - device=device, - ) - - def func(input): - return roi_align_rotated_op(input, rois) - - assert gradcheck(func, (x,)), "gradcheck failed for RoIAlignRotated CPU" - assert gradcheck(func, (x.transpose(2, 3),)), "gradcheck failed for RoIAlignRotated CPU" - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_roi_align_rotated_gradient_cuda(self): - """ - Compute gradients for ROIAlignRotated with multiple bounding boxes on the GPU, - and compare the result with ROIAlign - """ - # torch.manual_seed(123) - dtype = torch.float64 - device = torch.device("cuda") - pool_h, pool_w = (5, 5) - - roi_align = ROIAlign(output_size=(pool_h, pool_w), spatial_scale=1, sampling_ratio=2).to( - device=device - ) - - roi_align_rotated = ROIAlignRotated( - output_size=(pool_h, pool_w), spatial_scale=1, sampling_ratio=2 - ).to(device=device) - - x = torch.rand(1, 1, 10, 10, dtype=dtype, device=device, requires_grad=True) - # x_rotated 
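The comment above walks through the bilinear interpolation behind the expected top-left value of 4.5 in the 0-degree case. The same arithmetic, spelled out as a tiny bilinear-interpolation check (pure Python, values taken from the comment):

# Pixel centers sit at half-integer coordinates, so the sample point (1.25, 1.25)
# lies 0.75 of the way from point 0 (value 0) toward point 1 (value 1) in x,
# and 0.75 of the way from that row toward the next row (values 5 and 6) in y.
def bilinear(v00, v01, v10, v11, fx, fy):
    top = v00 * (1 - fx) + v01 * fx
    bottom = v10 * (1 - fx) + v11 * fx
    return top * (1 - fy) + bottom * fy

assert bilinear(0, 1, 5, 6, fx=0.75, fy=0.75) == 4.5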
= x.clone() won't work (will lead to grad_fun=CloneBackward)! - x_rotated = Variable(x.data.clone(), requires_grad=True) - - # roi_rotated format is (batch index, x_center, y_center, width, height, angle) - rois_rotated = torch.tensor( - [[0, 4.5, 4.5, 9, 9, 0], [0, 2, 7, 4, 4, 0], [0, 7, 7, 4, 4, 0]], - dtype=dtype, - device=device, - ) - - y_rotated = roi_align_rotated(x_rotated, rois_rotated) - s_rotated = y_rotated.sum() - s_rotated.backward() - - # roi format is (batch index, x1, y1, x2, y2) - rois = torch.tensor( - [[0, 0, 0, 9, 9], [0, 0, 5, 4, 9], [0, 5, 5, 9, 9]], dtype=dtype, device=device - ) - - y = roi_align(x, rois) - s = y.sum() - s.backward() - - assert torch.allclose( - x.grad, x_rotated.grad - ), "gradients for ROIAlign and ROIAlignRotated mismatch on CUDA" - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_anchor_generator.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_anchor_generator.py deleted file mode 100644 index bc14f02..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_anchor_generator.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -import unittest -import torch - -from detectron2.config import get_cfg -from detectron2.layers import ShapeSpec -from detectron2.modeling.anchor_generator import DefaultAnchorGenerator, RotatedAnchorGenerator - -logger = logging.getLogger(__name__) - - -class TestAnchorGenerator(unittest.TestCase): - def test_default_anchor_generator(self): - cfg = get_cfg() - cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]] - cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1, 4]] - - anchor_generator = DefaultAnchorGenerator(cfg, [ShapeSpec(stride=4)]) - - # only the last two dimensions of features matter here - num_images = 2 - features = {"stage3": torch.rand(num_images, 96, 1, 2)} - anchors = anchor_generator([features["stage3"]]) - expected_anchor_tensor = torch.tensor( - [ - [-32.0, -8.0, 32.0, 8.0], - [-16.0, -16.0, 16.0, 16.0], - [-8.0, -32.0, 8.0, 32.0], - [-64.0, -16.0, 64.0, 16.0], - [-32.0, -32.0, 32.0, 32.0], - [-16.0, -64.0, 16.0, 64.0], - [-28.0, -8.0, 36.0, 8.0], # -28.0 == -32.0 + STRIDE (4) - [-12.0, -16.0, 20.0, 16.0], - [-4.0, -32.0, 12.0, 32.0], - [-60.0, -16.0, 68.0, 16.0], - [-28.0, -32.0, 36.0, 32.0], - [-12.0, -64.0, 20.0, 64.0], - ] - ) - - assert torch.allclose(anchors[0].tensor, expected_anchor_tensor) - - def test_default_anchor_generator_centered(self): - # test explicit args - anchor_generator = DefaultAnchorGenerator( - sizes=[32, 64], aspect_ratios=[0.25, 1, 4], strides=[4] - ) - - # only the last two dimensions of features matter here - num_images = 2 - features = {"stage3": torch.rand(num_images, 96, 1, 2)} - expected_anchor_tensor = torch.tensor( - [ - [-30.0, -6.0, 34.0, 10.0], - [-14.0, -14.0, 18.0, 18.0], - [-6.0, -30.0, 10.0, 34.0], - [-62.0, -14.0, 66.0, 18.0], - [-30.0, -30.0, 34.0, 34.0], - [-14.0, -62.0, 18.0, 66.0], - [-26.0, -6.0, 38.0, 10.0], - [-10.0, -14.0, 22.0, 18.0], - [-2.0, -30.0, 14.0, 34.0], - [-58.0, -14.0, 70.0, 18.0], - [-26.0, -30.0, 38.0, 34.0], - [-10.0, -62.0, 22.0, 66.0], - ] - ) - - anchors = anchor_generator([features["stage3"]]) - assert 
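The expected_anchor_tensor above is consistent with a simple rule: each anchor keeps the area size**2 and reshapes it so that height / width equals the aspect ratio, and the full set is then repeated at every feature-map cell, offset by the stride. A hedged derivation of the first six rows (the anchors for cell (0, 0)); this reproduces the test data, not the library internals:

import math

sizes = [32, 64]
aspect_ratios = [0.25, 1, 4]
cell_anchors = []
for size in sizes:
    area = float(size * size)
    for ar in aspect_ratios:
        w = math.sqrt(area / ar)          # keep area = size**2 with h / w = ar
        h = ar * w
        cell_anchors.append([-w / 2, -h / 2, w / 2, h / 2])

# Matches the first six rows of expected_anchor_tensor; the remaining six rows
# are the same anchors shifted by the stride of 4 along x (the feature map is 1 x 2).
assert cell_anchors[0] == [-32.0, -8.0, 32.0, 8.0]
assert cell_anchors[2] == [-8.0, -32.0, 8.0, 32.0]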
torch.allclose(anchors[0].tensor, expected_anchor_tensor) - - # doesn't work yet - # anchors = torch.jit.script(anchor_generator)([features["stage3"]]) - # assert torch.allclose(anchors[0].tensor, expected_anchor_tensor) - - def test_rrpn_anchor_generator(self): - cfg = get_cfg() - cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]] - cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1, 4]] - cfg.MODEL.ANCHOR_GENERATOR.ANGLES = [0, 45] # test single list[float] - anchor_generator = RotatedAnchorGenerator(cfg, [ShapeSpec(stride=4)]) - - # only the last two dimensions of features matter here - num_images = 2 - features = {"stage3": torch.rand(num_images, 96, 1, 2)} - anchors = anchor_generator([features["stage3"]]) - expected_anchor_tensor = torch.tensor( - [ - [0.0, 0.0, 64.0, 16.0, 0.0], - [0.0, 0.0, 64.0, 16.0, 45.0], - [0.0, 0.0, 32.0, 32.0, 0.0], - [0.0, 0.0, 32.0, 32.0, 45.0], - [0.0, 0.0, 16.0, 64.0, 0.0], - [0.0, 0.0, 16.0, 64.0, 45.0], - [0.0, 0.0, 128.0, 32.0, 0.0], - [0.0, 0.0, 128.0, 32.0, 45.0], - [0.0, 0.0, 64.0, 64.0, 0.0], - [0.0, 0.0, 64.0, 64.0, 45.0], - [0.0, 0.0, 32.0, 128.0, 0.0], - [0.0, 0.0, 32.0, 128.0, 45.0], - [4.0, 0.0, 64.0, 16.0, 0.0], # 4.0 == 0.0 + STRIDE (4) - [4.0, 0.0, 64.0, 16.0, 45.0], - [4.0, 0.0, 32.0, 32.0, 0.0], - [4.0, 0.0, 32.0, 32.0, 45.0], - [4.0, 0.0, 16.0, 64.0, 0.0], - [4.0, 0.0, 16.0, 64.0, 45.0], - [4.0, 0.0, 128.0, 32.0, 0.0], - [4.0, 0.0, 128.0, 32.0, 45.0], - [4.0, 0.0, 64.0, 64.0, 0.0], - [4.0, 0.0, 64.0, 64.0, 45.0], - [4.0, 0.0, 32.0, 128.0, 0.0], - [4.0, 0.0, 32.0, 128.0, 45.0], - ] - ) - - assert torch.allclose(anchors[0].tensor, expected_anchor_tensor) - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_box2box_transform.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_box2box_transform.py deleted file mode 100644 index 9d124d7..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_box2box_transform.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import logging -import unittest -import torch - -from detectron2.modeling.box_regression import Box2BoxTransform, Box2BoxTransformRotated - -logger = logging.getLogger(__name__) - - -def random_boxes(mean_box, stdev, N): - return torch.rand(N, 4) * stdev + torch.tensor(mean_box, dtype=torch.float) - - -class TestBox2BoxTransform(unittest.TestCase): - def test_reconstruction(self): - weights = (5, 5, 10, 10) - b2b_tfm = Box2BoxTransform(weights=weights) - src_boxes = random_boxes([10, 10, 20, 20], 1, 10) - dst_boxes = random_boxes([10, 10, 20, 20], 1, 10) - - devices = [torch.device("cpu")] - if torch.cuda.is_available(): - devices.append(torch.device("cuda")) - for device in devices: - src_boxes = src_boxes.to(device=device) - dst_boxes = dst_boxes.to(device=device) - deltas = b2b_tfm.get_deltas(src_boxes, dst_boxes) - dst_boxes_reconstructed = b2b_tfm.apply_deltas(deltas, src_boxes) - assert torch.allclose(dst_boxes, dst_boxes_reconstructed) - - -def random_rotated_boxes(mean_box, std_length, std_angle, N): - return torch.cat( - [torch.rand(N, 4) * std_length, torch.rand(N, 1) * std_angle], dim=1 - ) + torch.tensor(mean_box, dtype=torch.float) - - -class TestBox2BoxTransformRotated(unittest.TestCase): - def test_reconstruction(self): - weights = (5, 5, 10, 10, 1) - b2b_transform = Box2BoxTransformRotated(weights=weights) - src_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10) - dst_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10) - - devices = [torch.device("cpu")] - if torch.cuda.is_available(): - devices.append(torch.device("cuda")) - for device in devices: - src_boxes = src_boxes.to(device=device) - dst_boxes = dst_boxes.to(device=device) - deltas = b2b_transform.get_deltas(src_boxes, dst_boxes) - dst_boxes_reconstructed = b2b_transform.apply_deltas(deltas, src_boxes) - assert torch.allclose(dst_boxes[:, :4], dst_boxes_reconstructed[:, :4], atol=1e-5) - # angle difference has to be normalized - assert torch.allclose( - (dst_boxes[:, 4] - dst_boxes_reconstructed[:, 4] + 180.0) % 360.0 - 180.0, - torch.zeros_like(dst_boxes[:, 4]), - atol=1e-4, - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_fast_rcnn.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_fast_rcnn.py deleted file mode 100644 index 70b64d3..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_fast_rcnn.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
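The angle check in TestBox2BoxTransformRotated wraps the difference into [-180, 180) before comparing against zero, so a reconstruction that comes back as 359.9 when the target was 0 still counts as a match. A one-line illustration of the wrap (plain Python floats, helper name is ours):

def wrap_angle_diff(a, b):
    # Map a - b into [-180, 180) so that e.g. 359.9 vs 0.0 compares as -0.1 degrees.
    return (a - b + 180.0) % 360.0 - 180.0

assert abs(wrap_angle_diff(359.9, 0.0) - (-0.1)) < 1e-9
assert wrap_angle_diff(-180.0, 180.0) == 0.0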
All Rights Reserved -import logging -import unittest -import torch - -from detectron2.layers import ShapeSpec -from detectron2.modeling.box_regression import Box2BoxTransform, Box2BoxTransformRotated -from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers -from detectron2.modeling.roi_heads.rotated_fast_rcnn import RotatedFastRCNNOutputLayers -from detectron2.structures import Boxes, Instances, RotatedBoxes -from detectron2.utils.events import EventStorage - -logger = logging.getLogger(__name__) - - -class FastRCNNTest(unittest.TestCase): - def test_fast_rcnn(self): - torch.manual_seed(132) - - box_head_output_size = 8 - - box_predictor = FastRCNNOutputLayers( - ShapeSpec(channels=box_head_output_size), - box2box_transform=Box2BoxTransform(weights=(10, 10, 5, 5)), - num_classes=5, - ) - feature_pooled = torch.rand(2, box_head_output_size) - predictions = box_predictor(feature_pooled) - - proposal_boxes = torch.tensor([[0.8, 1.1, 3.2, 2.8], [2.3, 2.5, 7, 8]], dtype=torch.float32) - gt_boxes = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32) - proposal = Instances((10, 10)) - proposal.proposal_boxes = Boxes(proposal_boxes) - proposal.gt_boxes = Boxes(gt_boxes) - proposal.gt_classes = torch.tensor([1, 2]) - - with EventStorage(): # capture events in a new storage to discard them - losses = box_predictor.losses(predictions, [proposal]) - - expected_losses = { - "loss_cls": torch.tensor(1.7951188087), - "loss_box_reg": torch.tensor(4.0357131958), - } - for name in expected_losses.keys(): - assert torch.allclose(losses[name], expected_losses[name]) - - def test_fast_rcnn_empty_batch(self, device="cpu"): - box_predictor = FastRCNNOutputLayers( - ShapeSpec(channels=10), - box2box_transform=Box2BoxTransform(weights=(10, 10, 5, 5)), - num_classes=8, - ).to(device=device) - - logits = torch.randn(0, 100, requires_grad=True, device=device) - deltas = torch.randn(0, 4, requires_grad=True, device=device) - losses = box_predictor.losses([logits, deltas], []) - for value in losses.values(): - self.assertTrue(torch.allclose(value, torch.zeros_like(value))) - sum(losses.values()).backward() - self.assertTrue(logits.grad is not None) - self.assertTrue(deltas.grad is not None) - - predictions, _ = box_predictor.inference([logits, deltas], []) - self.assertEqual(len(predictions), 0) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_fast_rcnn_empty_batch_cuda(self): - self.test_fast_rcnn_empty_batch(device=torch.device("cuda")) - - def test_fast_rcnn_rotated(self): - torch.manual_seed(132) - box_head_output_size = 8 - - box_predictor = RotatedFastRCNNOutputLayers( - ShapeSpec(channels=box_head_output_size), - box2box_transform=Box2BoxTransformRotated(weights=(10, 10, 5, 5, 1)), - num_classes=5, - ) - feature_pooled = torch.rand(2, box_head_output_size) - predictions = box_predictor(feature_pooled) - proposal_boxes = torch.tensor( - [[2, 1.95, 2.4, 1.7, 0], [4.65, 5.25, 4.7, 5.5, 0]], dtype=torch.float32 - ) - gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]], dtype=torch.float32) - proposal = Instances((10, 10)) - proposal.proposal_boxes = RotatedBoxes(proposal_boxes) - proposal.gt_boxes = RotatedBoxes(gt_boxes) - proposal.gt_classes = torch.tensor([1, 2]) - - with EventStorage(): # capture events in a new storage to discard them - losses = box_predictor.losses(predictions, [proposal]) - - # Note: the expected losses are slightly different even if - # the boxes are essentially the same as in the FastRCNNOutput test, because - # 
bbox_pred in FastRCNNOutputLayers have different Linear layers/initialization - # between the two cases. - expected_losses = { - "loss_cls": torch.tensor(1.7920907736), - "loss_box_reg": torch.tensor(4.0410838127), - } - for name in expected_losses.keys(): - assert torch.allclose(losses[name], expected_losses[name]) - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_model_e2e.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_model_e2e.py deleted file mode 100644 index 95fe6a0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_model_e2e.py +++ /dev/null @@ -1,154 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - - -import unittest -import torch - -import detectron2.model_zoo as model_zoo -from detectron2.config import get_cfg -from detectron2.modeling import build_model -from detectron2.structures import BitMasks, Boxes, ImageList, Instances -from detectron2.utils.events import EventStorage - - -def get_model_zoo(config_path): - """ - Like model_zoo.get, but do not load any weights (even pretrained) - """ - cfg_file = model_zoo.get_config_file(config_path) - cfg = get_cfg() - cfg.merge_from_file(cfg_file) - if not torch.cuda.is_available(): - cfg.MODEL.DEVICE = "cpu" - return build_model(cfg) - - -def create_model_input(img, inst=None): - if inst is not None: - return {"image": img, "instances": inst} - else: - return {"image": img} - - -def get_empty_instance(h, w): - inst = Instances((h, w)) - inst.gt_boxes = Boxes(torch.rand(0, 4)) - inst.gt_classes = torch.tensor([]).to(dtype=torch.int64) - inst.gt_masks = BitMasks(torch.rand(0, h, w)) - return inst - - -def get_regular_bitmask_instances(h, w): - inst = Instances((h, w)) - inst.gt_boxes = Boxes(torch.rand(3, 4)) - inst.gt_boxes.tensor[:, 2:] += inst.gt_boxes.tensor[:, :2] - inst.gt_classes = torch.tensor([3, 4, 5]).to(dtype=torch.int64) - inst.gt_masks = BitMasks((torch.rand(3, h, w) > 0.5)) - return inst - - -class ModelE2ETest: - def setUp(self): - torch.manual_seed(43) - self.model = get_model_zoo(self.CONFIG_PATH) - - def _test_eval(self, input_sizes): - inputs = [create_model_input(torch.rand(3, s[0], s[1])) for s in input_sizes] - self.model.eval() - self.model(inputs) - - def _test_train(self, input_sizes, instances): - assert len(input_sizes) == len(instances) - inputs = [ - create_model_input(torch.rand(3, s[0], s[1]), inst) - for s, inst in zip(input_sizes, instances) - ] - self.model.train() - with EventStorage(): - losses = self.model(inputs) - sum(losses.values()).backward() - del losses - - def _inf_tensor(self, *shape): - return 1.0 / torch.zeros(*shape, device=self.model.device) - - def _nan_tensor(self, *shape): - return torch.zeros(*shape, device=self.model.device).fill_(float("nan")) - - def test_empty_data(self): - instances = [get_empty_instance(200, 250), get_empty_instance(200, 249)] - self._test_eval([(200, 250), (200, 249)]) - self._test_train([(200, 250), (200, 249)], instances) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable") - def test_eval_tocpu(self): - model = get_model_zoo(self.CONFIG_PATH).cpu() - model.eval() - input_sizes = [(200, 250), (200, 249)] - inputs = [create_model_input(torch.rand(3, s[0], s[1])) for s in input_sizes] - model(inputs) - - -class MaskRCNNE2ETest(ModelE2ETest, unittest.TestCase): - CONFIG_PATH = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" - - def test_half_empty_data(self): - 
instances = [get_empty_instance(200, 250), get_regular_bitmask_instances(200, 249)] - self._test_train([(200, 250), (200, 249)], instances) - - # This test is flaky because in some environment the output features are zero due to relu - # def test_rpn_inf_nan_data(self): - # self.model.eval() - # for tensor in [self._inf_tensor, self._nan_tensor]: - # images = ImageList(tensor(1, 3, 512, 512), [(510, 510)]) - # features = { - # "p2": tensor(1, 256, 256, 256), - # "p3": tensor(1, 256, 128, 128), - # "p4": tensor(1, 256, 64, 64), - # "p5": tensor(1, 256, 32, 32), - # "p6": tensor(1, 256, 16, 16), - # } - # props, _ = self.model.proposal_generator(images, features) - # self.assertEqual(len(props[0]), 0) - - def test_roiheads_inf_nan_data(self): - self.model.eval() - for tensor in [self._inf_tensor, self._nan_tensor]: - images = ImageList(tensor(1, 3, 512, 512), [(510, 510)]) - features = { - "p2": tensor(1, 256, 256, 256), - "p3": tensor(1, 256, 128, 128), - "p4": tensor(1, 256, 64, 64), - "p5": tensor(1, 256, 32, 32), - "p6": tensor(1, 256, 16, 16), - } - props = [Instances((510, 510))] - props[0].proposal_boxes = Boxes([[10, 10, 20, 20]]).to(device=self.model.device) - props[0].objectness_logits = torch.tensor([1.0]).reshape(1, 1) - det, _ = self.model.roi_heads(images, features, props) - self.assertEqual(len(det[0]), 0) - - -class RetinaNetE2ETest(ModelE2ETest, unittest.TestCase): - CONFIG_PATH = "COCO-Detection/retinanet_R_50_FPN_1x.yaml" - - def test_inf_nan_data(self): - self.model.eval() - self.model.score_threshold = -999999999 - for tensor in [self._inf_tensor, self._nan_tensor]: - images = ImageList(tensor(1, 3, 512, 512), [(510, 510)]) - features = [ - tensor(1, 256, 128, 128), - tensor(1, 256, 64, 64), - tensor(1, 256, 32, 32), - tensor(1, 256, 16, 16), - tensor(1, 256, 8, 8), - ] - anchors = self.model.anchor_generator(features) - box_cls, box_delta = self.model.head(features) - box_cls = [tensor(*k.shape) for k in box_cls] - box_delta = [tensor(*k.shape) for k in box_delta] - det = self.model.inference(box_cls, box_delta, anchors, images.image_sizes) - # all predictions (if any) are infinite or nan - if len(det[0]): - self.assertTrue(torch.isfinite(det[0].pred_boxes.tensor).sum() == 0) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_roi_heads.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_roi_heads.py deleted file mode 100644 index 5a06303..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_roi_heads.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import logging -import unittest -import torch - -from detectron2.config import get_cfg -from detectron2.modeling.backbone import build_backbone -from detectron2.modeling.proposal_generator.build import build_proposal_generator -from detectron2.modeling.roi_heads import build_roi_heads -from detectron2.structures import Boxes, ImageList, Instances, RotatedBoxes -from detectron2.utils.events import EventStorage - -logger = logging.getLogger(__name__) - - -class ROIHeadsTest(unittest.TestCase): - def test_roi_heads(self): - torch.manual_seed(121) - cfg = get_cfg() - cfg.MODEL.ROI_HEADS.NAME = "StandardROIHeads" - cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead" - cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2 - cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2" - cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5) - backbone = build_backbone(cfg) - num_images = 2 - images_tensor = torch.rand(num_images, 20, 30) - image_sizes = [(10, 10), (20, 30)] - images = ImageList(images_tensor, image_sizes) - num_channels = 1024 - features = {"res4": torch.rand(num_images, num_channels, 1, 2)} - - image_shape = (15, 15) - gt_boxes0 = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32) - gt_instance0 = Instances(image_shape) - gt_instance0.gt_boxes = Boxes(gt_boxes0) - gt_instance0.gt_classes = torch.tensor([2, 1]) - gt_boxes1 = torch.tensor([[1, 5, 2, 8], [7, 3, 10, 5]], dtype=torch.float32) - gt_instance1 = Instances(image_shape) - gt_instance1.gt_boxes = Boxes(gt_boxes1) - gt_instance1.gt_classes = torch.tensor([1, 2]) - gt_instances = [gt_instance0, gt_instance1] - - proposal_generator = build_proposal_generator(cfg, backbone.output_shape()) - roi_heads = build_roi_heads(cfg, backbone.output_shape()) - - with EventStorage(): # capture events in a new storage to discard them - proposals, proposal_losses = proposal_generator(images, features, gt_instances) - _, detector_losses = roi_heads(images, features, proposals, gt_instances) - - expected_losses = { - "loss_cls": torch.tensor(4.4236516953), - "loss_box_reg": torch.tensor(0.0091214813), - } - for name in expected_losses.keys(): - self.assertTrue(torch.allclose(detector_losses[name], expected_losses[name])) - - def test_rroi_heads(self): - torch.manual_seed(121) - cfg = get_cfg() - cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN" - cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator" - cfg.MODEL.ROI_HEADS.NAME = "RROIHeads" - cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead" - cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2 - cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1) - cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead" - cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated" - cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1) - backbone = build_backbone(cfg) - num_images = 2 - images_tensor = torch.rand(num_images, 20, 30) - image_sizes = [(10, 10), (20, 30)] - images = ImageList(images_tensor, image_sizes) - num_channels = 1024 - features = {"res4": torch.rand(num_images, num_channels, 1, 2)} - - image_shape = (15, 15) - gt_boxes0 = torch.tensor([[2, 2, 2, 2, 30], [4, 4, 4, 4, 0]], dtype=torch.float32) - gt_instance0 = Instances(image_shape) - gt_instance0.gt_boxes = RotatedBoxes(gt_boxes0) - gt_instance0.gt_classes = torch.tensor([2, 1]) - gt_boxes1 = torch.tensor([[1.5, 5.5, 1, 3, 0], [8.5, 4, 3, 2, -50]], dtype=torch.float32) - gt_instance1 = Instances(image_shape) - gt_instance1.gt_boxes = RotatedBoxes(gt_boxes1) - gt_instance1.gt_classes = torch.tensor([1, 2]) - gt_instances = [gt_instance0, gt_instance1] - - 
proposal_generator = build_proposal_generator(cfg, backbone.output_shape()) - roi_heads = build_roi_heads(cfg, backbone.output_shape()) - - with EventStorage(): # capture events in a new storage to discard them - proposals, proposal_losses = proposal_generator(images, features, gt_instances) - _, detector_losses = roi_heads(images, features, proposals, gt_instances) - - expected_losses = { - "loss_cls": torch.tensor(4.381618499755859), - "loss_box_reg": torch.tensor(0.0011829272843897343), - } - for name in expected_losses.keys(): - err_msg = "detector_losses[{}] = {}, expected losses = {}".format( - name, detector_losses[name], expected_losses[name] - ) - self.assertTrue(torch.allclose(detector_losses[name], expected_losses[name]), err_msg) - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_roi_pooler.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_roi_pooler.py deleted file mode 100644 index 9aa3825..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_roi_pooler.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -import unittest -import torch - -from detectron2.modeling.poolers import ROIPooler -from detectron2.structures import Boxes, RotatedBoxes - -logger = logging.getLogger(__name__) - - -class TestROIPooler(unittest.TestCase): - def _rand_boxes(self, num_boxes, x_max, y_max): - coords = torch.rand(num_boxes, 4) - coords[:, 0] *= x_max - coords[:, 1] *= y_max - coords[:, 2] *= x_max - coords[:, 3] *= y_max - boxes = torch.zeros(num_boxes, 4) - boxes[:, 0] = torch.min(coords[:, 0], coords[:, 2]) - boxes[:, 1] = torch.min(coords[:, 1], coords[:, 3]) - boxes[:, 2] = torch.max(coords[:, 0], coords[:, 2]) - boxes[:, 3] = torch.max(coords[:, 1], coords[:, 3]) - return boxes - - def _test_roialignv2_roialignrotated_match(self, device): - pooler_resolution = 14 - canonical_level = 4 - canonical_scale_factor = 2 ** canonical_level - pooler_scales = (1.0 / canonical_scale_factor,) - sampling_ratio = 0 - - N, C, H, W = 2, 4, 10, 8 - N_rois = 10 - std = 11 - mean = 0 - feature = (torch.rand(N, C, H, W) - 0.5) * 2 * std + mean - - features = [feature.to(device)] - - rois = [] - rois_rotated = [] - for _ in range(N): - boxes = self._rand_boxes( - num_boxes=N_rois, x_max=W * canonical_scale_factor, y_max=H * canonical_scale_factor - ) - - rotated_boxes = torch.zeros(N_rois, 5) - rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 - rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 - rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] - rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] - rois.append(Boxes(boxes).to(device)) - rois_rotated.append(RotatedBoxes(rotated_boxes).to(device)) - - roialignv2_pooler = ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type="ROIAlignV2", - ) - - roialignv2_out = roialignv2_pooler(features, rois) - - roialignrotated_pooler = ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type="ROIAlignRotated", - ) - - roialignrotated_out = roialignrotated_pooler(features, rois_rotated) - - self.assertTrue(torch.allclose(roialignv2_out, roialignrotated_out, atol=1e-4)) - - def test_roialignv2_roialignrotated_match_cpu(self): - self._test_roialignv2_roialignrotated_match(device="cpu") - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not 
available") - def test_roialignv2_roialignrotated_match_cuda(self): - self._test_roialignv2_roialignrotated_match(device="cuda") - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_rpn.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_rpn.py deleted file mode 100644 index 967d210..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_rpn.py +++ /dev/null @@ -1,234 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -import unittest -import torch - -from detectron2.config import get_cfg -from detectron2.modeling.backbone import build_backbone -from detectron2.modeling.proposal_generator.build import build_proposal_generator -from detectron2.modeling.proposal_generator.rpn_outputs import find_top_rpn_proposals -from detectron2.structures import Boxes, ImageList, Instances, RotatedBoxes -from detectron2.utils.events import EventStorage - -logger = logging.getLogger(__name__) - - -class RPNTest(unittest.TestCase): - def test_rpn(self): - torch.manual_seed(121) - cfg = get_cfg() - cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RPN" - cfg.MODEL.ANCHOR_GENERATOR.NAME = "DefaultAnchorGenerator" - cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1) - backbone = build_backbone(cfg) - proposal_generator = build_proposal_generator(cfg, backbone.output_shape()) - num_images = 2 - images_tensor = torch.rand(num_images, 20, 30) - image_sizes = [(10, 10), (20, 30)] - images = ImageList(images_tensor, image_sizes) - image_shape = (15, 15) - num_channels = 1024 - features = {"res4": torch.rand(num_images, num_channels, 1, 2)} - gt_boxes = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32) - gt_instances = Instances(image_shape) - gt_instances.gt_boxes = Boxes(gt_boxes) - with EventStorage(): # capture events in a new storage to discard them - proposals, proposal_losses = proposal_generator( - images, features, [gt_instances[0], gt_instances[1]] - ) - - expected_losses = { - "loss_rpn_cls": torch.tensor(0.0804563984), - "loss_rpn_loc": torch.tensor(0.0990132466), - } - for name in expected_losses.keys(): - err_msg = "proposal_losses[{}] = {}, expected losses = {}".format( - name, proposal_losses[name], expected_losses[name] - ) - self.assertTrue(torch.allclose(proposal_losses[name], expected_losses[name]), err_msg) - - expected_proposal_boxes = [ - Boxes(torch.tensor([[0, 0, 10, 10], [7.3365392685, 0, 10, 10]])), - Boxes( - torch.tensor( - [ - [0, 0, 30, 20], - [0, 0, 16.7862777710, 13.1362524033], - [0, 0, 30, 13.3173446655], - [0, 0, 10.8602609634, 20], - [7.7165775299, 0, 27.3875980377, 20], - ] - ) - ), - ] - - expected_objectness_logits = [ - torch.tensor([0.1225359365, -0.0133192837]), - torch.tensor([0.1415634006, 0.0989848152, 0.0565387346, -0.0072308783, -0.0428492837]), - ] - - for proposal, expected_proposal_box, im_size, expected_objectness_logit in zip( - proposals, expected_proposal_boxes, image_sizes, expected_objectness_logits - ): - self.assertEqual(len(proposal), len(expected_proposal_box)) - self.assertEqual(proposal.image_size, im_size) - self.assertTrue( - torch.allclose(proposal.proposal_boxes.tensor, expected_proposal_box.tensor) - ) - self.assertTrue(torch.allclose(proposal.objectness_logits, expected_objectness_logit)) - - def test_rrpn(self): - torch.manual_seed(121) - cfg = get_cfg() - cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN" - cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator" - 
cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]] - cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1]] - cfg.MODEL.ANCHOR_GENERATOR.ANGLES = [[0, 60]] - cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1) - cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead" - backbone = build_backbone(cfg) - proposal_generator = build_proposal_generator(cfg, backbone.output_shape()) - num_images = 2 - images_tensor = torch.rand(num_images, 20, 30) - image_sizes = [(10, 10), (20, 30)] - images = ImageList(images_tensor, image_sizes) - image_shape = (15, 15) - num_channels = 1024 - features = {"res4": torch.rand(num_images, num_channels, 1, 2)} - gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]], dtype=torch.float32) - gt_instances = Instances(image_shape) - gt_instances.gt_boxes = RotatedBoxes(gt_boxes) - with EventStorage(): # capture events in a new storage to discard them - proposals, proposal_losses = proposal_generator( - images, features, [gt_instances[0], gt_instances[1]] - ) - - expected_losses = { - "loss_rpn_cls": torch.tensor(0.043263837695121765), - "loss_rpn_loc": torch.tensor(0.14432406425476074), - } - for name in expected_losses.keys(): - err_msg = "proposal_losses[{}] = {}, expected losses = {}".format( - name, proposal_losses[name], expected_losses[name] - ) - self.assertTrue(torch.allclose(proposal_losses[name], expected_losses[name]), err_msg) - - expected_proposal_boxes = [ - RotatedBoxes( - torch.tensor( - [ - [0.60189795, 1.24095452, 61.98131943, 18.03621292, -4.07244873], - [15.64940453, 1.69624567, 59.59749603, 16.34339333, 2.62692475], - [-3.02982378, -2.69752932, 67.90952301, 59.62455750, 59.97010040], - [16.71863365, 1.98309708, 35.61507797, 32.81484985, 62.92267227], - [0.49432933, -7.92979717, 67.77606201, 62.93098450, -1.85656738], - [8.00880814, 1.36017394, 121.81007385, 32.74150467, 50.44297409], - [16.44299889, -4.82221127, 63.39775848, 61.22503662, 54.12270737], - [5.00000000, 5.00000000, 10.00000000, 10.00000000, -0.76943970], - [17.64130402, -0.98095351, 61.40377808, 16.28918839, 55.53118134], - [0.13016054, 4.60568953, 35.80157471, 32.30180359, 62.52872086], - [-4.26460743, 0.39604485, 124.30079651, 31.84611320, -1.58203125], - [7.52815342, -0.91636634, 62.39784622, 15.45565224, 60.79549789], - ] - ) - ), - RotatedBoxes( - torch.tensor( - [ - [0.07734215, 0.81635046, 65.33510590, 17.34688377, -1.51821899], - [-3.41833067, -3.11320257, 64.17595673, 60.55617905, 58.27033234], - [20.67383385, -6.16561556, 63.60531998, 62.52315903, 54.85546494], - [15.00000000, 10.00000000, 30.00000000, 20.00000000, -0.18218994], - [9.22646523, -6.84775209, 62.09895706, 65.46472931, -2.74307251], - [15.00000000, 4.93451595, 30.00000000, 9.86903191, -0.60272217], - [8.88342094, 2.65560246, 120.95362854, 32.45022202, 55.75970078], - [16.39088631, 2.33887148, 34.78761292, 35.61492920, 60.81977463], - [9.78298569, 10.00000000, 19.56597137, 20.00000000, -0.86660767], - [1.28576660, 5.49873352, 34.93610382, 33.22600174, 60.51599884], - [17.58912468, -1.63270092, 62.96052551, 16.45713997, 52.91245270], - [5.64749718, -1.90428460, 62.37649155, 16.19474792, 61.09543991], - [0.82255805, 2.34931135, 118.83985901, 32.83671188, 56.50753784], - [-5.33874989, 1.64404404, 125.28501892, 33.35424042, -2.80731201], - ] - ) - ), - ] - - expected_objectness_logits = [ - torch.tensor( - [ - 0.10111768, - 0.09112845, - 0.08466332, - 0.07589971, - 0.06650183, - 0.06350251, - 0.04299347, - 0.01864817, - 0.00986163, - 0.00078543, - -0.04573630, - -0.04799230, - ] - ), - torch.tensor( - [ - 0.11373727, - 0.09377633, - 
0.05281663, - 0.05143715, - 0.04040275, - 0.03250912, - 0.01307789, - 0.01177734, - 0.00038105, - -0.00540255, - -0.01194804, - -0.01461012, - -0.03061717, - -0.03599222, - ] - ), - ] - - torch.set_printoptions(precision=8, sci_mode=False) - - for proposal, expected_proposal_box, im_size, expected_objectness_logit in zip( - proposals, expected_proposal_boxes, image_sizes, expected_objectness_logits - ): - self.assertEqual(len(proposal), len(expected_proposal_box)) - self.assertEqual(proposal.image_size, im_size) - # It seems that there's some randomness in the result across different machines: - # This test can be run on a local machine for 100 times with exactly the same result, - # However, a different machine might produce slightly different results, - # thus the atol here. - err_msg = "computed proposal boxes = {}, expected {}".format( - proposal.proposal_boxes.tensor, expected_proposal_box.tensor - ) - self.assertTrue( - torch.allclose( - proposal.proposal_boxes.tensor, expected_proposal_box.tensor, atol=1e-5 - ), - err_msg, - ) - - err_msg = "computed objectness logits = {}, expected {}".format( - proposal.objectness_logits, expected_objectness_logit - ) - self.assertTrue( - torch.allclose(proposal.objectness_logits, expected_objectness_logit, atol=1e-5), - err_msg, - ) - - def test_rpn_proposals_inf(self): - N, Hi, Wi, A = 3, 3, 3, 3 - proposals = [torch.rand(N, Hi * Wi * A, 4)] - pred_logits = [torch.rand(N, Hi * Wi * A)] - pred_logits[0][1][3:5].fill_(float("inf")) - images = ImageList.from_tensors([torch.rand(3, 10, 10)] * 3) - find_top_rpn_proposals(proposals, pred_logits, images, 0.5, 1000, 1000, 0, False) - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_boxes.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_boxes.py deleted file mode 100644 index 4d33c3b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_boxes.py +++ /dev/null @@ -1,182 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import json -import math -import numpy as np -import unittest -import torch - -from detectron2.structures import Boxes, BoxMode, pairwise_iou - - -class TestBoxMode(unittest.TestCase): - def _convert_xy_to_wh(self, x): - return BoxMode.convert(x, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) - - def _convert_xywha_to_xyxy(self, x): - return BoxMode.convert(x, BoxMode.XYWHA_ABS, BoxMode.XYXY_ABS) - - def _convert_xywh_to_xywha(self, x): - return BoxMode.convert(x, BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS) - - def test_box_convert_list(self): - for tp in [list, tuple]: - box = tp([5.0, 5.0, 10.0, 10.0]) - output = self._convert_xy_to_wh(box) - self.assertIsInstance(output, tp) - self.assertIsInstance(output[0], float) - self.assertEqual(output, tp([5.0, 5.0, 5.0, 5.0])) - - with self.assertRaises(Exception): - self._convert_xy_to_wh([box]) - - def test_box_convert_array(self): - box = np.asarray([[5, 5, 10, 10], [1, 1, 2, 3]]) - output = self._convert_xy_to_wh(box) - self.assertEqual(output.dtype, box.dtype) - self.assertEqual(output.shape, box.shape) - self.assertTrue((output[0] == [5, 5, 5, 5]).all()) - self.assertTrue((output[1] == [1, 1, 1, 2]).all()) - - def test_box_convert_cpu_tensor(self): - box = torch.tensor([[5, 5, 10, 10], [1, 1, 2, 3]]) - output = self._convert_xy_to_wh(box) - self.assertEqual(output.dtype, box.dtype) - self.assertEqual(output.shape, box.shape) - output = output.numpy() - self.assertTrue((output[0] == [5, 5, 5, 5]).all()) - self.assertTrue((output[1] == [1, 1, 1, 2]).all()) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_box_convert_cuda_tensor(self): - box = torch.tensor([[5, 5, 10, 10], [1, 1, 2, 3]]).cuda() - output = self._convert_xy_to_wh(box) - self.assertEqual(output.dtype, box.dtype) - self.assertEqual(output.shape, box.shape) - self.assertEqual(output.device, box.device) - output = output.cpu().numpy() - self.assertTrue((output[0] == [5, 5, 5, 5]).all()) - self.assertTrue((output[1] == [1, 1, 1, 2]).all()) - - def test_box_convert_xywha_to_xyxy_list(self): - for tp in [list, tuple]: - box = tp([50, 50, 30, 20, 0]) - output = self._convert_xywha_to_xyxy(box) - self.assertIsInstance(output, tp) - self.assertEqual(output, tp([35, 40, 65, 60])) - - with self.assertRaises(Exception): - self._convert_xywha_to_xyxy([box]) - - def test_box_convert_xywha_to_xyxy_array(self): - for dtype in [np.float64, np.float32]: - box = np.asarray( - [ - [50, 50, 30, 20, 0], - [50, 50, 30, 20, 90], - [1, 1, math.sqrt(2), math.sqrt(2), -45], - ], - dtype=dtype, - ) - output = self._convert_xywha_to_xyxy(box) - self.assertEqual(output.dtype, box.dtype) - expected = np.asarray([[35, 40, 65, 60], [40, 35, 60, 65], [0, 0, 2, 2]], dtype=dtype) - self.assertTrue(np.allclose(output, expected, atol=1e-6), "output={}".format(output)) - - def test_box_convert_xywha_to_xyxy_tensor(self): - for dtype in [torch.float32, torch.float64]: - box = torch.tensor( - [ - [50, 50, 30, 20, 0], - [50, 50, 30, 20, 90], - [1, 1, math.sqrt(2), math.sqrt(2), -45], - ], - dtype=dtype, - ) - output = self._convert_xywha_to_xyxy(box) - self.assertEqual(output.dtype, box.dtype) - expected = torch.tensor([[35, 40, 65, 60], [40, 35, 60, 65], [0, 0, 2, 2]], dtype=dtype) - - self.assertTrue(torch.allclose(output, expected, atol=1e-6), "output={}".format(output)) - - def test_box_convert_xywh_to_xywha_list(self): - for tp in [list, tuple]: - box = tp([50, 50, 30, 20]) - output = self._convert_xywh_to_xywha(box) - self.assertIsInstance(output, tp) - 
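The XYWHA_ABS -> XYXY_ABS expectations above are consistent with taking the axis-aligned box that encloses the rotated rectangle: the enclosing width is w*|cos(theta)| + h*|sin(theta)|, the enclosing height is h*|cos(theta)| + w*|sin(theta)|, both centered on (cx, cy). A small check against the three rows used in the test (helper name is ours; this verifies the test data, it is not the BoxMode implementation):

import math

def xywha_to_enclosing_xyxy(cx, cy, w, h, angle_deg):
    t = math.radians(angle_deg)
    new_w = w * abs(math.cos(t)) + h * abs(math.sin(t))
    new_h = h * abs(math.cos(t)) + w * abs(math.sin(t))
    return [cx - new_w / 2, cy - new_h / 2, cx + new_w / 2, cy + new_h / 2]

for box, expected in [
    ([50, 50, 30, 20, 0], [35, 40, 65, 60]),
    ([50, 50, 30, 20, 90], [40, 35, 60, 65]),
    ([1, 1, math.sqrt(2), math.sqrt(2), -45], [0, 0, 2, 2]),
]:
    got = xywha_to_enclosing_xyxy(*box)
    assert all(abs(g - e) < 1e-6 for g, e in zip(got, expected))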
self.assertEqual(output, tp([65, 60, 30, 20, 0])) - - with self.assertRaises(Exception): - self._convert_xywh_to_xywha([box]) - - def test_box_convert_xywh_to_xywha_array(self): - for dtype in [np.float64, np.float32]: - box = np.asarray([[30, 40, 70, 60], [30, 40, 60, 70], [-1, -1, 2, 2]], dtype=dtype) - output = self._convert_xywh_to_xywha(box) - self.assertEqual(output.dtype, box.dtype) - expected = np.asarray( - [[65, 70, 70, 60, 0], [60, 75, 60, 70, 0], [0, 0, 2, 2, 0]], dtype=dtype - ) - self.assertTrue(np.allclose(output, expected, atol=1e-6), "output={}".format(output)) - - def test_box_convert_xywh_to_xywha_tensor(self): - for dtype in [torch.float32, torch.float64]: - box = torch.tensor([[30, 40, 70, 60], [30, 40, 60, 70], [-1, -1, 2, 2]], dtype=dtype) - output = self._convert_xywh_to_xywha(box) - self.assertEqual(output.dtype, box.dtype) - expected = torch.tensor( - [[65, 70, 70, 60, 0], [60, 75, 60, 70, 0], [0, 0, 2, 2, 0]], dtype=dtype - ) - - self.assertTrue(torch.allclose(output, expected, atol=1e-6), "output={}".format(output)) - - def test_json_serializable(self): - payload = {"box_mode": BoxMode.XYWH_REL} - try: - json.dumps(payload) - except Exception: - self.fail("JSON serialization failed") - - def test_json_deserializable(self): - payload = '{"box_mode": 2}' - obj = json.loads(payload) - try: - obj["box_mode"] = BoxMode(obj["box_mode"]) - except Exception: - self.fail("JSON deserialization failed") - - -class TestBoxIOU(unittest.TestCase): - def test_pairwise_iou(self): - boxes1 = torch.tensor([[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]) - - boxes2 = torch.tensor( - [ - [0.0, 0.0, 1.0, 1.0], - [0.0, 0.0, 0.5, 1.0], - [0.0, 0.0, 1.0, 0.5], - [0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 1.0, 1.0], - [0.5, 0.5, 1.5, 1.5], - ] - ) - - expected_ious = torch.tensor( - [ - [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)], - [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)], - ] - ) - - ious = pairwise_iou(Boxes(boxes1), Boxes(boxes2)) - - self.assertTrue(torch.allclose(ious, expected_ious)) - - -class TestBoxes(unittest.TestCase): - def test_empty_cat(self): - x = Boxes.cat([]) - self.assertTrue(x.tensor.shape, (0, 4)) - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_imagelist.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_imagelist.py deleted file mode 100644 index abeb355..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_imagelist.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
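The last expected IoU above, 0.25 / (2 - 0.25), is plain intersection-over-union for the unit box against the half-overlapping box [0.5, 0.5, 1.5, 1.5]: the intersection is a 0.5 x 0.5 square (area 0.25) and the union is 1 + 1 - 0.25. As arithmetic:

inter = 0.5 * 0.5                 # overlap of [0, 0, 1, 1] and [0.5, 0.5, 1.5, 1.5]
union = 1.0 + 1.0 - inter
assert abs(inter / union - 0.25 / (2 - 0.25)) < 1e-12   # = 1/7, about 0.142857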
All Rights Reserved - -import unittest -from typing import Sequence -import torch - -from detectron2.structures import ImageList - - -class TestImageList(unittest.TestCase): - def test_imagelist_padding_shape(self): - class TensorToImageList(torch.nn.Module): - def forward(self, tensors: Sequence[torch.Tensor]): - return ImageList.from_tensors(tensors, 4).tensor - - func = torch.jit.trace( - TensorToImageList(), ([torch.ones((3, 10, 10), dtype=torch.float32)],) - ) - ret = func([torch.ones((3, 15, 20), dtype=torch.float32)]) - self.assertEqual(list(ret.shape), [1, 3, 16, 20], str(ret.shape)) - - func = torch.jit.trace( - TensorToImageList(), - ( - [ - torch.ones((3, 16, 10), dtype=torch.float32), - torch.ones((3, 13, 11), dtype=torch.float32), - ], - ), - ) - ret = func( - [ - torch.ones((3, 25, 20), dtype=torch.float32), - torch.ones((3, 10, 10), dtype=torch.float32), - ] - ) - # does not support calling with different #images - self.assertEqual(list(ret.shape), [2, 3, 28, 20], str(ret.shape)) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_instances.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_instances.py deleted file mode 100644 index 79c5249..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_instances.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import unittest -import torch - -from detectron2.structures import Instances - - -class TestInstancesIndexing(unittest.TestCase): - def test_int_indexing(self): - attr1 = torch.tensor([[0.0, 0.0, 1.0], [0.0, 0.0, 0.5], [0.0, 0.0, 1.0], [0.0, 0.5, 0.5]]) - attr2 = torch.tensor([0.1, 0.2, 0.3, 0.4]) - instances = Instances((100, 100)) - instances.attr1 = attr1 - instances.attr2 = attr2 - for i in range(-len(instances), len(instances)): - inst = instances[i] - self.assertEqual((inst.attr1 == attr1[i]).all(), True) - self.assertEqual((inst.attr2 == attr2[i]).all(), True) - - self.assertRaises(IndexError, lambda: instances[len(instances)]) - self.assertRaises(IndexError, lambda: instances[-len(instances) - 1]) - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_rotated_boxes.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_rotated_boxes.py deleted file mode 100644 index 575ac48..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_rotated_boxes.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
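The expected shapes in test_imagelist_padding_shape are consistent with padding the per-batch maximum height and width up to the size_divisibility argument (4 here): a lone 15 x 20 image becomes 16 x 20, and the (25 x 20, 10 x 10) pair becomes 28 x 20. A hedged sketch of the same rounding rule (pure Python, helper names are ours):

def round_up(v, d):
    return (v + d - 1) // d * d

def padded_hw(shapes, size_divisibility=4):
    # shapes: iterable of (h, w); the batch max is rounded up to a multiple of size_divisibility.
    max_h = max(h for h, _ in shapes)
    max_w = max(w for _, w in shapes)
    return round_up(max_h, size_divisibility), round_up(max_w, size_divisibility)

assert padded_hw([(15, 20)]) == (16, 20)
assert padded_hw([(25, 20), (10, 10)]) == (28, 20)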
All Rights Reserved -from __future__ import absolute_import, division, print_function, unicode_literals -import logging -import math -import random -import unittest -import torch -from fvcore.common.benchmark import benchmark - -from detectron2.layers.rotated_boxes import pairwise_iou_rotated -from detectron2.structures.boxes import Boxes -from detectron2.structures.rotated_boxes import RotatedBoxes, pairwise_iou - -logger = logging.getLogger(__name__) - - -class TestRotatedBoxesLayer(unittest.TestCase): - def test_iou_0_dim_cpu(self): - boxes1 = torch.rand(0, 5, dtype=torch.float32) - boxes2 = torch.rand(10, 5, dtype=torch.float32) - expected_ious = torch.zeros(0, 10, dtype=torch.float32) - ious = pairwise_iou_rotated(boxes1, boxes2) - self.assertTrue(torch.allclose(ious, expected_ious)) - - boxes1 = torch.rand(10, 5, dtype=torch.float32) - boxes2 = torch.rand(0, 5, dtype=torch.float32) - expected_ious = torch.zeros(10, 0, dtype=torch.float32) - ious = pairwise_iou_rotated(boxes1, boxes2) - self.assertTrue(torch.allclose(ious, expected_ious)) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_iou_0_dim_cuda(self): - boxes1 = torch.rand(0, 5, dtype=torch.float32) - boxes2 = torch.rand(10, 5, dtype=torch.float32) - expected_ious = torch.zeros(0, 10, dtype=torch.float32) - ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda()) - self.assertTrue(torch.allclose(ious_cuda.cpu(), expected_ious)) - - boxes1 = torch.rand(10, 5, dtype=torch.float32) - boxes2 = torch.rand(0, 5, dtype=torch.float32) - expected_ious = torch.zeros(10, 0, dtype=torch.float32) - ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda()) - self.assertTrue(torch.allclose(ious_cuda.cpu(), expected_ious)) - - def test_iou_half_overlap_cpu(self): - boxes1 = torch.tensor([[0.5, 0.5, 1.0, 1.0, 0.0]], dtype=torch.float32) - boxes2 = torch.tensor([[0.25, 0.5, 0.5, 1.0, 0.0]], dtype=torch.float32) - expected_ious = torch.tensor([[0.5]], dtype=torch.float32) - ious = pairwise_iou_rotated(boxes1, boxes2) - self.assertTrue(torch.allclose(ious, expected_ious)) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_iou_half_overlap_cuda(self): - boxes1 = torch.tensor([[0.5, 0.5, 1.0, 1.0, 0.0]], dtype=torch.float32) - boxes2 = torch.tensor([[0.25, 0.5, 0.5, 1.0, 0.0]], dtype=torch.float32) - expected_ious = torch.tensor([[0.5]], dtype=torch.float32) - ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda()) - self.assertTrue(torch.allclose(ious_cuda.cpu(), expected_ious)) - - def test_iou_precision(self): - for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: - boxes1 = torch.tensor([[565, 565, 10, 10.0, 0]], dtype=torch.float32, device=device) - boxes2 = torch.tensor([[565, 565, 10, 8.3, 0]], dtype=torch.float32, device=device) - iou = 8.3 / 10.0 - expected_ious = torch.tensor([[iou]], dtype=torch.float32) - ious = pairwise_iou_rotated(boxes1, boxes2) - self.assertTrue(torch.allclose(ious.cpu(), expected_ious)) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_iou_too_many_boxes_cuda(self): - s1, s2 = 5, 1289035 - boxes1 = torch.zeros(s1, 5) - boxes2 = torch.zeros(s2, 5) - ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda()) - self.assertTupleEqual(tuple(ious_cuda.shape), (s1, s2)) - - def test_iou_extreme(self): - # Cause floating point issues in cuda kernels (#1266) - for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: - boxes1 = torch.tensor([[160.0, 153.0, 230.0, 23.0, 
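In test_iou_precision the second box shares the first box's center and width but is only 8.3 tall, so it is fully contained in the 10 x 10 box: the intersection is 10 * 8.3 and the union is 10 * 10, hence the expected IoU of 8.3 / 10. As arithmetic:

area1, area2 = 10 * 10.0, 10 * 8.3
inter = area2                     # the 10 x 8.3 box lies entirely inside the 10 x 10 box
union = area1 + area2 - inter     # = area1
assert abs(inter / union - 8.3 / 10.0) < 1e-12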
-37.0]], device=device) - boxes2 = torch.tensor( - [ - [ - -1.117407639806935e17, - 1.3858420478349148e18, - 1000.0000610351562, - 1000.0000610351562, - 1612.0, - ] - ], - device=device, - ) - ious = pairwise_iou_rotated(boxes1, boxes2) - self.assertTrue(ious.min() >= 0, ious) - - -class TestRotatedBoxesStructure(unittest.TestCase): - def test_clip_area_0_degree(self): - for _ in range(50): - num_boxes = 100 - boxes_5d = torch.zeros(num_boxes, 5) - boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-100, 500) - boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-100, 500) - boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, 500) - boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, 500) - # Convert from (x_ctr, y_ctr, w, h, 0) to (x1, y1, x2, y2) - boxes_4d = torch.zeros(num_boxes, 4) - boxes_4d[:, 0] = boxes_5d[:, 0] - boxes_5d[:, 2] / 2.0 - boxes_4d[:, 1] = boxes_5d[:, 1] - boxes_5d[:, 3] / 2.0 - boxes_4d[:, 2] = boxes_5d[:, 0] + boxes_5d[:, 2] / 2.0 - boxes_4d[:, 3] = boxes_5d[:, 1] + boxes_5d[:, 3] / 2.0 - - image_size = (500, 600) - test_boxes_4d = Boxes(boxes_4d) - test_boxes_5d = RotatedBoxes(boxes_5d) - # Before clip - areas_4d = test_boxes_4d.area() - areas_5d = test_boxes_5d.area() - self.assertTrue(torch.allclose(areas_4d, areas_5d, atol=1e-1, rtol=1e-5)) - # After clip - test_boxes_4d.clip(image_size) - test_boxes_5d.clip(image_size) - areas_4d = test_boxes_4d.area() - areas_5d = test_boxes_5d.area() - self.assertTrue(torch.allclose(areas_4d, areas_5d, atol=1e-1, rtol=1e-5)) - - def test_clip_area_arbitrary_angle(self): - num_boxes = 100 - boxes_5d = torch.zeros(num_boxes, 5) - boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-100, 500) - boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-100, 500) - boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, 500) - boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, 500) - boxes_5d[:, 4] = torch.FloatTensor(num_boxes).uniform_(-1800, 1800) - clip_angle_threshold = random.uniform(0, 180) - - image_size = (500, 600) - test_boxes_5d = RotatedBoxes(boxes_5d) - # Before clip - areas_before = test_boxes_5d.area() - # After clip - test_boxes_5d.clip(image_size, clip_angle_threshold) - areas_diff = test_boxes_5d.area() - areas_before - - # the areas should only decrease after clipping - self.assertTrue(torch.all(areas_diff <= 0)) - # whenever the box is clipped (thus the area shrinks), - # the angle for the box must be within the clip_angle_threshold - # Note that the clip function will normalize the angle range - # to be within (-180, 180] - self.assertTrue( - torch.all(torch.abs(boxes_5d[:, 4][torch.where(areas_diff < 0)]) < clip_angle_threshold) - ) - - def test_normalize_angles(self): - # torch.manual_seed(0) - for _ in range(50): - num_boxes = 100 - boxes_5d = torch.zeros(num_boxes, 5) - boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-100, 500) - boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-100, 500) - boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, 500) - boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, 500) - boxes_5d[:, 4] = torch.FloatTensor(num_boxes).uniform_(-1800, 1800) - rotated_boxes = RotatedBoxes(boxes_5d) - normalized_boxes = rotated_boxes.clone() - normalized_boxes.normalize_angles() - self.assertTrue(torch.all(normalized_boxes.tensor[:, 4] >= -180)) - self.assertTrue(torch.all(normalized_boxes.tensor[:, 4] < 180)) - # x, y, w, h should not change - self.assertTrue(torch.allclose(boxes_5d[:, :4], normalized_boxes.tensor[:, :4])) - # the 
cos/sin values of the angles should stay the same - - self.assertTrue( - torch.allclose( - torch.cos(boxes_5d[:, 4] * math.pi / 180), - torch.cos(normalized_boxes.tensor[:, 4] * math.pi / 180), - atol=1e-5, - ) - ) - - self.assertTrue( - torch.allclose( - torch.sin(boxes_5d[:, 4] * math.pi / 180), - torch.sin(normalized_boxes.tensor[:, 4] * math.pi / 180), - atol=1e-5, - ) - ) - - def test_pairwise_iou_0_degree(self): - for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: - boxes1 = torch.tensor( - [[0.5, 0.5, 1.0, 1.0, 0.0], [0.5, 0.5, 1.0, 1.0, 0.0]], - dtype=torch.float32, - device=device, - ) - boxes2 = torch.tensor( - [ - [0.5, 0.5, 1.0, 1.0, 0.0], - [0.25, 0.5, 0.5, 1.0, 0.0], - [0.5, 0.25, 1.0, 0.5, 0.0], - [0.25, 0.25, 0.5, 0.5, 0.0], - [0.75, 0.75, 0.5, 0.5, 0.0], - [1.0, 1.0, 1.0, 1.0, 0.0], - ], - dtype=torch.float32, - device=device, - ) - expected_ious = torch.tensor( - [ - [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)], - [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)], - ], - dtype=torch.float32, - device=device, - ) - ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) - self.assertTrue(torch.allclose(ious, expected_ious)) - - def test_pairwise_iou_45_degrees(self): - for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: - boxes1 = torch.tensor( - [ - [1, 1, math.sqrt(2), math.sqrt(2), 45], - [1, 1, 2 * math.sqrt(2), 2 * math.sqrt(2), -45], - ], - dtype=torch.float32, - device=device, - ) - boxes2 = torch.tensor([[1, 1, 2, 2, 0]], dtype=torch.float32, device=device) - expected_ious = torch.tensor([[0.5], [0.5]], dtype=torch.float32, device=device) - ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) - self.assertTrue(torch.allclose(ious, expected_ious)) - - def test_pairwise_iou_orthogonal(self): - for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: - boxes1 = torch.tensor([[5, 5, 10, 6, 55]], dtype=torch.float32, device=device) - boxes2 = torch.tensor([[5, 5, 10, 6, -35]], dtype=torch.float32, device=device) - iou = (6.0 * 6.0) / (6.0 * 6.0 + 4.0 * 6.0 + 4.0 * 6.0) - expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device) - ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) - self.assertTrue(torch.allclose(ious, expected_ious)) - - def test_pairwise_iou_large_close_boxes(self): - for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: - boxes1 = torch.tensor( - [[299.500000, 417.370422, 600.000000, 364.259186, 27.1828]], - dtype=torch.float32, - device=device, - ) - boxes2 = torch.tensor( - [[299.500000, 417.370422, 600.000000, 364.259155, 27.1828]], - dtype=torch.float32, - device=device, - ) - iou = 364.259155 / 364.259186 - expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device) - ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) - self.assertTrue(torch.allclose(ious, expected_ious)) - - def test_pairwise_iou_many_boxes(self): - for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: - num_boxes1 = 100 - num_boxes2 = 200 - boxes1 = torch.stack( - [ - torch.tensor( - [5 + 20 * i, 5 + 20 * i, 10, 10, 0], dtype=torch.float32, device=device - ) - for i in range(num_boxes1) - ] - ) - boxes2 = torch.stack( - [ - torch.tensor( - [5 + 20 * i, 5 + 20 * i, 10, 1 + 9 * i / num_boxes2, 0], - dtype=torch.float32, - device=device, - ) - for i in range(num_boxes2) - ] - ) - expected_ious = torch.zeros(num_boxes1, num_boxes2, dtype=torch.float32, device=device) - for i in range(min(num_boxes1, num_boxes2)): - 
expected_ious[i][i] = (1 + 9 * i / num_boxes2) / 10.0 - ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) - self.assertTrue(torch.allclose(ious, expected_ious)) - - def test_pairwise_iou_issue1207_simplified(self): - for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: - # Simplified test case of D2-issue-1207 - boxes1 = torch.tensor([[3, 3, 8, 2, -45.0]], device=device) - boxes2 = torch.tensor([[6, 0, 8, 2, -45.0]], device=device) - iou = 0.0 - expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device) - - ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) - self.assertTrue(torch.allclose(ious, expected_ious)) - - def test_pairwise_iou_issue1207(self): - for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: - # The original test case in D2-issue-1207 - boxes1 = torch.tensor([[160.0, 153.0, 230.0, 23.0, -37.0]], device=device) - boxes2 = torch.tensor([[190.0, 127.0, 80.0, 21.0, -46.0]], device=device) - - iou = 0.0 - expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device) - - ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) - self.assertTrue(torch.allclose(ious, expected_ious)) - - def test_empty_cat(self): - x = RotatedBoxes.cat([]) - self.assertTrue(x.tensor.shape, (0, 5)) - - -def benchmark_rotated_iou(): - num_boxes1 = 200 - num_boxes2 = 500 - boxes1 = torch.stack( - [ - torch.tensor([5 + 20 * i, 5 + 20 * i, 10, 10, 0], dtype=torch.float32) - for i in range(num_boxes1) - ] - ) - boxes2 = torch.stack( - [ - torch.tensor( - [5 + 20 * i, 5 + 20 * i, 10, 1 + 9 * i / num_boxes2, 0], dtype=torch.float32 - ) - for i in range(num_boxes2) - ] - ) - - def func(dev, n=1): - b1 = boxes1.to(device=dev) - b2 = boxes2.to(device=dev) - - def bench(): - for _ in range(n): - pairwise_iou_rotated(b1, b2) - if dev.type == "cuda": - torch.cuda.synchronize() - - return bench - - # only run it once per timed loop, since it's slow - args = [{"dev": torch.device("cpu"), "n": 1}] - if torch.cuda.is_available(): - args.append({"dev": torch.device("cuda"), "n": 10}) - - benchmark(func, "rotated_iou", args, warmup_iters=3) - - -if __name__ == "__main__": - unittest.main() - benchmark_rotated_iou() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/test_checkpoint.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/test_checkpoint.py deleted file mode 100644 index 725b488..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/test_checkpoint.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import unittest -from collections import OrderedDict -import torch -from torch import nn - -from detectron2.checkpoint.c2_model_loading import align_and_update_state_dicts -from detectron2.utils.logger import setup_logger - - -class TestCheckpointer(unittest.TestCase): - def setUp(self): - setup_logger() - - def create_complex_model(self): - m = nn.Module() - m.block1 = nn.Module() - m.block1.layer1 = nn.Linear(2, 3) - m.layer2 = nn.Linear(3, 2) - m.res = nn.Module() - m.res.layer2 = nn.Linear(3, 2) - - state_dict = OrderedDict() - state_dict["layer1.weight"] = torch.rand(3, 2) - state_dict["layer1.bias"] = torch.rand(3) - state_dict["layer2.weight"] = torch.rand(2, 3) - state_dict["layer2.bias"] = torch.rand(2) - state_dict["res.layer2.weight"] = torch.rand(2, 3) - state_dict["res.layer2.bias"] = torch.rand(2) - return m, state_dict - - def test_complex_model_loaded(self): - for add_data_parallel in [False, True]: - model, state_dict = self.create_complex_model() - if add_data_parallel: - model = nn.DataParallel(model) - model_sd = model.state_dict() - - align_and_update_state_dicts(model_sd, state_dict) - for loaded, stored in zip(model_sd.values(), state_dict.values()): - # different tensor references - self.assertFalse(id(loaded) == id(stored)) - # same content - self.assertTrue(loaded.equal(stored)) - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/test_config.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/test_config.py deleted file mode 100644 index 650bdf2..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/test_config.py +++ /dev/null @@ -1,240 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - - -import os -import tempfile -import unittest -import torch - -from detectron2.config import configurable, downgrade_config, get_cfg, upgrade_config -from detectron2.layers import ShapeSpec - -_V0_CFG = """ -MODEL: - RPN_HEAD: - NAME: "TEST" -VERSION: 0 -""" - -_V1_CFG = """ -MODEL: - WEIGHT: "/path/to/weight" -""" - - -class TestConfigVersioning(unittest.TestCase): - def test_upgrade_downgrade_consistency(self): - cfg = get_cfg() - # check that custom is preserved - cfg.USER_CUSTOM = 1 - - down = downgrade_config(cfg, to_version=0) - up = upgrade_config(down) - self.assertTrue(up == cfg) - - def _merge_cfg_str(self, cfg, merge_str): - f = tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) - try: - f.write(merge_str) - f.close() - cfg.merge_from_file(f.name) - finally: - os.remove(f.name) - return cfg - - def test_auto_upgrade(self): - cfg = get_cfg() - latest_ver = cfg.VERSION - cfg.USER_CUSTOM = 1 - - self._merge_cfg_str(cfg, _V0_CFG) - - self.assertEqual(cfg.MODEL.RPN.HEAD_NAME, "TEST") - self.assertEqual(cfg.VERSION, latest_ver) - - def test_guess_v1(self): - cfg = get_cfg() - latest_ver = cfg.VERSION - self._merge_cfg_str(cfg, _V1_CFG) - self.assertEqual(cfg.VERSION, latest_ver) - - -class _TestClassA(torch.nn.Module): - @configurable - def __init__(self, arg1, arg2, arg3=3): - super().__init__() - self.arg1 = arg1 - self.arg2 = arg2 - self.arg3 = arg3 - assert arg1 == 1 - assert arg2 == 2 - assert arg3 == 3 - - @classmethod - def from_config(cls, cfg): - args = {"arg1": cfg.ARG1, "arg2": cfg.ARG2} - return args - - -class _TestClassB(_TestClassA): - @configurable - def __init__(self, input_shape, arg1, arg2, arg3=3): - """ - Doc of _TestClassB - """ - assert input_shape == "shape" - super().__init__(arg1, arg2, arg3) - - @classmethod - def from_config(cls, cfg, input_shape): # test extra positional arg in from_config - args = {"arg1": cfg.ARG1, "arg2": cfg.ARG2} - args["input_shape"] = input_shape - return args - - -class _LegacySubClass(_TestClassB): - # an old subclass written in cfg style - def __init__(self, cfg, input_shape, arg4=4): - super().__init__(cfg, input_shape) - assert self.arg1 == 1 - assert self.arg2 == 2 - assert self.arg3 == 3 - - -class _NewSubClassNewInit(_TestClassB): - # test new subclass with a new __init__ - @configurable - def __init__(self, input_shape, arg4=4, **kwargs): - super().__init__(input_shape, **kwargs) - assert self.arg1 == 1 - assert self.arg2 == 2 - assert self.arg3 == 3 - - -class _LegacySubClassNotCfg(_TestClassB): - # an old subclass written in cfg style, but argument is not called "cfg" - def __init__(self, config, input_shape): - super().__init__(config, input_shape) - assert self.arg1 == 1 - assert self.arg2 == 2 - assert self.arg3 == 3 - - -class _TestClassC(_TestClassB): - @classmethod - def from_config(cls, cfg, input_shape, **kwargs): # test extra kwarg overwrite - args = {"arg1": cfg.ARG1, "arg2": cfg.ARG2} - args["input_shape"] = input_shape - args.update(kwargs) - return args - - -class _TestClassD(_TestClassA): - @configurable - def __init__(self, input_shape: ShapeSpec, arg1: int, arg2, arg3=3): - assert input_shape == "shape" - super().__init__(arg1, arg2, arg3) - - # _TestClassA.from_config does not have input_shape args. 
- # Test whether input_shape will be forwarded to __init__ - - -class TestConfigurable(unittest.TestCase): - def testInitWithArgs(self): - _ = _TestClassA(arg1=1, arg2=2, arg3=3) - _ = _TestClassB("shape", arg1=1, arg2=2) - _ = _TestClassC("shape", arg1=1, arg2=2) - _ = _TestClassD("shape", arg1=1, arg2=2, arg3=3) - - def testPatchedAttr(self): - self.assertTrue("Doc" in _TestClassB.__init__.__doc__) - self.assertEqual(_TestClassD.__init__.__annotations__["arg1"], int) - - def testInitWithCfg(self): - cfg = get_cfg() - cfg.ARG1 = 1 - cfg.ARG2 = 2 - cfg.ARG3 = 3 - _ = _TestClassA(cfg) - _ = _TestClassB(cfg, input_shape="shape") - _ = _TestClassC(cfg, input_shape="shape") - _ = _TestClassD(cfg, input_shape="shape") - _ = _LegacySubClass(cfg, input_shape="shape") - _ = _NewSubClassNewInit(cfg, input_shape="shape") - _ = _LegacySubClassNotCfg(cfg, input_shape="shape") - with self.assertRaises(TypeError): - # disallow forwarding positional args to __init__ since it's prone to errors - _ = _TestClassD(cfg, "shape") - - # call with kwargs instead - _ = _TestClassA(cfg=cfg) - _ = _TestClassB(cfg=cfg, input_shape="shape") - _ = _TestClassC(cfg=cfg, input_shape="shape") - _ = _TestClassD(cfg=cfg, input_shape="shape") - _ = _LegacySubClass(cfg=cfg, input_shape="shape") - _ = _NewSubClassNewInit(cfg=cfg, input_shape="shape") - _ = _LegacySubClassNotCfg(config=cfg, input_shape="shape") - - def testInitWithCfgOverwrite(self): - cfg = get_cfg() - cfg.ARG1 = 1 - cfg.ARG2 = 999 # wrong config - with self.assertRaises(AssertionError): - _ = _TestClassA(cfg, arg3=3) - - # overwrite arg2 with correct config later: - _ = _TestClassA(cfg, arg2=2, arg3=3) - _ = _TestClassB(cfg, input_shape="shape", arg2=2, arg3=3) - _ = _TestClassC(cfg, input_shape="shape", arg2=2, arg3=3) - _ = _TestClassD(cfg, input_shape="shape", arg2=2, arg3=3) - - # call with kwargs cfg=cfg instead - _ = _TestClassA(cfg=cfg, arg2=2, arg3=3) - _ = _TestClassB(cfg=cfg, input_shape="shape", arg2=2, arg3=3) - _ = _TestClassC(cfg=cfg, input_shape="shape", arg2=2, arg3=3) - _ = _TestClassD(cfg=cfg, input_shape="shape", arg2=2, arg3=3) - - def testInitWithCfgWrongArgs(self): - cfg = get_cfg() - cfg.ARG1 = 1 - cfg.ARG2 = 2 - with self.assertRaises(TypeError): - _ = _TestClassB(cfg, "shape", not_exist=1) - with self.assertRaises(TypeError): - _ = _TestClassC(cfg, "shape", not_exist=1) - with self.assertRaises(TypeError): - _ = _TestClassD(cfg, "shape", not_exist=1) - - def testBadClass(self): - class _BadClass1: - @configurable - def __init__(self, a=1, b=2): - pass - - class _BadClass2: - @configurable - def __init__(self, a=1, b=2): - pass - - def from_config(self, cfg): # noqa - pass - - class _BadClass3: - @configurable - def __init__(self, a=1, b=2): - pass - - # bad name: must be cfg - @classmethod - def from_config(cls, config): # noqa - pass - - with self.assertRaises(AttributeError): - _ = _BadClass1(a=1) - - with self.assertRaises(TypeError): - _ = _BadClass2(a=1) - - with self.assertRaises(TypeError): - _ = _BadClass3(get_cfg()) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/test_export_caffe2.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/test_export_caffe2.py deleted file mode 100644 index ad989c4..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/test_export_caffe2.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -# -*- coding: utf-8 -*- - -import copy -import numpy as np -import os -import tempfile -import unittest -import cv2 -import torch -from fvcore.common.file_io import PathManager - -from detectron2 import model_zoo -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.data import DatasetCatalog -from detectron2.modeling import build_model -from detectron2.utils.logger import setup_logger - - -@unittest.skipIf(os.environ.get("CIRCLECI"), "Require COCO data and model zoo.") -class TestCaffe2Export(unittest.TestCase): - def setUp(self): - setup_logger() - - def _test_model(self, config_path, device="cpu"): - # requires extra dependencies - from detectron2.export import Caffe2Model, add_export_config, export_caffe2_model - - cfg = get_cfg() - cfg.merge_from_file(model_zoo.get_config_file(config_path)) - cfg = add_export_config(cfg) - cfg.MODEL.DEVICE = device - - model = build_model(cfg) - DetectionCheckpointer(model).load(model_zoo.get_checkpoint_url(config_path)) - - inputs = [{"image": self._get_test_image()}] - c2_model = export_caffe2_model(cfg, model, copy.deepcopy(inputs)) - - with tempfile.TemporaryDirectory(prefix="detectron2_unittest") as d: - c2_model.save_protobuf(d) - c2_model.save_graph(os.path.join(d, "test.svg"), inputs=copy.deepcopy(inputs)) - c2_model = Caffe2Model.load_protobuf(d) - c2_model(inputs)[0]["instances"] - - def _get_test_image(self): - try: - file_name = DatasetCatalog.get("coco_2017_train")[0]["file_name"] - assert PathManager.exists(file_name) - except Exception: - self.skipTest("COCO dataset not available.") - - with PathManager.open(file_name, "rb") as f: - buf = f.read() - img = cv2.imdecode(np.frombuffer(buf, dtype=np.uint8), cv2.IMREAD_COLOR) - assert img is not None, file_name - return torch.from_numpy(img.transpose(2, 0, 1)) - - def testMaskRCNN(self): - self._test_model("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml") - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def testMaskRCNNGPU(self): - self._test_model("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml", device="cuda") - - def testRetinaNet(self): - self._test_model("COCO-Detection/retinanet_R_50_FPN_3x.yaml") - - def testPanopticFPN(self): - self._test_model("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml") diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/test_model_analysis.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/test_model_analysis.py deleted file mode 100644 index 0e3f84c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/test_model_analysis.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
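For reference, the export round trip that `_test_model` above exercises boils down to roughly the sketch below. The temporary-directory prefix, the CPU device and the random input tensor are illustrative stand-ins (the test uses a real COCO image and needs the optional export dependencies); the config name and the `export_caffe2_model` / `Caffe2Model` calls are the ones used in the test.

```
import copy
import tempfile

import torch

from detectron2 import model_zoo
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.export import Caffe2Model, add_export_config, export_caffe2_model
from detectron2.modeling import build_model

config_path = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(config_path))
cfg = add_export_config(cfg)
cfg.MODEL.DEVICE = "cpu"

model = build_model(cfg)
DetectionCheckpointer(model).load(model_zoo.get_checkpoint_url(config_path))

# a random CHW tensor stands in for the real COCO image used by the test
inputs = [{"image": torch.rand(3, 800, 800)}]

with tempfile.TemporaryDirectory(prefix="detectron2_export") as d:
    c2_model = export_caffe2_model(cfg, model, copy.deepcopy(inputs))  # trace + convert
    c2_model.save_protobuf(d)                                          # writes model.pb / model_init.pb
    reloaded = Caffe2Model.load_protobuf(d)                            # caffe2-only inference model
    instances = reloaded(inputs)[0]["instances"]                       # same output structure as the torch model
```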
- - -import unittest -import torch - -import detectron2.model_zoo as model_zoo -from detectron2.config import get_cfg -from detectron2.modeling import build_model -from detectron2.utils.analysis import flop_count_operators, parameter_count - - -def get_model_zoo(config_path): - """ - Like model_zoo.get, but do not load any weights (even pretrained) - """ - cfg_file = model_zoo.get_config_file(config_path) - cfg = get_cfg() - cfg.merge_from_file(cfg_file) - if not torch.cuda.is_available(): - cfg.MODEL.DEVICE = "cpu" - return build_model(cfg) - - -class RetinaNetTest(unittest.TestCase): - def setUp(self): - self.model = get_model_zoo("COCO-Detection/retinanet_R_50_FPN_1x.yaml") - - def test_flop(self): - # RetinaNet supports flop-counting with random inputs - inputs = [{"image": torch.rand(3, 800, 800)}] - res = flop_count_operators(self.model, inputs) - self.assertTrue(int(res["conv"]), 146) # 146B flops - - def test_param_count(self): - res = parameter_count(self.model) - self.assertTrue(res[""], 37915572) - self.assertTrue(res["backbone"], 31452352) - - -class FasterRCNNTest(unittest.TestCase): - def setUp(self): - self.model = get_model_zoo("COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml") - - def test_flop(self): - # Faster R-CNN supports flop-counting with random inputs - inputs = [{"image": torch.rand(3, 800, 800)}] - res = flop_count_operators(self.model, inputs) - - # This only checks flops for backbone & proposal generator - # Flops for box head is not conv, and depends on #proposals, which is - # almost 0 for random inputs. - self.assertTrue(int(res["conv"]), 117) - - def test_param_count(self): - res = parameter_count(self.model) - self.assertTrue(res[""], 41699936) - self.assertTrue(res["backbone"], 26799296) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/test_model_zoo.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/test_model_zoo.py deleted file mode 100644 index 2d16c71..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/test_model_zoo.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -import unittest - -from detectron2 import model_zoo -from detectron2.modeling import FPN, GeneralizedRCNN - -logger = logging.getLogger(__name__) - - -class TestModelZoo(unittest.TestCase): - def test_get_returns_model(self): - model = model_zoo.get("Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml", trained=False) - self.assertIsInstance(model, GeneralizedRCNN) - self.assertIsInstance(model.backbone, FPN) - - def test_get_invalid_model(self): - self.assertRaises(RuntimeError, model_zoo.get, "Invalid/config.yaml") - - def test_get_url(self): - url = model_zoo.get_checkpoint_url("Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml") - self.assertEqual( - url, - "https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn/138602908/model_final_01ca85.pkl", # noqa - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/test_visualizer.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/test_visualizer.py deleted file mode 100644 index 1cdeddc..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/test_visualizer.py +++ /dev/null @@ -1,143 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -# File: - -import numpy as np -import unittest -import torch - -from detectron2.data import MetadataCatalog -from detectron2.structures import BoxMode, Instances, RotatedBoxes -from detectron2.utils.visualizer import Visualizer - - -class TestVisualizer(unittest.TestCase): - def _random_data(self): - H, W = 100, 100 - N = 10 - img = np.random.rand(H, W, 3) * 255 - boxxy = np.random.rand(N, 2) * (H // 2) - boxes = np.concatenate((boxxy, boxxy + H // 2), axis=1) - - def _rand_poly(): - return np.random.rand(3, 2).flatten() * H - - polygons = [[_rand_poly() for _ in range(np.random.randint(1, 5))] for _ in range(N)] - - mask = np.zeros_like(img[:, :, 0], dtype=np.bool) - mask[:10, 10:20] = 1 - - labels = [str(i) for i in range(N)] - return img, boxes, labels, polygons, [mask] * N - - @property - def metadata(self): - return MetadataCatalog.get("coco_2017_train") - - def test_draw_dataset_dict(self): - img = np.random.rand(512, 512, 3) * 255 - dic = { - "annotations": [ - { - "bbox": [ - 368.9946492271106, - 330.891438763377, - 13.148537455410235, - 13.644708680142685, - ], - "bbox_mode": BoxMode.XYWH_ABS, - "category_id": 0, - "iscrowd": 1, - "segmentation": { - "counts": "_jh52m?2N2N2N2O100O10O001N1O2MceP2", - "size": [512, 512], - }, - } - ], - "height": 512, - "image_id": 1, - "width": 512, - } - v = Visualizer(img, self.metadata) - v.draw_dataset_dict(dic) - - def test_overlay_instances(self): - img, boxes, labels, polygons, masks = self._random_data() - - v = Visualizer(img, self.metadata) - output = v.overlay_instances(masks=polygons, boxes=boxes, labels=labels).get_image() - self.assertEqual(output.shape, img.shape) - - # Test 2x scaling - v = Visualizer(img, self.metadata, scale=2.0) - output = v.overlay_instances(masks=polygons, boxes=boxes, labels=labels).get_image() - self.assertEqual(output.shape[0], img.shape[0] * 2) - - # Test overlay masks - v = Visualizer(img, self.metadata) - output = v.overlay_instances(masks=masks, boxes=boxes, labels=labels).get_image() - self.assertEqual(output.shape, img.shape) - - def test_overlay_instances_no_boxes(self): - img, boxes, labels, polygons, _ = self._random_data() - v = Visualizer(img, self.metadata) - v.overlay_instances(masks=polygons, boxes=None, labels=labels).get_image() - - def test_draw_instance_predictions(self): - img, boxes, _, _, masks = self._random_data() - num_inst = len(boxes) - inst = Instances((img.shape[0], img.shape[1])) - inst.pred_classes = torch.randint(0, 80, size=(num_inst,)) - inst.scores = torch.rand(num_inst) - inst.pred_boxes = torch.from_numpy(boxes) - inst.pred_masks = torch.from_numpy(np.asarray(masks)) - - v = Visualizer(img, self.metadata) - v.draw_instance_predictions(inst) - - def test_draw_empty_mask_predictions(self): - img, boxes, _, _, masks = self._random_data() - num_inst = len(boxes) - inst = Instances((img.shape[0], img.shape[1])) - inst.pred_classes = torch.randint(0, 80, size=(num_inst,)) - inst.scores = torch.rand(num_inst) - inst.pred_boxes = torch.from_numpy(boxes) - inst.pred_masks = torch.from_numpy(np.zeros_like(np.asarray(masks))) - - v = Visualizer(img, self.metadata) - v.draw_instance_predictions(inst) - - def test_correct_output_shape(self): - img = np.random.rand(928, 928, 3) * 255 - v = Visualizer(img, self.metadata) - out = v.output.get_image() - self.assertEqual(out.shape, img.shape) - - def test_overlay_rotated_instances(self): - H, W = 100, 150 - img = np.random.rand(H, W, 3) * 255 - num_boxes = 50 - boxes_5d = torch.zeros(num_boxes, 5) - boxes_5d[:, 0] = 
torch.FloatTensor(num_boxes).uniform_(-0.1 * W, 1.1 * W) - boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-0.1 * H, 1.1 * H) - boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, max(W, H)) - boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, max(W, H)) - boxes_5d[:, 4] = torch.FloatTensor(num_boxes).uniform_(-1800, 1800) - rotated_boxes = RotatedBoxes(boxes_5d) - labels = [str(i) for i in range(num_boxes)] - - v = Visualizer(img, self.metadata) - output = v.overlay_instances(boxes=rotated_boxes, labels=labels).get_image() - self.assertEqual(output.shape, img.shape) - - def test_draw_no_metadata(self): - img, boxes, _, _, masks = self._random_data() - num_inst = len(boxes) - inst = Instances((img.shape[0], img.shape[1])) - inst.pred_classes = torch.randint(0, 80, size=(num_inst,)) - inst.scores = torch.rand(num_inst) - inst.pred_boxes = torch.from_numpy(boxes) - inst.pred_masks = torch.from_numpy(np.asarray(masks)) - - v = Visualizer(img, MetadataCatalog.get("asdfasdf")) - v.draw_instance_predictions(inst) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/README.md b/preprocess/humanparsing/mhp_extension/detectron2/tools/README.md deleted file mode 100644 index 3733863..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/README.md +++ /dev/null @@ -1,45 +0,0 @@ - -This directory contains a few scripts that use detectron2. - - -* `train_net.py` - -An example training script that's made to train builtin models of detectron2. - -For usage, see [GETTING_STARTED.md](../GETTING_STARTED.md). - -* `plain_train_net.py` - -Similar to `train_net.py`, but implements a training loop instead of using `Trainer`. -This script includes fewer features but it may be more friendly to hackers. - -* `benchmark.py` - -Benchmark the training speed, inference speed or data loading speed of a given config. - -Usage: -``` -python benchmark.py --config-file config.yaml --task train/eval/data [optional DDP flags] -``` - -* `visualize_json_results.py` - -Visualize the json instance detection/segmentation results dumped by `COCOEvalutor` or `LVISEvaluator` - -Usage: -``` -python visualize_json_results.py --input x.json --output dir/ --dataset coco_2017_val -``` -If not using a builtin dataset, you'll need your own script or modify this script. - -* `visualize_data.py` - -Visualize ground truth raw annotations or training data (after preprocessing/augmentations). - -Usage: -``` -python visualize_data.py --config-file config.yaml --source annotation/dataloader --output-dir dir/ [--show] -``` - -NOTE: the script does not stop by itself when using `--source dataloader` because a training -dataloader is usually infinite. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/analyze_model.py b/preprocess/humanparsing/mhp_extension/detectron2/tools/analyze_model.py deleted file mode 100644 index 9c06ea4..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/analyze_model.py +++ /dev/null @@ -1,127 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -import logging -import numpy as np -from collections import Counter -import tqdm - -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.data import build_detection_test_loader -from detectron2.engine import default_argument_parser -from detectron2.modeling import build_model -from detectron2.utils.analysis import ( - activation_count_operators, - flop_count_operators, - parameter_count_table, -) -from detectron2.utils.logger import setup_logger - -logger = logging.getLogger("detectron2") - - -def setup(args): - cfg = get_cfg() - cfg.merge_from_file(args.config_file) - cfg.DATALOADER.NUM_WORKERS = 0 - cfg.merge_from_list(args.opts) - cfg.freeze() - setup_logger() - return cfg - - -def do_flop(cfg): - data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0]) - model = build_model(cfg) - DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS) - model.eval() - - counts = Counter() - total_flops = [] - for idx, data in zip(tqdm.trange(args.num_inputs), data_loader): # noqa - count = flop_count_operators(model, data) - counts += count - total_flops.append(sum(count.values())) - logger.info( - "(G)Flops for Each Type of Operators:\n" + str([(k, v / idx) for k, v in counts.items()]) - ) - logger.info("Total (G)Flops: {}±{}".format(np.mean(total_flops), np.std(total_flops))) - - -def do_activation(cfg): - data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0]) - model = build_model(cfg) - DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS) - model.eval() - - counts = Counter() - total_activations = [] - for idx, data in zip(tqdm.trange(args.num_inputs), data_loader): # noqa - count = activation_count_operators(model, data) - counts += count - total_activations.append(sum(count.values())) - logger.info( - "(Million) Activations for Each Type of Operators:\n" - + str([(k, v / idx) for k, v in counts.items()]) - ) - logger.info( - "Total (Million) Activations: {}±{}".format( - np.mean(total_activations), np.std(total_activations) - ) - ) - - -def do_parameter(cfg): - model = build_model(cfg) - logger.info("Parameter Count:\n" + parameter_count_table(model, max_depth=5)) - - -def do_structure(cfg): - model = build_model(cfg) - logger.info("Model Structure:\n" + str(model)) - - -if __name__ == "__main__": - parser = default_argument_parser( - epilog=""" -Examples: - -To show parameters of a model: -$ ./analyze_model.py --tasks parameter \\ - --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml - -Flops and activations are data-dependent, therefore inputs and model weights -are needed to count them: - -$ ./analyze_model.py --num-inputs 100 --tasks flop \\ - --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \\ - MODEL.WEIGHTS /path/to/model.pkl -""" - ) - parser.add_argument( - "--tasks", - choices=["flop", "activation", "parameter", "structure"], - required=True, - nargs="+", - ) - parser.add_argument( - "--num-inputs", - default=100, - type=int, - help="number of inputs used to compute statistics for flops/activations, " - "both are data dependent.", - ) - args = parser.parse_args() - assert not args.eval_only - assert args.num_gpus == 1 - - cfg = setup(args) - - for task in args.tasks: - { - "flop": do_flop, - "activation": do_activation, - "parameter": do_parameter, - "structure": do_structure, - }[task](cfg) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/benchmark.py 
b/preprocess/humanparsing/mhp_extension/detectron2/tools/benchmark.py deleted file mode 100644 index 9eec59f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/benchmark.py +++ /dev/null @@ -1,167 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -""" -A script to benchmark builtin models. - -Note: this script has an extra dependency of psutil. -""" - -import itertools -import logging -import psutil -import torch -import tqdm -from fvcore.common.timer import Timer -from torch.nn.parallel import DistributedDataParallel - -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.data import ( - DatasetFromList, - build_detection_test_loader, - build_detection_train_loader, -) -from detectron2.engine import SimpleTrainer, default_argument_parser, hooks, launch -from detectron2.modeling import build_model -from detectron2.solver import build_optimizer -from detectron2.utils import comm -from detectron2.utils.events import CommonMetricPrinter -from detectron2.utils.logger import setup_logger - -logger = logging.getLogger("detectron2") - - -def setup(args): - cfg = get_cfg() - cfg.merge_from_file(args.config_file) - cfg.SOLVER.BASE_LR = 0.001 # Avoid NaNs. Not useful in this script anyway. - cfg.merge_from_list(args.opts) - cfg.freeze() - setup_logger(distributed_rank=comm.get_rank()) - return cfg - - -def benchmark_data(args): - cfg = setup(args) - - timer = Timer() - dataloader = build_detection_train_loader(cfg) - logger.info("Initialize loader using {} seconds.".format(timer.seconds())) - - timer.reset() - itr = iter(dataloader) - for i in range(10): # warmup - next(itr) - if i == 0: - startup_time = timer.seconds() - timer = Timer() - max_iter = 1000 - for _ in tqdm.trange(max_iter): - next(itr) - logger.info( - "{} iters ({} images) in {} seconds.".format( - max_iter, max_iter * cfg.SOLVER.IMS_PER_BATCH, timer.seconds() - ) - ) - logger.info("Startup time: {} seconds".format(startup_time)) - vram = psutil.virtual_memory() - logger.info( - "RAM Usage: {:.2f}/{:.2f} GB".format( - (vram.total - vram.available) / 1024 ** 3, vram.total / 1024 ** 3 - ) - ) - - # test for a few more rounds - for _ in range(10): - timer = Timer() - max_iter = 1000 - for _ in tqdm.trange(max_iter): - next(itr) - logger.info( - "{} iters ({} images) in {} seconds.".format( - max_iter, max_iter * cfg.SOLVER.IMS_PER_BATCH, timer.seconds() - ) - ) - - -def benchmark_train(args): - cfg = setup(args) - model = build_model(cfg) - logger.info("Model:\n{}".format(model)) - if comm.get_world_size() > 1: - model = DistributedDataParallel( - model, device_ids=[comm.get_local_rank()], broadcast_buffers=False - ) - optimizer = build_optimizer(cfg, model) - checkpointer = DetectionCheckpointer(model, optimizer=optimizer) - checkpointer.load(cfg.MODEL.WEIGHTS) - - cfg.defrost() - cfg.DATALOADER.NUM_WORKERS = 0 - data_loader = build_detection_train_loader(cfg) - dummy_data = list(itertools.islice(data_loader, 100)) - - def f(): - data = DatasetFromList(dummy_data, copy=False) - while True: - yield from data - - max_iter = 400 - trainer = SimpleTrainer(model, f(), optimizer) - trainer.register_hooks( - [hooks.IterationTimer(), hooks.PeriodicWriter([CommonMetricPrinter(max_iter)])] - ) - trainer.train(1, max_iter) - - -@torch.no_grad() -def benchmark_eval(args): - cfg = setup(args) - model = build_model(cfg) - model.eval() - logger.info("Model:\n{}".format(model)) - 
DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS) - - cfg.defrost() - cfg.DATALOADER.NUM_WORKERS = 0 - data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0]) - dummy_data = list(itertools.islice(data_loader, 100)) - - def f(): - while True: - yield from DatasetFromList(dummy_data, copy=False) - - for _ in range(5): # warmup - model(dummy_data[0]) - - max_iter = 400 - timer = Timer() - with tqdm.tqdm(total=max_iter) as pbar: - for idx, d in enumerate(f()): - if idx == max_iter: - break - model(d) - pbar.update() - logger.info("{} iters in {} seconds.".format(max_iter, timer.seconds())) - - -if __name__ == "__main__": - parser = default_argument_parser() - parser.add_argument("--task", choices=["train", "eval", "data"], required=True) - args = parser.parse_args() - assert not args.eval_only - - if args.task == "data": - f = benchmark_data - elif args.task == "train": - """ - Note: training speed may not be representative. - The training cost of a R-CNN model varies with the content of the data - and the quality of the model. - """ - f = benchmark_train - elif args.task == "eval": - f = benchmark_eval - # only benchmark single-GPU inference. - assert args.num_gpus == 1 and args.num_machines == 1 - launch(f, args.num_gpus, args.num_machines, args.machine_rank, args.dist_url, args=(args,)) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/convert-torchvision-to-d2.py b/preprocess/humanparsing/mhp_extension/detectron2/tools/convert-torchvision-to-d2.py deleted file mode 100644 index 18a24e4..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/convert-torchvision-to-d2.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import pickle as pkl -import sys -import torch - -""" -Usage: - # download one of the ResNet{18,34,50,101,152} models from torchvision: - wget https://download.pytorch.org/models/resnet50-19c8e357.pth -O r50.pth - # run the conversion - ./convert-torchvision-to-d2.py r50.pth r50.pkl - - # Then, use r50.pkl with the following changes in config: - -MODEL: - WEIGHTS: "/path/to/r50.pkl" - PIXEL_MEAN: [123.675, 116.280, 103.530] - PIXEL_STD: [58.395, 57.120, 57.375] - RESNETS: - DEPTH: 50 - STRIDE_IN_1X1: False -INPUT: - FORMAT: "RGB" - - These models typically produce slightly worse results than the - pre-trained ResNets we use in official configs, which are the - original ResNet models released by MSRA. -""" - -if __name__ == "__main__": - input = sys.argv[1] - - obj = torch.load(input, map_location="cpu") - - newmodel = {} - for k in list(obj.keys()): - old_k = k - if "layer" not in k: - k = "stem." 
+ k - for t in [1, 2, 3, 4]: - k = k.replace("layer{}".format(t), "res{}".format(t + 1)) - for t in [1, 2, 3]: - k = k.replace("bn{}".format(t), "conv{}.norm".format(t)) - k = k.replace("downsample.0", "shortcut") - k = k.replace("downsample.1", "shortcut.norm") - print(old_k, "->", k) - newmodel[k] = obj.pop(old_k).detach().numpy() - - res = {"model": newmodel, "__author__": "torchvision", "matching_heuristics": True} - - with open(sys.argv[2], "wb") as f: - pkl.dump(res, f) - if obj: - print("Unconverted keys:", obj.keys()) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/deploy/README.md b/preprocess/humanparsing/mhp_extension/detectron2/tools/deploy/README.md deleted file mode 100644 index b9d5b15..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/deploy/README.md +++ /dev/null @@ -1,9 +0,0 @@ - -This directory contains: - -1. A script that converts a detectron2 model to caffe2 format. - -2. An example that loads a Mask R-CNN model in caffe2 format and runs inference. - -See [tutorial](https://detectron2.readthedocs.io/tutorials/deployment.html) -for their usage. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/deploy/caffe2_converter.py b/preprocess/humanparsing/mhp_extension/detectron2/tools/deploy/caffe2_converter.py deleted file mode 100644 index 08feb69..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/deploy/caffe2_converter.py +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -import argparse -import os -import onnx -import torch - -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.data import build_detection_test_loader -from detectron2.evaluation import COCOEvaluator, inference_on_dataset, print_csv_format -from detectron2.export import Caffe2Tracer, add_export_config -from detectron2.modeling import build_model -from detectron2.utils.logger import setup_logger - - -def setup_cfg(args): - cfg = get_cfg() - # cuda context is initialized before creating dataloader, so we don't fork anymore - cfg.DATALOADER.NUM_WORKERS = 0 - cfg = add_export_config(cfg) - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - if cfg.MODEL.DEVICE != "cpu": - TORCH_VERSION = tuple(int(x) for x in torch.__version__.split(".")[:2]) - assert TORCH_VERSION >= (1, 5), "PyTorch>=1.5 required for GPU conversion!" 
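To make the renaming loop in `convert-torchvision-to-d2.py` above concrete, here are a few torchvision ResNet-50 parameter names and the detectron2 names they end up with. The pairs are hand-derived from the replace rules shown, not captured script output.

```
# Hand-derived from the replace rules above (illustrative, not script output):
expected_renames = {
    "conv1.weight": "stem.conv1.weight",               # no "layer" in key -> "stem." prefix
    "bn1.weight": "stem.conv1.norm.weight",            # bn{t} -> conv{t}.norm
    "layer1.0.conv2.weight": "res2.0.conv2.weight",    # layer{t} -> res{t+1}
    "layer1.0.bn2.bias": "res2.0.conv2.norm.bias",
    "layer4.0.downsample.0.weight": "res5.0.shortcut.weight",       # downsample.0 -> shortcut
    "layer4.0.downsample.1.weight": "res5.0.shortcut.norm.weight",  # downsample.1 -> shortcut.norm
}
```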
- return cfg - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Convert a model using caffe2 tracing.") - parser.add_argument( - "--format", - choices=["caffe2", "onnx", "torchscript"], - help="output format", - default="caffe2", - ) - parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file") - parser.add_argument("--run-eval", action="store_true") - parser.add_argument("--output", help="output directory for the converted model") - parser.add_argument( - "opts", - help="Modify config options using the command-line", - default=None, - nargs=argparse.REMAINDER, - ) - args = parser.parse_args() - logger = setup_logger() - logger.info("Command line arguments: " + str(args)) - os.makedirs(args.output, exist_ok=True) - - cfg = setup_cfg(args) - - # create a torch model - torch_model = build_model(cfg) - DetectionCheckpointer(torch_model).resume_or_load(cfg.MODEL.WEIGHTS) - - # get a sample data - data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0]) - first_batch = next(iter(data_loader)) - - # convert and save caffe2 model - tracer = Caffe2Tracer(cfg, torch_model, first_batch) - if args.format == "caffe2": - caffe2_model = tracer.export_caffe2() - caffe2_model.save_protobuf(args.output) - # draw the caffe2 graph - caffe2_model.save_graph(os.path.join(args.output, "model.svg"), inputs=first_batch) - elif args.format == "onnx": - onnx_model = tracer.export_onnx() - onnx.save(onnx_model, os.path.join(args.output, "model.onnx")) - elif args.format == "torchscript": - script_model = tracer.export_torchscript() - script_model.save(os.path.join(args.output, "model.ts")) - - # Recursively print IR of all modules - with open(os.path.join(args.output, "model_ts_IR.txt"), "w") as f: - try: - f.write(script_model._actual_script_module._c.dump_to_str(True, False, False)) - except AttributeError: - pass - # Print IR of the entire graph (all submodules inlined) - with open(os.path.join(args.output, "model_ts_IR_inlined.txt"), "w") as f: - f.write(str(script_model.inlined_graph)) - # Print the model structure in pytorch style - with open(os.path.join(args.output, "model.txt"), "w") as f: - f.write(str(script_model)) - - # run evaluation with the converted model - if args.run_eval: - assert args.format == "caffe2", "Python inference in other format is not yet supported." - dataset = cfg.DATASETS.TEST[0] - data_loader = build_detection_test_loader(cfg, dataset) - # NOTE: hard-coded evaluator. change to the evaluator for your dataset - evaluator = COCOEvaluator(dataset, cfg, True, args.output) - metrics = inference_on_dataset(caffe2_model, data_loader, evaluator) - print_csv_format(metrics) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/deploy/caffe2_mask_rcnn.cpp b/preprocess/humanparsing/mhp_extension/detectron2/tools/deploy/caffe2_mask_rcnn.cpp deleted file mode 100644 index 44370b4..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/deploy/caffe2_mask_rcnn.cpp +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -C10_DEFINE_string(predict_net, "", "path to model.pb"); -C10_DEFINE_string(init_net, "", "path to model_init.pb"); -C10_DEFINE_string(input, "", "path to input image"); - -using namespace std; -using namespace caffe2; - -int main(int argc, char** argv) { - caffe2::GlobalInit(&argc, &argv); - string predictNetPath = FLAGS_predict_net; - string initNetPath = FLAGS_init_net; - cv::Mat input = cv::imread(FLAGS_input, cv::IMREAD_COLOR); - - const int height = input.rows; - const int width = input.cols; - // FPN models require divisibility of 32 - assert(height % 32 == 0 && width % 32 == 0); - const int batch = 1; - const int channels = 3; - - // initialize Net and Workspace - caffe2::NetDef initNet_, predictNet_; - CAFFE_ENFORCE(ReadProtoFromFile(initNetPath, &initNet_)); - CAFFE_ENFORCE(ReadProtoFromFile(predictNetPath, &predictNet_)); - - Workspace workSpace; - for (auto& str : predictNet_.external_input()) { - workSpace.CreateBlob(str); - } - CAFFE_ENFORCE(workSpace.CreateNet(predictNet_)); - CAFFE_ENFORCE(workSpace.RunNetOnce(initNet_)); - - // setup inputs - auto data = BlobGetMutableTensor(workSpace.GetBlob("data"), caffe2::CPU); - data->Resize(batch, channels, height, width); - float* ptr = data->mutable_data(); - // HWC to CHW - for (int c = 0; c < 3; ++c) { - for (int i = 0; i < height * width; ++i) { - ptr[c * height * width + i] = static_cast(input.data[3 * i + c]); - } - } - - auto im_info = - BlobGetMutableTensor(workSpace.GetBlob("im_info"), caffe2::CPU); - im_info->Resize(batch, 3); - float* im_info_ptr = im_info->mutable_data(); - im_info_ptr[0] = height; - im_info_ptr[1] = width; - im_info_ptr[2] = 1.0; - - // run the network - CAFFE_ENFORCE(workSpace.RunNet(predictNet_.name())); - - // run 3 more times to benchmark - int N_benchmark = 3; - auto start_time = chrono::high_resolution_clock::now(); - for (int i = 0; i < N_benchmark; ++i) { - CAFFE_ENFORCE(workSpace.RunNet(predictNet_.name())); - } - auto end_time = chrono::high_resolution_clock::now(); - auto ms = chrono::duration_cast(end_time - start_time) - .count(); - cout << "Latency (should vary with different inputs): " - << ms * 1.0 / 1e6 / N_benchmark << " seconds" << endl; - - // parse Mask R-CNN outputs - caffe2::Tensor bbox( - workSpace.GetBlob("bbox_nms")->Get(), caffe2::CPU); - caffe2::Tensor scores( - workSpace.GetBlob("score_nms")->Get(), caffe2::CPU); - caffe2::Tensor labels( - workSpace.GetBlob("class_nms")->Get(), caffe2::CPU); - caffe2::Tensor mask_probs( - workSpace.GetBlob("mask_fcn_probs")->Get(), caffe2::CPU); - cout << "bbox:" << bbox.DebugString() << endl; - cout << "scores:" << scores.DebugString() << endl; - cout << "labels:" << labels.DebugString() << endl; - cout << "mask_probs: " << mask_probs.DebugString() << endl; - - int num_instances = bbox.sizes()[0]; - for (int i = 0; i < num_instances; ++i) { - float score = scores.data()[i]; - if (score < 0.6) - continue; // skip them - - const float* box = bbox.data() + i * 4; - int label = labels.data()[i]; - - cout << "Prediction " << i << ", xyxy=("; - cout << box[0] << ", " << box[1] << ", " << box[2] << ", " << box[3] - << "); score=" << score << "; label=" << label << endl; - - const float* mask = mask_probs.data() + - i * mask_probs.size_from_dim(1) + label * mask_probs.size_from_dim(2); - - // save the 28x28 mask - cv::Mat cv_mask(28, 28, CV_32FC1); - memcpy(cv_mask.data, mask, 28 * 28 * sizeof(float)); - 
cv::imwrite("mask" + std::to_string(i) + ".png", cv_mask * 255.); - } - return 0; -} diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/deploy/torchscript_traced_mask_rcnn.cpp b/preprocess/humanparsing/mhp_extension/detectron2/tools/deploy/torchscript_traced_mask_rcnn.cpp deleted file mode 100644 index 82fbdb0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/deploy/torchscript_traced_mask_rcnn.cpp +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -#include -#include -#include - -#include -#include - -using namespace std; - -// experimental. don't use -int main(int argc, const char* argv[]) { - if (argc != 3) { - return 1; - } - std::string image_file = argv[2]; - - torch::autograd::AutoGradMode guard(false); - auto module = torch::jit::load(argv[1]); - - assert(module.buffers().size() > 0); - // Assume that the entire model is on the same device. - // We just put input to this device. - auto device = (*begin(module.buffers())).device(); - - cv::Mat input_img = cv::imread(image_file, cv::IMREAD_COLOR); - const int height = input_img.rows; - const int width = input_img.cols; - // FPN models require divisibility of 32 - assert(height % 32 == 0 && width % 32 == 0); - const int channels = 3; - - auto input = torch::from_blob( - input_img.data, {1, height, width, channels}, torch::kUInt8); - // NHWC to NCHW - input = input.to(device, torch::kFloat).permute({0, 3, 1, 2}).contiguous(); - - std::array im_info_data{height * 1.0f, width * 1.0f, 1.0f}; - auto im_info = torch::from_blob(im_info_data.data(), {1, 3}).to(device); - - // run the network - auto output = module.forward({std::make_tuple(input, im_info)}); - - // run 3 more times to benchmark - int N_benchmark = 3; - auto start_time = chrono::high_resolution_clock::now(); - for (int i = 0; i < N_benchmark; ++i) { - output = module.forward({std::make_tuple(input, im_info)}); - } - auto end_time = chrono::high_resolution_clock::now(); - auto ms = chrono::duration_cast(end_time - start_time) - .count(); - cout << "Latency (should vary with different inputs): " - << ms * 1.0 / 1e6 / N_benchmark << " seconds" << endl; - - auto outputs = output.toTuple()->elements(); - // parse Mask R-CNN outputs - auto bbox = outputs[0].toTensor(), scores = outputs[1].toTensor(), - labels = outputs[2].toTensor(), mask_probs = outputs[3].toTensor(); - - cout << "bbox: " << bbox.toString() << " " << bbox.sizes() << endl; - cout << "scores: " << scores.toString() << " " << scores.sizes() << endl; - cout << "labels: " << labels.toString() << " " << labels.sizes() << endl; - cout << "mask_probs: " << mask_probs.toString() << " " << mask_probs.sizes() - << endl; - - int num_instances = bbox.sizes()[0]; - cout << bbox << endl; - return 0; -} diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/finetune_net.py b/preprocess/humanparsing/mhp_extension/detectron2/tools/finetune_net.py deleted file mode 100644 index 3e52185..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/finetune_net.py +++ /dev/null @@ -1,183 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -""" -Detection Training Script. - -This scripts reads a given config file and runs the training or evaluation. -It is an entry point that is made to train standard models in detectron2. 
- -In order to let one script support training of many models, -this script contains logic that are specific to these built-in models and therefore -may not be suitable for your own project. -For example, your research project perhaps only needs a single "evaluator". - -Therefore, we recommend you to use detectron2 as an library and take -this file as an example of how to use the library. -You may want to write your own script with your data and other customizations. -""" - -import logging -import os -from collections import OrderedDict -import torch - -import detectron2.utils.comm as comm -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.data import MetadataCatalog -from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch -from detectron2.evaluation import ( - CityscapesInstanceEvaluator, - CityscapesSemSegEvaluator, - COCOEvaluator, - COCOPanopticEvaluator, - DatasetEvaluators, - LVISEvaluator, - PascalVOCDetectionEvaluator, - SemSegEvaluator, - verify_results, -) -from detectron2.modeling import GeneralizedRCNNWithTTA - -# Register Custom Dataset -from detectron2.data.datasets import register_coco_instances - -register_coco_instances("CIHP_train", {}, "../../data/msrcnn_finetune_annotations/CIHP_train.json", - "../../data/instance-level_human_parsing/Training/Images") -register_coco_instances("CIHP_val", {}, "../../data/msrcnn_finetune_annotations/CIHP_val.json", - "../../data/instance-level_human_parsing/Validation/Images") -register_coco_instances("demo_train", {}, "../../demo/annotations/demo_train.json", - "../../demo/img") -register_coco_instances("demo_val", {}, "../../demo/annotations/demo_val.json", - "../../demo/img") - - -class Trainer(DefaultTrainer): - """ - We use the "DefaultTrainer" which contains pre-defined default logic for - standard training workflow. They may not work for you, especially if you - are working on a new research project. In that case you can use the cleaner - "SimpleTrainer", or write your own training loop. You can use - "tools/plain_train_net.py" as an example. - """ - - @classmethod - def build_evaluator(cls, cfg, dataset_name, output_folder=None): - """ - Create evaluator(s) for a given dataset. - This uses the special metadata "evaluator_type" associated with each builtin dataset. - For your own dataset, you can simply create an evaluator manually in your - script and do not have to worry about the hacky if-else logic here. - """ - if output_folder is None: - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") - evaluator_list = [] - evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type - if evaluator_type in ["sem_seg", "coco_panoptic_seg"]: - evaluator_list.append( - SemSegEvaluator( - dataset_name, - distributed=True, - num_classes=cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, - ignore_label=cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, - output_dir=output_folder, - ) - ) - if evaluator_type in ["coco", "coco_panoptic_seg"]: - evaluator_list.append(COCOEvaluator(dataset_name, cfg, True, output_folder)) - if evaluator_type == "coco_panoptic_seg": - evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder)) - if evaluator_type == "cityscapes_instance": - assert ( - torch.cuda.device_count() >= comm.get_rank() - ), "CityscapesEvaluator currently do not work with multiple machines." 
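As the `build_evaluator` docstring above notes, for a custom dataset you can construct the evaluator directly instead of going through the `evaluator_type` dispatch. A minimal sketch for the `CIHP_val` split registered at the top of this file follows; `cfg` and `model` are assumed to come from `setup(args)` and `Trainer.build_model(cfg)` in this script, and the output directory is illustrative.

```
import os

from detectron2.data import build_detection_test_loader
from detectron2.evaluation import COCOEvaluator, inference_on_dataset, print_csv_format

# cfg and model as produced by setup(args) / Trainer.build_model(cfg) in this script
evaluator = COCOEvaluator("CIHP_val", cfg, True, os.path.join(cfg.OUTPUT_DIR, "inference"))
val_loader = build_detection_test_loader(cfg, "CIHP_val")
results = inference_on_dataset(model, val_loader, evaluator)
print_csv_format(results)
```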
- return CityscapesInstanceEvaluator(dataset_name) - if evaluator_type == "cityscapes_sem_seg": - assert ( - torch.cuda.device_count() >= comm.get_rank() - ), "CityscapesEvaluator currently do not work with multiple machines." - return CityscapesSemSegEvaluator(dataset_name) - elif evaluator_type == "pascal_voc": - return PascalVOCDetectionEvaluator(dataset_name) - elif evaluator_type == "lvis": - return LVISEvaluator(dataset_name, cfg, True, output_folder) - if len(evaluator_list) == 0: - raise NotImplementedError( - "no Evaluator for the dataset {} with the type {}".format( - dataset_name, evaluator_type - ) - ) - elif len(evaluator_list) == 1: - return evaluator_list[0] - return DatasetEvaluators(evaluator_list) - - @classmethod - def test_with_TTA(cls, cfg, model): - logger = logging.getLogger("detectron2.trainer") - # In the end of training, run an evaluation with TTA - # Only support some R-CNN models. - logger.info("Running inference with test-time augmentation ...") - model = GeneralizedRCNNWithTTA(cfg, model) - evaluators = [ - cls.build_evaluator( - cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA") - ) - for name in cfg.DATASETS.TEST - ] - res = cls.test(cfg, model, evaluators) - res = OrderedDict({k + "_TTA": v for k, v in res.items()}) - return res - - -def setup(args): - """ - Create configs and perform basic setups. - """ - cfg = get_cfg() - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup(cfg, args) - return cfg - - -def main(args): - cfg = setup(args) - - if args.eval_only: - model = Trainer.build_model(cfg) - DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( - cfg.MODEL.WEIGHTS, resume=args.resume - ) - res = Trainer.test(cfg, model) - if cfg.TEST.AUG.ENABLED: - res.update(Trainer.test_with_TTA(cfg, model)) - if comm.is_main_process(): - verify_results(cfg, res) - return res - - """ - If you'd like to do anything fancier than the standard training logic, - consider writing your own training loop (see plain_train_net.py) or - subclassing the trainer. - """ - trainer = Trainer(cfg) - trainer.resume_or_load(resume=False) - if cfg.TEST.AUG.ENABLED: - trainer.register_hooks( - [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))] - ) - return trainer.train() - - -if __name__ == "__main__": - args = default_argument_parser().parse_args() - print("Command Line Args:", args) - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/inference.sh b/preprocess/humanparsing/mhp_extension/detectron2/tools/inference.sh deleted file mode 100644 index 3b9d39e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/inference.sh +++ /dev/null @@ -1,4 +0,0 @@ -python finetune_net.py \ - --num-gpus 1 \ - --config-file ../configs/Misc/parsing_inference.yaml \ - --eval-only MODEL.WEIGHTS ./model_final.pth TEST.AUG.ENABLED False diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/plain_train_net.py b/preprocess/humanparsing/mhp_extension/detectron2/tools/plain_train_net.py deleted file mode 100644 index 52a0a28..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/plain_train_net.py +++ /dev/null @@ -1,237 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -""" -Detectron2 training script with a plain training loop. 
- -This script reads a given config file and runs the training or evaluation. -It is an entry point that is able to train standard models in detectron2. - -In order to let one script support training of many models, -this script contains logic that are specific to these built-in models and therefore -may not be suitable for your own project. -For example, your research project perhaps only needs a single "evaluator". - -Therefore, we recommend you to use detectron2 as a library and take -this file as an example of how to use the library. -You may want to write your own script with your data and other customizations. - -Compared to "train_net.py", this script supports fewer default features. -It also includes fewer abstraction, therefore is easier to add custom logic. -""" - -import logging -import os -from collections import OrderedDict -import torch -from torch.nn.parallel import DistributedDataParallel - -import detectron2.utils.comm as comm -from detectron2.checkpoint import DetectionCheckpointer, PeriodicCheckpointer -from detectron2.config import get_cfg -from detectron2.data import ( - MetadataCatalog, - build_detection_test_loader, - build_detection_train_loader, -) -from detectron2.engine import default_argument_parser, default_setup, launch -from detectron2.evaluation import ( - CityscapesInstanceEvaluator, - CityscapesSemSegEvaluator, - COCOEvaluator, - COCOPanopticEvaluator, - DatasetEvaluators, - LVISEvaluator, - PascalVOCDetectionEvaluator, - SemSegEvaluator, - inference_on_dataset, - print_csv_format, -) -from detectron2.modeling import build_model -from detectron2.solver import build_lr_scheduler, build_optimizer -from detectron2.utils.events import ( - CommonMetricPrinter, - EventStorage, - JSONWriter, - TensorboardXWriter, -) - -logger = logging.getLogger("detectron2") - - -def get_evaluator(cfg, dataset_name, output_folder=None): - """ - Create evaluator(s) for a given dataset. - This uses the special metadata "evaluator_type" associated with each builtin dataset. - For your own dataset, you can simply create an evaluator manually in your - script and do not have to worry about the hacky if-else logic here. - """ - if output_folder is None: - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") - evaluator_list = [] - evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type - if evaluator_type in ["sem_seg", "coco_panoptic_seg"]: - evaluator_list.append( - SemSegEvaluator( - dataset_name, - distributed=True, - num_classes=cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, - ignore_label=cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, - output_dir=output_folder, - ) - ) - if evaluator_type in ["coco", "coco_panoptic_seg"]: - evaluator_list.append(COCOEvaluator(dataset_name, cfg, True, output_folder)) - if evaluator_type == "coco_panoptic_seg": - evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder)) - if evaluator_type == "cityscapes_instance": - assert ( - torch.cuda.device_count() >= comm.get_rank() - ), "CityscapesEvaluator currently do not work with multiple machines." - return CityscapesInstanceEvaluator(dataset_name) - if evaluator_type == "cityscapes_sem_seg": - assert ( - torch.cuda.device_count() >= comm.get_rank() - ), "CityscapesEvaluator currently do not work with multiple machines." 
- return CityscapesSemSegEvaluator(dataset_name) - if evaluator_type == "pascal_voc": - return PascalVOCDetectionEvaluator(dataset_name) - if evaluator_type == "lvis": - return LVISEvaluator(dataset_name, cfg, True, output_folder) - if len(evaluator_list) == 0: - raise NotImplementedError( - "no Evaluator for the dataset {} with the type {}".format(dataset_name, evaluator_type) - ) - if len(evaluator_list) == 1: - return evaluator_list[0] - return DatasetEvaluators(evaluator_list) - - -def do_test(cfg, model): - results = OrderedDict() - for dataset_name in cfg.DATASETS.TEST: - data_loader = build_detection_test_loader(cfg, dataset_name) - evaluator = get_evaluator( - cfg, dataset_name, os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) - ) - results_i = inference_on_dataset(model, data_loader, evaluator) - results[dataset_name] = results_i - if comm.is_main_process(): - logger.info("Evaluation results for {} in csv format:".format(dataset_name)) - print_csv_format(results_i) - if len(results) == 1: - results = list(results.values())[0] - return results - - -def do_train(cfg, model, resume=False): - model.train() - optimizer = build_optimizer(cfg, model) - scheduler = build_lr_scheduler(cfg, optimizer) - - checkpointer = DetectionCheckpointer( - model, cfg.OUTPUT_DIR, optimizer=optimizer, scheduler=scheduler - ) - start_iter = ( - checkpointer.resume_or_load(cfg.MODEL.WEIGHTS, resume=resume).get("iteration", -1) + 1 - ) - max_iter = cfg.SOLVER.MAX_ITER - - periodic_checkpointer = PeriodicCheckpointer( - checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD, max_iter=max_iter - ) - - writers = ( - [ - CommonMetricPrinter(max_iter), - JSONWriter(os.path.join(cfg.OUTPUT_DIR, "metrics.json")), - TensorboardXWriter(cfg.OUTPUT_DIR), - ] - if comm.is_main_process() - else [] - ) - - # compared to "train_net.py", we do not support accurate timing and - # precise BN here, because they are not trivial to implement - data_loader = build_detection_train_loader(cfg) - logger.info("Starting training from iteration {}".format(start_iter)) - with EventStorage(start_iter) as storage: - for data, iteration in zip(data_loader, range(start_iter, max_iter)): - iteration = iteration + 1 - storage.step() - - loss_dict = model(data) - losses = sum(loss_dict.values()) - assert torch.isfinite(losses).all(), loss_dict - - loss_dict_reduced = {k: v.item() for k, v in comm.reduce_dict(loss_dict).items()} - losses_reduced = sum(loss for loss in loss_dict_reduced.values()) - if comm.is_main_process(): - storage.put_scalars(total_loss=losses_reduced, **loss_dict_reduced) - - optimizer.zero_grad() - losses.backward() - optimizer.step() - storage.put_scalar("lr", optimizer.param_groups[0]["lr"], smoothing_hint=False) - scheduler.step() - - if ( - cfg.TEST.EVAL_PERIOD > 0 - and iteration % cfg.TEST.EVAL_PERIOD == 0 - and iteration != max_iter - ): - do_test(cfg, model) - # Compared to "train_net.py", the test results are not dumped to EventStorage - comm.synchronize() - - if iteration - start_iter > 5 and (iteration % 20 == 0 or iteration == max_iter): - for writer in writers: - writer.write() - periodic_checkpointer.step(iteration) - - -def setup(args): - """ - Create configs and perform basic setups. 
- """ - cfg = get_cfg() - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup( - cfg, args - ) # if you don't like any of the default setup, write your own setup code - return cfg - - -def main(args): - cfg = setup(args) - - model = build_model(cfg) - logger.info("Model:\n{}".format(model)) - if args.eval_only: - DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( - cfg.MODEL.WEIGHTS, resume=args.resume - ) - return do_test(cfg, model) - - distributed = comm.get_world_size() > 1 - if distributed: - model = DistributedDataParallel( - model, device_ids=[comm.get_local_rank()], broadcast_buffers=False - ) - - do_train(cfg, model, resume=args.resume) - return do_test(cfg, model) - - -if __name__ == "__main__": - args = default_argument_parser().parse_args() - print("Command Line Args:", args) - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/run.sh b/preprocess/humanparsing/mhp_extension/detectron2/tools/run.sh deleted file mode 100644 index b892673..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/run.sh +++ /dev/null @@ -1,3 +0,0 @@ -python finetune_net.py \ - --config-file ../configs/Misc/parsing_finetune_cihp+vip.yaml \ - --num-gpus 8 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/train_net.py b/preprocess/humanparsing/mhp_extension/detectron2/tools/train_net.py deleted file mode 100644 index b1c0ee4..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/train_net.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -""" -Detection Training Script. - -This scripts reads a given config file and runs the training or evaluation. -It is an entry point that is made to train standard models in detectron2. - -In order to let one script support training of many models, -this script contains logic that are specific to these built-in models and therefore -may not be suitable for your own project. -For example, your research project perhaps only needs a single "evaluator". - -Therefore, we recommend you to use detectron2 as an library and take -this file as an example of how to use the library. -You may want to write your own script with your data and other customizations. -""" - -import logging -import os -from collections import OrderedDict -import torch - -import detectron2.utils.comm as comm -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.data import MetadataCatalog -from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch -from detectron2.evaluation import ( - CityscapesInstanceEvaluator, - CityscapesSemSegEvaluator, - COCOEvaluator, - COCOPanopticEvaluator, - DatasetEvaluators, - LVISEvaluator, - PascalVOCDetectionEvaluator, - SemSegEvaluator, - verify_results, -) -from detectron2.modeling import GeneralizedRCNNWithTTA - - -class Trainer(DefaultTrainer): - """ - We use the "DefaultTrainer" which contains pre-defined default logic for - standard training workflow. They may not work for you, especially if you - are working on a new research project. In that case you can use the cleaner - "SimpleTrainer", or write your own training loop. You can use - "tools/plain_train_net.py" as an example. 
- """ - - @classmethod - def build_evaluator(cls, cfg, dataset_name, output_folder=None): - """ - Create evaluator(s) for a given dataset. - This uses the special metadata "evaluator_type" associated with each builtin dataset. - For your own dataset, you can simply create an evaluator manually in your - script and do not have to worry about the hacky if-else logic here. - """ - if output_folder is None: - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") - evaluator_list = [] - evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type - if evaluator_type in ["sem_seg", "coco_panoptic_seg"]: - evaluator_list.append( - SemSegEvaluator( - dataset_name, - distributed=True, - num_classes=cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, - ignore_label=cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, - output_dir=output_folder, - ) - ) - if evaluator_type in ["coco", "coco_panoptic_seg"]: - evaluator_list.append(COCOEvaluator(dataset_name, cfg, True, output_folder)) - if evaluator_type == "coco_panoptic_seg": - evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder)) - if evaluator_type == "cityscapes_instance": - assert ( - torch.cuda.device_count() >= comm.get_rank() - ), "CityscapesEvaluator currently do not work with multiple machines." - return CityscapesInstanceEvaluator(dataset_name) - if evaluator_type == "cityscapes_sem_seg": - assert ( - torch.cuda.device_count() >= comm.get_rank() - ), "CityscapesEvaluator currently do not work with multiple machines." - return CityscapesSemSegEvaluator(dataset_name) - elif evaluator_type == "pascal_voc": - return PascalVOCDetectionEvaluator(dataset_name) - elif evaluator_type == "lvis": - return LVISEvaluator(dataset_name, cfg, True, output_folder) - if len(evaluator_list) == 0: - raise NotImplementedError( - "no Evaluator for the dataset {} with the type {}".format( - dataset_name, evaluator_type - ) - ) - elif len(evaluator_list) == 1: - return evaluator_list[0] - return DatasetEvaluators(evaluator_list) - - @classmethod - def test_with_TTA(cls, cfg, model): - logger = logging.getLogger("detectron2.trainer") - # In the end of training, run an evaluation with TTA - # Only support some R-CNN models. - logger.info("Running inference with test-time augmentation ...") - model = GeneralizedRCNNWithTTA(cfg, model) - evaluators = [ - cls.build_evaluator( - cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA") - ) - for name in cfg.DATASETS.TEST - ] - res = cls.test(cfg, model, evaluators) - res = OrderedDict({k + "_TTA": v for k, v in res.items()}) - return res - - -def setup(args): - """ - Create configs and perform basic setups. - """ - cfg = get_cfg() - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup(cfg, args) - return cfg - - -def main(args): - cfg = setup(args) - - if args.eval_only: - model = Trainer.build_model(cfg) - DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( - cfg.MODEL.WEIGHTS, resume=args.resume - ) - res = Trainer.test(cfg, model) - if cfg.TEST.AUG.ENABLED: - res.update(Trainer.test_with_TTA(cfg, model)) - if comm.is_main_process(): - verify_results(cfg, res) - return res - - """ - If you'd like to do anything fancier than the standard training logic, - consider writing your own training loop (see plain_train_net.py) or - subclassing the trainer. 
- """ - trainer = Trainer(cfg) - trainer.resume_or_load(resume=args.resume) - if cfg.TEST.AUG.ENABLED: - trainer.register_hooks( - [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))] - ) - return trainer.train() - - -if __name__ == "__main__": - args = default_argument_parser().parse_args() - print("Command Line Args:", args) - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/visualize_data.py b/preprocess/humanparsing/mhp_extension/detectron2/tools/visualize_data.py deleted file mode 100644 index b143b2d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/visualize_data.py +++ /dev/null @@ -1,93 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import argparse -import os -from itertools import chain -import cv2 -import tqdm - -from detectron2.config import get_cfg -from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_train_loader -from detectron2.data import detection_utils as utils -from detectron2.data.build import filter_images_with_few_keypoints -from detectron2.utils.logger import setup_logger -from detectron2.utils.visualizer import Visualizer - - -def setup(args): - cfg = get_cfg() - if args.config_file: - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - return cfg - - -def parse_args(in_args=None): - parser = argparse.ArgumentParser(description="Visualize ground-truth data") - parser.add_argument( - "--source", - choices=["annotation", "dataloader"], - required=True, - help="visualize the annotations or the data loader (with pre-processing)", - ) - parser.add_argument("--config-file", metavar="FILE", help="path to config file") - parser.add_argument("--output-dir", default="./", help="path to output directory") - parser.add_argument("--show", action="store_true", help="show output in a window") - parser.add_argument( - "opts", - help="Modify config options using the command-line", - default=None, - nargs=argparse.REMAINDER, - ) - return parser.parse_args(in_args) - - -if __name__ == "__main__": - args = parse_args() - logger = setup_logger() - logger.info("Arguments: " + str(args)) - cfg = setup(args) - - dirname = args.output_dir - os.makedirs(dirname, exist_ok=True) - metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0]) - - def output(vis, fname): - if args.show: - print(fname) - cv2.imshow("window", vis.get_image()[:, :, ::-1]) - cv2.waitKey() - else: - filepath = os.path.join(dirname, fname) - print("Saving to {} ...".format(filepath)) - vis.save(filepath) - - scale = 2.0 if args.show else 1.0 - if args.source == "dataloader": - train_data_loader = build_detection_train_loader(cfg) - for batch in train_data_loader: - for per_image in batch: - # Pytorch tensor is in (C, H, W) format - img = per_image["image"].permute(1, 2, 0).cpu().detach().numpy() - img = utils.convert_image_to_rgb(img, cfg.INPUT.FORMAT) - - visualizer = Visualizer(img, metadata=metadata, scale=scale) - target_fields = per_image["instances"].get_fields() - labels = [metadata.thing_classes[i] for i in target_fields["gt_classes"]] - vis = visualizer.overlay_instances( - labels=labels, - boxes=target_fields.get("gt_boxes", None), - masks=target_fields.get("gt_masks", None), - keypoints=target_fields.get("gt_keypoints", None), - ) - output(vis, str(per_image["image_id"]) + ".jpg") - else: - dicts = 
list(chain.from_iterable([DatasetCatalog.get(k) for k in cfg.DATASETS.TRAIN])) - if cfg.MODEL.KEYPOINT_ON: - dicts = filter_images_with_few_keypoints(dicts, 1) - for dic in tqdm.tqdm(dicts): - img = utils.read_image(dic["file_name"], "RGB") - visualizer = Visualizer(img, metadata=metadata, scale=scale) - vis = visualizer.draw_dataset_dict(dic) - output(vis, os.path.basename(dic["file_name"])) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/visualize_json_results.py b/preprocess/humanparsing/mhp_extension/detectron2/tools/visualize_json_results.py deleted file mode 100644 index d11ecb9..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/visualize_json_results.py +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import argparse -import json -import numpy as np -import os -from collections import defaultdict -import cv2 -import tqdm -from fvcore.common.file_io import PathManager - -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.structures import Boxes, BoxMode, Instances -from detectron2.utils.logger import setup_logger -from detectron2.utils.visualizer import Visualizer - - -def create_instances(predictions, image_size): - ret = Instances(image_size) - - score = np.asarray([x["score"] for x in predictions]) - chosen = (score > args.conf_threshold).nonzero()[0] - score = score[chosen] - bbox = np.asarray([predictions[i]["bbox"] for i in chosen]).reshape(-1, 4) - bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) - - labels = np.asarray([dataset_id_map(predictions[i]["category_id"]) for i in chosen]) - - ret.scores = score - ret.pred_boxes = Boxes(bbox) - ret.pred_classes = labels - - try: - ret.pred_masks = [predictions[i]["segmentation"] for i in chosen] - except KeyError: - pass - return ret - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="A script that visualizes the json predictions from COCO or LVIS dataset." 
- ) - parser.add_argument("--input", required=True, help="JSON file produced by the model") - parser.add_argument("--output", required=True, help="output directory") - parser.add_argument("--dataset", help="name of the dataset", default="coco_2017_val") - parser.add_argument("--conf-threshold", default=0.5, type=float, help="confidence threshold") - args = parser.parse_args() - - logger = setup_logger() - - with PathManager.open(args.input, "r") as f: - predictions = json.load(f) - - pred_by_image = defaultdict(list) - for p in predictions: - pred_by_image[p["image_id"]].append(p) - - dicts = list(DatasetCatalog.get(args.dataset)) - metadata = MetadataCatalog.get(args.dataset) - if hasattr(metadata, "thing_dataset_id_to_contiguous_id"): - - def dataset_id_map(ds_id): - return metadata.thing_dataset_id_to_contiguous_id[ds_id] - - elif "lvis" in args.dataset: - # LVIS results are in the same format as COCO results, but have a different - # mapping from dataset category id to contiguous category id in [0, #categories - 1] - def dataset_id_map(ds_id): - return ds_id - 1 - - else: - raise ValueError("Unsupported dataset: {}".format(args.dataset)) - - os.makedirs(args.output, exist_ok=True) - - for dic in tqdm.tqdm(dicts): - img = cv2.imread(dic["file_name"], cv2.IMREAD_COLOR)[:, :, ::-1] - basename = os.path.basename(dic["file_name"]) - - predictions = create_instances(pred_by_image[dic["image_id"]], img.shape[:2]) - vis = Visualizer(img, metadata) - vis_pred = vis.draw_instance_predictions(predictions).get_image() - - vis = Visualizer(img, metadata) - vis_gt = vis.draw_dataset_dict(dic).get_image() - - concat = np.concatenate((vis_pred, vis_gt), axis=1) - cv2.imwrite(os.path.join(args.output, basename), concat[:, :, ::-1]) diff --git a/preprocess/humanparsing/mhp_extension/global_local_parsing/global_local_datasets.py b/preprocess/humanparsing/mhp_extension/global_local_parsing/global_local_datasets.py deleted file mode 100644 index 8b00594..0000000 --- a/preprocess/humanparsing/mhp_extension/global_local_parsing/global_local_datasets.py +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : datasets.py -@Time : 8/4/19 3:35 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. 
-""" - -import os -import numpy as np -import random -import torch -import cv2 -from torch.utils import data -from utils.transforms import get_affine_transform - - -class CropDataSet(data.Dataset): - def __init__(self, root, split_name, crop_size=[473, 473], scale_factor=0.25, - rotation_factor=30, ignore_label=255, transform=None): - self.root = root - self.aspect_ratio = crop_size[1] * 1.0 / crop_size[0] - self.crop_size = np.asarray(crop_size) - self.ignore_label = ignore_label - self.scale_factor = scale_factor - self.rotation_factor = rotation_factor - self.flip_prob = 0.5 - self.transform = transform - self.split_name = split_name - - list_path = os.path.join(self.root, self.split_name + '.txt') - train_list = [i_id.strip() for i_id in open(list_path)] - - self.train_list = train_list - self.number_samples = len(self.train_list) - - def __len__(self): - return self.number_samples - - def _box2cs(self, box): - x, y, w, h = box[:4] - return self._xywh2cs(x, y, w, h) - - def _xywh2cs(self, x, y, w, h): - center = np.zeros((2), dtype=np.float32) - center[0] = x + w * 0.5 - center[1] = y + h * 0.5 - if w > self.aspect_ratio * h: - h = w * 1.0 / self.aspect_ratio - elif w < self.aspect_ratio * h: - w = h * self.aspect_ratio - scale = np.array([w * 1.0, h * 1.0], dtype=np.float32) - return center, scale - - def __getitem__(self, index): - train_item = self.train_list[index] - - im_path = os.path.join(self.root, self.split_name + '_images', train_item + '.jpg') - parsing_anno_path = os.path.join(self.root, self.split_name + '_segmentations', train_item + '.png') - - im = cv2.imread(im_path, cv2.IMREAD_COLOR) - h, w, _ = im.shape - parsing_anno = np.zeros((h, w), dtype=np.long) - - # Get person center and scale - person_center, s = self._box2cs([0, 0, w - 1, h - 1]) - r = 0 - - if self.split_name != 'test': - # Get pose annotation - parsing_anno = cv2.imread(parsing_anno_path, cv2.IMREAD_GRAYSCALE) - sf = self.scale_factor - rf = self.rotation_factor - s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf) - r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) if random.random() <= 0.6 else 0 - - if random.random() <= self.flip_prob: - im = im[:, ::-1, :] - parsing_anno = parsing_anno[:, ::-1] - person_center[0] = im.shape[1] - person_center[0] - 1 - right_idx = [15, 17, 19] - left_idx = [14, 16, 18] - for i in range(0, 3): - right_pos = np.where(parsing_anno == right_idx[i]) - left_pos = np.where(parsing_anno == left_idx[i]) - parsing_anno[right_pos[0], right_pos[1]] = left_idx[i] - parsing_anno[left_pos[0], left_pos[1]] = right_idx[i] - - trans = get_affine_transform(person_center, s, r, self.crop_size) - input = cv2.warpAffine( - im, - trans, - (int(self.crop_size[1]), int(self.crop_size[0])), - flags=cv2.INTER_LINEAR, - borderMode=cv2.BORDER_CONSTANT, - borderValue=(0, 0, 0)) - - if self.transform: - input = self.transform(input) - - meta = { - 'name': train_item, - 'center': person_center, - 'height': h, - 'width': w, - 'scale': s, - 'rotation': r - } - - if self.split_name == 'val' or self.split_name == 'test': - return input, meta - else: - label_parsing = cv2.warpAffine( - parsing_anno, - trans, - (int(self.crop_size[1]), int(self.crop_size[0])), - flags=cv2.INTER_NEAREST, - borderMode=cv2.BORDER_CONSTANT, - borderValue=(255)) - - label_parsing = torch.from_numpy(label_parsing) - - return input, label_parsing, meta - - -class CropDataValSet(data.Dataset): - def __init__(self, root, split_name='crop_pic', crop_size=[473, 473], transform=None, flip=False): - self.root = root - 
self.crop_size = crop_size - self.transform = transform - self.flip = flip - self.split_name = split_name - self.root = root - self.aspect_ratio = crop_size[1] * 1.0 / crop_size[0] - self.crop_size = np.asarray(crop_size) - - list_path = os.path.join(self.root, self.split_name + '.txt') - val_list = [i_id.strip() for i_id in open(list_path)] - - self.val_list = val_list - self.number_samples = len(self.val_list) - - def __len__(self): - return len(self.val_list) - - def _box2cs(self, box): - x, y, w, h = box[:4] - return self._xywh2cs(x, y, w, h) - - def _xywh2cs(self, x, y, w, h): - center = np.zeros((2), dtype=np.float32) - center[0] = x + w * 0.5 - center[1] = y + h * 0.5 - if w > self.aspect_ratio * h: - h = w * 1.0 / self.aspect_ratio - elif w < self.aspect_ratio * h: - w = h * self.aspect_ratio - scale = np.array([w * 1.0, h * 1.0], dtype=np.float32) - - return center, scale - - def __getitem__(self, index): - val_item = self.val_list[index] - # Load training image - im_path = os.path.join(self.root, self.split_name, val_item + '.jpg') - im = cv2.imread(im_path, cv2.IMREAD_COLOR) - h, w, _ = im.shape - # Get person center and scale - person_center, s = self._box2cs([0, 0, w - 1, h - 1]) - r = 0 - trans = get_affine_transform(person_center, s, r, self.crop_size) - input = cv2.warpAffine( - im, - trans, - (int(self.crop_size[1]), int(self.crop_size[0])), - flags=cv2.INTER_LINEAR, - borderMode=cv2.BORDER_CONSTANT, - borderValue=(0, 0, 0)) - input = self.transform(input) - flip_input = input.flip(dims=[-1]) - if self.flip: - batch_input_im = torch.stack([input, flip_input]) - else: - batch_input_im = input - - meta = { - 'name': val_item, - 'center': person_center, - 'height': h, - 'width': w, - 'scale': s, - 'rotation': r - } - - return batch_input_im, meta diff --git a/preprocess/humanparsing/mhp_extension/global_local_parsing/global_local_evaluate.py b/preprocess/humanparsing/mhp_extension/global_local_parsing/global_local_evaluate.py deleted file mode 100644 index 288e3c8..0000000 --- a/preprocess/humanparsing/mhp_extension/global_local_parsing/global_local_evaluate.py +++ /dev/null @@ -1,210 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : evaluate.py -@Time : 8/4/19 3:36 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. -""" - -import os -import argparse -import numpy as np -import torch - -from torch.utils import data -from tqdm import tqdm -from PIL import Image as PILImage -import torchvision.transforms as transforms -import torch.backends.cudnn as cudnn - -import networks -from utils.miou import compute_mean_ioU -from utils.transforms import BGR2RGB_transform -from utils.transforms import transform_parsing, transform_logits -from mhp_extension.global_local_parsing.global_local_datasets import CropDataValSet - - -def get_arguments(): - """Parse all the arguments provided from the CLI. - - Returns: - A list of parsed arguments. 
- """ - parser = argparse.ArgumentParser(description="Self Correction for Human Parsing") - - # Network Structure - parser.add_argument("--arch", type=str, default='resnet101') - # Data Preference - parser.add_argument("--data-dir", type=str, default='./data/LIP') - parser.add_argument("--batch-size", type=int, default=1) - parser.add_argument("--split-name", type=str, default='crop_pic') - parser.add_argument("--input-size", type=str, default='473,473') - parser.add_argument("--num-classes", type=int, default=20) - parser.add_argument("--ignore-label", type=int, default=255) - parser.add_argument("--random-mirror", action="store_true") - parser.add_argument("--random-scale", action="store_true") - # Evaluation Preference - parser.add_argument("--log-dir", type=str, default='./log') - parser.add_argument("--model-restore", type=str, default='./log/checkpoint.pth.tar') - parser.add_argument("--gpu", type=str, default='0', help="choose gpu device.") - parser.add_argument("--save-results", action="store_true", help="whether to save the results.") - parser.add_argument("--flip", action="store_true", help="random flip during the test.") - parser.add_argument("--multi-scales", type=str, default='1', help="multiple scales during the test") - return parser.parse_args() - - -def get_palette(num_cls): - """ Returns the color map for visualizing the segmentation mask. - Args: - num_cls: Number of classes - Returns: - The color map - """ - n = num_cls - palette = [0] * (n * 3) - for j in range(0, n): - lab = j - palette[j * 3 + 0] = 0 - palette[j * 3 + 1] = 0 - palette[j * 3 + 2] = 0 - i = 0 - while lab: - palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) - palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) - palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) - i += 1 - lab >>= 3 - return palette - - -def multi_scale_testing(model, batch_input_im, crop_size=[473, 473], flip=True, multi_scales=[1]): - flipped_idx = (15, 14, 17, 16, 19, 18) - if len(batch_input_im.shape) > 4: - batch_input_im = batch_input_im.squeeze() - if len(batch_input_im.shape) == 3: - batch_input_im = batch_input_im.unsqueeze(0) - - interp = torch.nn.Upsample(size=crop_size, mode='bilinear', align_corners=True) - ms_outputs = [] - for s in multi_scales: - interp_im = torch.nn.Upsample(scale_factor=s, mode='bilinear', align_corners=True) - scaled_im = interp_im(batch_input_im) - parsing_output = model(scaled_im) - parsing_output = parsing_output[0][-1] - output = parsing_output[0] - if flip: - flipped_output = parsing_output[1] - flipped_output[14:20, :, :] = flipped_output[flipped_idx, :, :] - output += flipped_output.flip(dims=[-1]) - output *= 0.5 - output = interp(output.unsqueeze(0)) - ms_outputs.append(output[0]) - ms_fused_parsing_output = torch.stack(ms_outputs) - ms_fused_parsing_output = ms_fused_parsing_output.mean(0) - ms_fused_parsing_output = ms_fused_parsing_output.permute(1, 2, 0) # HWC - parsing = torch.argmax(ms_fused_parsing_output, dim=2) - parsing = parsing.data.cpu().numpy() - ms_fused_parsing_output = ms_fused_parsing_output.data.cpu().numpy() - return parsing, ms_fused_parsing_output - - -def main(): - """Create the model and start the evaluation process.""" - args = get_arguments() - multi_scales = [float(i) for i in args.multi_scales.split(',')] - gpus = [int(i) for i in args.gpu.split(',')] - assert len(gpus) == 1 - if not args.gpu == 'None': - os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu - - cudnn.benchmark = True - cudnn.enabled = True - - h, w = map(int, args.input_size.split(',')) - input_size = 
[h, w] - - model = networks.init_model(args.arch, num_classes=args.num_classes, pretrained=None) - - IMAGE_MEAN = model.mean - IMAGE_STD = model.std - INPUT_SPACE = model.input_space - print('image mean: {}'.format(IMAGE_MEAN)) - print('image std: {}'.format(IMAGE_STD)) - print('input space:{}'.format(INPUT_SPACE)) - if INPUT_SPACE == 'BGR': - print('BGR Transformation') - transform = transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize(mean=IMAGE_MEAN, - std=IMAGE_STD), - - ]) - if INPUT_SPACE == 'RGB': - print('RGB Transformation') - transform = transforms.Compose([ - transforms.ToTensor(), - BGR2RGB_transform(), - transforms.Normalize(mean=IMAGE_MEAN, - std=IMAGE_STD), - ]) - - # Data loader - lip_test_dataset = CropDataValSet(args.data_dir, args.split_name, crop_size=input_size, transform=transform, - flip=args.flip) - num_samples = len(lip_test_dataset) - print('Totoal testing sample numbers: {}'.format(num_samples)) - testloader = data.DataLoader(lip_test_dataset, batch_size=args.batch_size, shuffle=False, pin_memory=True) - - # Load model weight - state_dict = torch.load(args.model_restore) - from collections import OrderedDict - new_state_dict = OrderedDict() - for k, v in state_dict.items(): - name = k[7:] # remove `module.` - new_state_dict[name] = v - model.load_state_dict(new_state_dict) - model.cuda() - model.eval() - - sp_results_dir = os.path.join(args.log_dir, args.split_name + '_parsing') - if not os.path.exists(sp_results_dir): - os.makedirs(sp_results_dir) - - palette = get_palette(20) - parsing_preds = [] - scales = np.zeros((num_samples, 2), dtype=np.float32) - centers = np.zeros((num_samples, 2), dtype=np.int32) - with torch.no_grad(): - for idx, batch in enumerate(tqdm(testloader)): - image, meta = batch - if (len(image.shape) > 4): - image = image.squeeze() - im_name = meta['name'][0] - c = meta['center'].numpy()[0] - s = meta['scale'].numpy()[0] - w = meta['width'].numpy()[0] - h = meta['height'].numpy()[0] - scales[idx, :] = s - centers[idx, :] = c - parsing, logits = multi_scale_testing(model, image.cuda(), crop_size=input_size, flip=args.flip, - multi_scales=multi_scales) - if args.save_results: - parsing_result = transform_parsing(parsing, c, s, w, h, input_size) - parsing_result_path = os.path.join(sp_results_dir, im_name + '.png') - output_im = PILImage.fromarray(np.asarray(parsing_result, dtype=np.uint8)) - output_im.putpalette(palette) - output_im.save(parsing_result_path) - # save logits - logits_result = transform_logits(logits, c, s, w, h, input_size) - logits_result_path = os.path.join(sp_results_dir, im_name + '.npy') - np.save(logits_result_path, logits_result) - return - - -if __name__ == '__main__': - main() diff --git a/preprocess/humanparsing/mhp_extension/global_local_parsing/global_local_train.py b/preprocess/humanparsing/mhp_extension/global_local_parsing/global_local_train.py deleted file mode 100644 index 810b1db..0000000 --- a/preprocess/humanparsing/mhp_extension/global_local_parsing/global_local_train.py +++ /dev/null @@ -1,232 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : train.py -@Time : 8/4/19 3:36 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. 
-""" - -import os -import json -import timeit -import argparse - -import torch -import torch.optim as optim -import torchvision.transforms as transforms -import torch.backends.cudnn as cudnn -from torch.utils import data - -import networks -import utils.schp as schp -from datasets.datasets import LIPDataSet -from datasets.target_generation import generate_edge_tensor -from utils.transforms import BGR2RGB_transform -from utils.criterion import CriterionAll -from utils.encoding import DataParallelModel, DataParallelCriterion -from utils.warmup_scheduler import SGDRScheduler - - -def get_arguments(): - """Parse all the arguments provided from the CLI. - Returns: - A list of parsed arguments. - """ - parser = argparse.ArgumentParser(description="Self Correction for Human Parsing") - - # Network Structure - parser.add_argument("--arch", type=str, default='resnet101') - # Data Preference - parser.add_argument("--data-dir", type=str, default='./data/LIP') - parser.add_argument("--batch-size", type=int, default=16) - parser.add_argument("--input-size", type=str, default='473,473') - parser.add_argument("--split-name", type=str, default='crop_pic') - parser.add_argument("--num-classes", type=int, default=20) - parser.add_argument("--ignore-label", type=int, default=255) - parser.add_argument("--random-mirror", action="store_true") - parser.add_argument("--random-scale", action="store_true") - # Training Strategy - parser.add_argument("--learning-rate", type=float, default=7e-3) - parser.add_argument("--momentum", type=float, default=0.9) - parser.add_argument("--weight-decay", type=float, default=5e-4) - parser.add_argument("--gpu", type=str, default='0,1,2') - parser.add_argument("--start-epoch", type=int, default=0) - parser.add_argument("--epochs", type=int, default=150) - parser.add_argument("--eval-epochs", type=int, default=10) - parser.add_argument("--imagenet-pretrain", type=str, default='./pretrain_model/resnet101-imagenet.pth') - parser.add_argument("--log-dir", type=str, default='./log') - parser.add_argument("--model-restore", type=str, default='./log/checkpoint.pth.tar') - parser.add_argument("--schp-start", type=int, default=100, help='schp start epoch') - parser.add_argument("--cycle-epochs", type=int, default=10, help='schp cyclical epoch') - parser.add_argument("--schp-restore", type=str, default='./log/schp_checkpoint.pth.tar') - parser.add_argument("--lambda-s", type=float, default=1, help='segmentation loss weight') - parser.add_argument("--lambda-e", type=float, default=1, help='edge loss weight') - parser.add_argument("--lambda-c", type=float, default=0.1, help='segmentation-edge consistency loss weight') - return parser.parse_args() - - -def main(): - args = get_arguments() - print(args) - - start_epoch = 0 - cycle_n = 0 - - if not os.path.exists(args.log_dir): - os.makedirs(args.log_dir) - with open(os.path.join(args.log_dir, 'args.json'), 'w') as opt_file: - json.dump(vars(args), opt_file) - - gpus = [int(i) for i in args.gpu.split(',')] - if not args.gpu == 'None': - os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu - - input_size = list(map(int, args.input_size.split(','))) - - cudnn.enabled = True - cudnn.benchmark = True - - # Model Initialization - AugmentCE2P = networks.init_model(args.arch, num_classes=args.num_classes, pretrained=args.imagenet_pretrain) - model = DataParallelModel(AugmentCE2P) - model.cuda() - - IMAGE_MEAN = AugmentCE2P.mean - IMAGE_STD = AugmentCE2P.std - INPUT_SPACE = AugmentCE2P.input_space - print('image mean: {}'.format(IMAGE_MEAN)) - print('image 
std: {}'.format(IMAGE_STD)) - print('input space:{}'.format(INPUT_SPACE)) - - restore_from = args.model_restore - if os.path.exists(restore_from): - print('Resume training from {}'.format(restore_from)) - checkpoint = torch.load(restore_from) - model.load_state_dict(checkpoint['state_dict']) - start_epoch = checkpoint['epoch'] - - SCHP_AugmentCE2P = networks.init_model(args.arch, num_classes=args.num_classes, pretrained=args.imagenet_pretrain) - schp_model = DataParallelModel(SCHP_AugmentCE2P) - schp_model.cuda() - - if os.path.exists(args.schp_restore): - print('Resuming schp checkpoint from {}'.format(args.schp_restore)) - schp_checkpoint = torch.load(args.schp_restore) - schp_model_state_dict = schp_checkpoint['state_dict'] - cycle_n = schp_checkpoint['cycle_n'] - schp_model.load_state_dict(schp_model_state_dict) - - # Loss Function - criterion = CriterionAll(lambda_1=args.lambda_s, lambda_2=args.lambda_e, lambda_3=args.lambda_c, - num_classes=args.num_classes) - criterion = DataParallelCriterion(criterion) - criterion.cuda() - - # Data Loader - if INPUT_SPACE == 'BGR': - print('BGR Transformation') - transform = transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize(mean=IMAGE_MEAN, - std=IMAGE_STD), - ]) - - elif INPUT_SPACE == 'RGB': - print('RGB Transformation') - transform = transforms.Compose([ - transforms.ToTensor(), - BGR2RGB_transform(), - transforms.Normalize(mean=IMAGE_MEAN, - std=IMAGE_STD), - ]) - - train_dataset = LIPDataSet(args.data_dir, args.split_name, crop_size=input_size, transform=transform) - train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size * len(gpus), - num_workers=16, shuffle=True, pin_memory=True, drop_last=True) - print('Total training samples: {}'.format(len(train_dataset))) - - # Optimizer Initialization - optimizer = optim.SGD(model.parameters(), lr=args.learning_rate, momentum=args.momentum, - weight_decay=args.weight_decay) - - lr_scheduler = SGDRScheduler(optimizer, total_epoch=args.epochs, - eta_min=args.learning_rate / 100, warmup_epoch=10, - start_cyclical=args.schp_start, cyclical_base_lr=args.learning_rate / 2, - cyclical_epoch=args.cycle_epochs) - - total_iters = args.epochs * len(train_loader) - start = timeit.default_timer() - for epoch in range(start_epoch, args.epochs): - lr_scheduler.step(epoch=epoch) - lr = lr_scheduler.get_lr()[0] - - model.train() - for i_iter, batch in enumerate(train_loader): - i_iter += len(train_loader) * epoch - - images, labels, _ = batch - labels = labels.cuda(non_blocking=True) - - edges = generate_edge_tensor(labels) - labels = labels.type(torch.cuda.LongTensor) - edges = edges.type(torch.cuda.LongTensor) - - preds = model(images) - - # Online Self Correction Cycle with Label Refinement - if cycle_n >= 1: - with torch.no_grad(): - soft_preds = schp_model(images) - soft_parsing = [] - soft_edge = [] - for soft_pred in soft_preds: - soft_parsing.append(soft_pred[0][-1]) - soft_edge.append(soft_pred[1][-1]) - soft_preds = torch.cat(soft_parsing, dim=0) - soft_edges = torch.cat(soft_edge, dim=0) - else: - soft_preds = None - soft_edges = None - - loss = criterion(preds, [labels, edges, soft_preds, soft_edges], cycle_n) - - optimizer.zero_grad() - loss.backward() - optimizer.step() - - if i_iter % 100 == 0: - print('iter = {} of {} completed, lr = {}, loss = {}'.format(i_iter, total_iters, lr, - loss.data.cpu().numpy())) - if (epoch + 1) % (args.eval_epochs) == 0: - schp.save_checkpoint({ - 'epoch': epoch + 1, - 'state_dict': model.state_dict(), - }, False, args.log_dir, 
filename='checkpoint_{}.pth.tar'.format(epoch + 1)) - - # Self Correction Cycle with Model Aggregation - if (epoch + 1) >= args.schp_start and (epoch + 1 - args.schp_start) % args.cycle_epochs == 0: - print('Self-correction cycle number {}'.format(cycle_n)) - schp.moving_average(schp_model, model, 1.0 / (cycle_n + 1)) - cycle_n += 1 - schp.bn_re_estimate(train_loader, schp_model) - schp.save_schp_checkpoint({ - 'state_dict': schp_model.state_dict(), - 'cycle_n': cycle_n, - }, False, args.log_dir, filename='schp_{}_checkpoint.pth.tar'.format(cycle_n)) - - torch.cuda.empty_cache() - end = timeit.default_timer() - print('epoch = {} of {} completed using {} s'.format(epoch, args.epochs, - (end - start) / (epoch - start_epoch + 1))) - - end = timeit.default_timer() - print('Training Finished in {} seconds'.format(end - start)) - - -if __name__ == '__main__': - main() diff --git a/preprocess/humanparsing/mhp_extension/global_local_parsing/make_id_list.py b/preprocess/humanparsing/mhp_extension/global_local_parsing/make_id_list.py deleted file mode 100644 index 311edf4..0000000 --- a/preprocess/humanparsing/mhp_extension/global_local_parsing/make_id_list.py +++ /dev/null @@ -1,13 +0,0 @@ -import os - -DATASET = 'VIP' # DATASET: MHPv2 or CIHP or VIP -TYPE = 'crop_pic' # crop_pic or DemoDataset -IMG_DIR = '../demo/cropped_img/crop_pic' -SAVE_DIR = '../demo/cropped_img' - -if not os.path.exists(SAVE_DIR): - os.makedirs(SAVE_DIR) - -with open(os.path.join(SAVE_DIR, TYPE + '.txt'), "w") as f: - for img_name in os.listdir(IMG_DIR): - f.write(img_name[:-4] + '\n') diff --git a/preprocess/humanparsing/mhp_extension/logits_fusion.py b/preprocess/humanparsing/mhp_extension/logits_fusion.py deleted file mode 100644 index 07a8446..0000000 --- a/preprocess/humanparsing/mhp_extension/logits_fusion.py +++ /dev/null @@ -1,307 +0,0 @@ -import argparse -import cv2 -import os -import json -import numpy as np -from PIL import Image as PILImage -import joblib - - -def mask_nms(masks, bbox_scores, instances_confidence_threshold=0.5, overlap_threshold=0.7): - """ - NMS-like procedure used in Panoptic Segmentation - Remove the overlap areas of different instances in Instance Segmentation - """ - panoptic_seg = np.zeros(masks.shape[:2], dtype=np.uint8) - sorted_inds = list(range(len(bbox_scores))) - current_segment_id = 0 - segments_score = [] - - for inst_id in sorted_inds: - score = bbox_scores[inst_id] - if score < instances_confidence_threshold: - break - mask = masks[:, :, inst_id] - mask_area = mask.sum() - - if mask_area == 0: - continue - - intersect = (mask > 0) & (panoptic_seg > 0) - intersect_area = intersect.sum() - - if intersect_area * 1.0 / mask_area > overlap_threshold: - continue - - if intersect_area > 0: - mask = mask & (panoptic_seg == 0) - - current_segment_id += 1 - # panoptic_seg[np.where(mask==1)] = current_segment_id - # panoptic_seg = panoptic_seg + current_segment_id*mask - panoptic_seg = np.where(mask == 0, panoptic_seg, current_segment_id) - segments_score.append(score) - # print(np.unique(panoptic_seg)) - return panoptic_seg, segments_score - - -def extend(si, sj, instance_label, global_label, panoptic_seg_mask, class_map): - """ - """ - directions = [[-1, 0], [0, 1], [1, 0], [0, -1], - [1, 1], [1, -1], [-1, 1], [-1, -1]] - - inst_class = instance_label[si, sj] - human_class = panoptic_seg_mask[si, sj] - global_class = class_map[inst_class] - queue = [[si, sj]] - - while len(queue) != 0: - cur = queue[0] - queue.pop(0) - - for direction in directions: - ni = cur[0] + direction[0] - nj = 
cur[1] + direction[1] - - if ni >= 0 and nj >= 0 and \ - ni < instance_label.shape[0] and \ - nj < instance_label.shape[1] and \ - instance_label[ni, nj] == 0 and \ - global_label[ni, nj] == global_class: - instance_label[ni, nj] = inst_class - # Using refined instance label to refine human label - panoptic_seg_mask[ni, nj] = human_class - queue.append([ni, nj]) - - -def refine(instance_label, panoptic_seg_mask, global_label, class_map): - """ - Inputs: - [ instance_label ] - np.array() with shape [h, w] - [ global_label ] with shape [h, w] - np.array() - """ - for i in range(instance_label.shape[0]): - for j in range(instance_label.shape[1]): - if instance_label[i, j] != 0: - extend(i, j, instance_label, global_label, panoptic_seg_mask, class_map) - - -def get_palette(num_cls): - """ Returns the color map for visualizing the segmentation mask. - Inputs: - =num_cls= - Number of classes. - Returns: - The color map. - """ - n = num_cls - palette = [0] * (n * 3) - for j in range(0, n): - lab = j - palette[j * 3 + 0] = 0 - palette[j * 3 + 1] = 0 - palette[j * 3 + 2] = 0 - i = 0 - while lab: - palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) - palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) - palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) - i += 1 - lab >>= 3 - return palette - - -def patch2img_output(patch_dir, img_name, img_height, img_width, bbox, bbox_type, num_class): - """transform bbox patch outputs to image output""" - assert bbox_type == 'gt' or 'msrcnn' - output = np.zeros((img_height, img_width, num_class), dtype='float') - output[:, :, 0] = np.inf - count_predictions = np.zeros((img_height, img_width, num_class), dtype='int32') - for i in range(len(bbox)): # person index starts from 1 - file_path = os.path.join(patch_dir, os.path.splitext(img_name)[0] + '_' + str(i + 1) + '_' + bbox_type + '.npy') - bbox_output = np.load(file_path) - output[bbox[i][1]:bbox[i][3] + 1, bbox[i][0]:bbox[i][2] + 1, 1:] += bbox_output[:, :, 1:] - count_predictions[bbox[i][1]:bbox[i][3] + 1, bbox[i][0]:bbox[i][2] + 1, 1:] += 1 - output[bbox[i][1]:bbox[i][3] + 1, bbox[i][0]:bbox[i][2] + 1, 0] \ - = np.minimum(output[bbox[i][1]:bbox[i][3] + 1, bbox[i][0]:bbox[i][2] + 1, 0], bbox_output[:, :, 0]) - - # Caution zero dividing. - count_predictions[count_predictions == 0] = 1 - return output / count_predictions - - -def get_instance(cat_gt, panoptic_seg_mask): - """ - """ - instance_gt = np.zeros_like(cat_gt, dtype=np.uint8) - num_humans = len(np.unique(panoptic_seg_mask)) - 1 - class_map = {} - - total_part_num = 0 - for id in range(1, num_humans + 1): - human_part_label = np.where(panoptic_seg_mask == id, cat_gt, 0).astype(np.uint8) - # human_part_label = (np.where(panoptic_seg_mask==id) * cat_gt).astype(np.uint8) - part_classes = np.unique(human_part_label) - - exceed = False - for part_id in part_classes: - if part_id == 0: # background - continue - total_part_num += 1 - - if total_part_num > 255: - print("total_part_num exceed, return current instance map: {}".format(total_part_num)) - exceed = True - break - class_map[total_part_num] = part_id - instance_gt[np.where(human_part_label == part_id)] = total_part_num - if exceed: - break - - # Make instance id continous. 
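# A toy check of the mask_nms procedure defined above: two overlapping instance masks
# (shapes and scores are made up), already sorted by descending detection score. The
# lower-scoring instance keeps only the pixels not claimed by the higher-scoring one.
import numpy as np

toy_masks = np.zeros((4, 4, 2), dtype=np.uint8)
toy_masks[0:3, 0:3, 0] = 1   # higher-scoring instance
toy_masks[1:4, 1:4, 1] = 1   # lower-scoring instance, 4 of its 9 pixels overlap the first
toy_scores = [0.9, 0.8]

toy_panoptic, toy_kept = mask_nms(toy_masks, toy_scores,
                                  instances_confidence_threshold=0.5,
                                  overlap_threshold=0.7)
print(np.unique(toy_panoptic))  # [0 1 2]: background plus the two kept segment ids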
- ori_cur_labels = np.unique(instance_gt) - total_num_label = len(ori_cur_labels) - if instance_gt.max() + 1 != total_num_label: - for label in range(1, total_num_label): - instance_gt[instance_gt == ori_cur_labels[label]] = label - - final_class_map = {} - for label in range(1, total_num_label): - if label >= 1: - final_class_map[label] = class_map[ori_cur_labels[label]] - - return instance_gt, final_class_map - - -def compute_confidence(im_name, feature_map, class_map, - instance_label, output_dir, - panoptic_seg_mask, seg_score_list): - """ - """ - conf_file = open(os.path.join(output_dir, os.path.splitext(im_name)[0] + '.txt'), 'w') - - weighted_map = np.zeros_like(feature_map[:, :, 0]) - for index, score in enumerate(seg_score_list): - weighted_map += (panoptic_seg_mask == index + 1) * score - - for label in class_map.keys(): - cls = class_map[label] - confidence = feature_map[:, :, cls].reshape(-1)[np.where(instance_label.reshape(-1) == label)] - confidence = (weighted_map * feature_map[:, :, cls].copy()).reshape(-1)[ - np.where(instance_label.reshape(-1) == label)] - - confidence = confidence.sum() / len(confidence) - conf_file.write('{} {}\n'.format(cls, confidence)) - - conf_file.close() - - -def result_saving(fused_output, img_name, img_height, img_width, output_dir, mask_output_path, bbox_score, msrcnn_bbox): - if not os.path.exists(output_dir): - os.makedirs(output_dir) - - global_root = os.path.join(output_dir, 'global_parsing') - instance_root = os.path.join(output_dir, 'instance_parsing') - tag_dir = os.path.join(output_dir, 'global_tag') - - if not os.path.exists(global_root): - os.makedirs(global_root) - if not os.path.exists(instance_root): - os.makedirs(instance_root) - if not os.path.exists(tag_dir): - os.makedirs(tag_dir) - - # For visualizing indexed png image. 
- palette = get_palette(256) - - fused_output = cv2.resize(fused_output, dsize=(img_width, img_height), interpolation=cv2.INTER_LINEAR) - seg_pred = np.asarray(np.argmax(fused_output, axis=2), dtype=np.uint8) - masks = np.load(mask_output_path) - masks[np.where(seg_pred == 0)] = 0 - - panoptic_seg_mask = masks - seg_score_list = bbox_score - - instance_pred, class_map = get_instance(seg_pred, panoptic_seg_mask) - refine(instance_pred, panoptic_seg_mask, seg_pred, class_map) - - compute_confidence(img_name, fused_output, class_map, instance_pred, instance_root, - panoptic_seg_mask, seg_score_list) - - ins_seg_results = open(os.path.join(tag_dir, os.path.splitext(img_name)[0] + '.txt'), "a") - keep_human_id_list = list(np.unique(panoptic_seg_mask)) - if 0 in keep_human_id_list: - keep_human_id_list.remove(0) - for i in keep_human_id_list: - ins_seg_results.write('{:.6f} {} {} {} {}\n'.format(seg_score_list[i - 1], - int(msrcnn_bbox[i - 1][1]), int(msrcnn_bbox[i - 1][0]), - int(msrcnn_bbox[i - 1][3]), int(msrcnn_bbox[i - 1][2]))) - ins_seg_results.close() - - output_im_global = PILImage.fromarray(seg_pred) - output_im_instance = PILImage.fromarray(instance_pred) - output_im_tag = PILImage.fromarray(panoptic_seg_mask) - output_im_global.putpalette(palette) - output_im_instance.putpalette(palette) - output_im_tag.putpalette(palette) - - output_im_global.save(os.path.join(global_root, os.path.splitext(img_name)[0] + '.png')) - output_im_instance.save(os.path.join(instance_root, os.path.splitext(img_name)[0] + '.png')) - output_im_tag.save(os.path.join(tag_dir, os.path.splitext(img_name)[0] + '.png')) - - -def multi_process(a, args): - img_name = a['im_name'] - img_height = a['img_height'] - img_width = a['img_width'] - msrcnn_bbox = a['person_bbox'] - bbox_score = a['person_bbox_score'] - - ######### loading outputs from gloabl and local models ######### - global_output = np.load(os.path.join(args.global_output_dir, os.path.splitext(img_name)[0] + '.npy')) - - msrcnn_output = patch2img_output(args.msrcnn_output_dir, img_name, img_height, img_width, msrcnn_bbox, - bbox_type='msrcnn', num_class=20) - - gt_output = patch2img_output(args.gt_output_dir, img_name, img_height, img_width, msrcnn_bbox, bbox_type='msrcnn', - num_class=20) - - #### global and local branch logits fusion ##### -# fused_output = global_output + msrcnn_output + gt_output - fused_output = global_output + gt_output - - - mask_output_path = os.path.join(args.mask_output_dir, os.path.splitext(img_name)[0] + '_mask.npy') - result_saving(fused_output, img_name, img_height, img_width, args.save_dir, mask_output_path, bbox_score, msrcnn_bbox) - return - - -def main(args): - json_file = open(args.test_json_path) - anno = json.load(json_file)['root'] - - results = joblib.Parallel(n_jobs=24, verbose=10, pre_dispatch="all")( - [joblib.delayed(multi_process)(a, args) for i, a in enumerate(anno)] - ) - - -def get_arguments(): - parser = argparse.ArgumentParser(description="obtain final prediction by logits fusion") - parser.add_argument("--test_json_path", type=str, default='./data/CIHP/cascade_152_finetune/test.json') - parser.add_argument("--global_output_dir", type=str, - default='./data/CIHP/global/global_result-cihp-resnet101/global_output') -# parser.add_argument("--msrcnn_output_dir", type=str, -# default='./data/CIHP/cascade_152__finetune/msrcnn_result-cihp-resnet101/msrcnn_output') - parser.add_argument("--gt_output_dir", type=str, - default='./data/CIHP/cascade_152__finetune/gt_result-cihp-resnet101/gt_output') - 
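# The fusion in multi_process above is a per-pixel sum of class-logit maps, turned into a
# label map by the argmax in result_saving. A toy version with random stand-in arrays
# (shapes only; no real model outputs):
import numpy as np

h, w, num_class = 4, 4, 20
global_logits = np.random.rand(h, w, num_class)   # stand-in for the global-branch .npy
local_logits = np.random.rand(h, w, num_class)    # stand-in for patch2img_output(...)
fused = global_logits + local_logits
parsing = np.argmax(fused, axis=2)                # (h, w) part-label map
print(parsing.shape)                              # (4, 4)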
parser.add_argument("--mask_output_dir", type=str, default='./data/CIHP/cascade_152_finetune/mask') - parser.add_argument("--save_dir", type=str, default='./data/CIHP/fusion_results/cihp-msrcnn_finetune') - return parser.parse_args() - - -if __name__ == '__main__': - args = get_arguments() - main(args) diff --git a/preprocess/humanparsing/mhp_extension/make_crop_and_mask_w_mask_nms.py b/preprocess/humanparsing/mhp_extension/make_crop_and_mask_w_mask_nms.py deleted file mode 100644 index 1efc5ae..0000000 --- a/preprocess/humanparsing/mhp_extension/make_crop_and_mask_w_mask_nms.py +++ /dev/null @@ -1,134 +0,0 @@ -import numpy as np -import cv2, torch -import os -import json -import argparse -import pycocotools.mask as mask_util -from tqdm import tqdm - - -def bbox_expand(img_height, img_width, bbox, exp_ratio): - x_min, y_min, x_max, y_max = bbox[:] - exp_x = (x_max - x_min) * ((exp_ratio - 1) / 2) - exp_y = (y_max - y_min) * ((exp_ratio - 1) / 2) - new_x_min = 0 if x_min - exp_x < 0 else np.round(x_min - exp_x) - new_y_min = 0 if y_min - exp_y < 0 else np.round(y_min - exp_y) - new_x_max = img_width - 1 if x_max + exp_x > img_width - 1 else np.round(x_max + exp_x) - new_y_max = img_height - 1 if y_max + exp_y > img_height - 1 else np.round(y_max + exp_y) - return int(new_x_min), int(new_y_min), int(new_x_max), int(new_y_max) - - -def make_crop_and_mask(img_info, pred, file_list, crop_save_dir, mask_save_dir, args): - img_name = img_info['file_name'] - img_id = img_info['id'] - 1 # img_info['id'] start form 1 - img_w = img_info['width'] - img_h = img_info['height'] - - img = cv2.imread(os.path.join(args.img_dir, img_name)) - - exp_bbox = [] - ori_bbox = [] - bbox_name_list = [] - bbox_score_list = [] - person_idx = 0 - - panoptic_seg = np.zeros((img_h, img_w), dtype=np.uint8) - assert len(pred[img_id]['instances']) > 0, 'image without instance prediction' - - for instance in pred[img_id]['instances']: - score = instance['score'] - if score < args.conf_thres: - break - - mask = mask_util.decode(instance['segmentation']) - mask_area = mask.sum() - - if mask_area == 0: # if mask_area < img_w*img_h/1000: - continue - - intersect = (mask > 0) & (panoptic_seg > 0) - intersect_area = intersect.sum() - - if intersect_area * 1.0 / mask_area > args.overlap_threshold: # todo add args - continue - - if intersect_area > 0: - mask = mask & (panoptic_seg == 0) - - person_idx += 1 - panoptic_seg = np.where(mask == 0, panoptic_seg, person_idx) - - bbox_score_list.append(score) - - ins_bbox = instance['bbox'] # [x,y,w,h] format - x_min, y_min, box_w, box_h = ins_bbox - x_max, y_max = x_min + box_w, y_min + box_h - exp_x_min, exp_y_min, exp_x_max, exp_y_max = bbox_expand(img_h, img_w, [x_min, y_min, x_max, y_max], - args.exp_ratio) - crop_img = img[exp_y_min:exp_y_max + 1, exp_x_min:exp_x_max + 1, :] - exp_bbox.append([exp_x_min, exp_y_min, exp_x_max, exp_y_max]) - ori_bbox.append([x_min, y_min, x_max, y_max]) - bbox_name = os.path.splitext(img_name)[0] + '_' + str(person_idx) + '_msrcnn.jpg' - bbox_name_list.append(bbox_name) - - cv2.imwrite(os.path.join(crop_save_dir, bbox_name), crop_img) - - assert person_idx > 0, 'image without instance' - mask_name = os.path.splitext(img_name)[0] + '_mask.npy' - np.save(os.path.join(mask_save_dir, mask_name), panoptic_seg) - - ############## json writing ################## - item = {} - item['dataset'] = 'CIHP' - item['im_name'] = img_name - item['img_height'] = img_h - item['img_width'] = img_w - item['center'] = [img_h / 2, img_w / 2] - item['person_num'] = person_idx 
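# A small worked example for bbox_expand above (numbers chosen arbitrarily): a roughly
# 100x80 px box near the right/bottom border of a 150x200 image, expanded with
# exp_ratio=1.2 and clamped to the image bounds.
print(bbox_expand(img_height=200, img_width=150,
                  bbox=[40, 120, 140, 199], exp_ratio=1.2))
# -> (30, 112, 149, 199): 10 px added on each side in x, ~8 px in y, clipped at x=149, y=199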
- item['person_bbox'] = exp_bbox - item['real_person_bbox'] = ori_bbox - item['person_bbox_score'] = bbox_score_list - item['bbox_name'] = bbox_name_list - item['mask_name'] = mask_name - file_list.append(item) - json_file = {'root': file_list} - return json_file, file_list - - -def get_arguments(): - parser = argparse.ArgumentParser(description="crop person val/test demo for inference") - parser.add_argument("--exp_ratio", type=float, default=1.2) - parser.add_argument("--overlap_threshold", type=float, default=0.5) - parser.add_argument("--conf_thres", type=float, default=0.5) - parser.add_argument("--img_dir", type=str, - default='/data03/v_xuyunqiu/data/instance-level_human_parsing/Testing/Images') - parser.add_argument("--save_dir", type=str, - default='/data03/v_xuyunqiu/Projects/experiment_data/testing/resnest_200_TTA_mask_nms_all_data') - parser.add_argument("--img_list", type=str, - default='/data03/v_xuyunqiu/Projects/pycococreator/annotations/CIHP_test.json') - parser.add_argument("--det_res", type=str, - default='/data02/v_xuyunqiu/detectron2-ResNeSt/tools/output_cihp_inference_resnest/inference_TTA/instances_predictions.pth') - return parser.parse_args() - - -def main(args): - img_info_list = json.load(open(args.img_list, encoding='UTF-8')) - pred = torch.load(args.det_res) - - crop_save_dir = os.path.join(args.save_dir, 'crop_pic') - if not os.path.exists(crop_save_dir): - os.makedirs(crop_save_dir) - mask_save_dir = os.path.join(args.save_dir, 'crop_mask') - if not os.path.exists(mask_save_dir): - os.makedirs(mask_save_dir) - - file_list = [] - for img_info in tqdm(img_info_list['images']): - json_file, file_list = make_crop_and_mask(img_info, pred, file_list, crop_save_dir, mask_save_dir, args) - with open(os.path.join(args.save_dir, 'crop.json'), 'w') as f: - json.dump(json_file, f, indent=2) - - -if __name__ == '__main__': - args = get_arguments() - main(args) diff --git a/preprocess/humanparsing/mhp_extension/scripts/make_coco_style_annotation.sh b/preprocess/humanparsing/mhp_extension/scripts/make_coco_style_annotation.sh deleted file mode 100644 index 37a1e7d..0000000 --- a/preprocess/humanparsing/mhp_extension/scripts/make_coco_style_annotation.sh +++ /dev/null @@ -1,14 +0,0 @@ -python ./coco_style_annotation_creator/human_to_coco.py \ - --dataset 'CIHP' \ - --json_save_dir './data/CIHP/annotations' \ - --train_img_dir './data/CIHP/Training/Images' \ - --train_anno_dir './data/CIHP/Training/Human_ids' \ - --val_img_dir './data/CIHP/Validation/Images' \ - --val_anno_dir './data/CIHP/Validation/Human_ids' - - -python ./coco_style_annotation_creator/test_human2coco_format.py \ - --dataset 'CIHP' \ - --json_save_dir './data/CIHP/annotations' \ - --test_img_dir './data/CIHP/Testing/Images' - diff --git a/preprocess/humanparsing/mhp_extension/scripts/make_crop.sh b/preprocess/humanparsing/mhp_extension/scripts/make_crop.sh deleted file mode 100644 index 604a433..0000000 --- a/preprocess/humanparsing/mhp_extension/scripts/make_crop.sh +++ /dev/null @@ -1,6 +0,0 @@ -python make_crop_and_mask_w_mask_nms.py \ - --img_dir './data/CIHP/Testing/Images' \ - --save_dir './data/CIHP/' \ - --img_list './data/CIHP/annotations/CIHP_val.json' \ - --det_res './data/CIHP/detectron2_prediction/inference/instances_predictions.pth' - diff --git a/preprocess/humanparsing/mhp_extension/scripts/parsing_fusion.sh b/preprocess/humanparsing/mhp_extension/scripts/parsing_fusion.sh deleted file mode 100644 index 107bcf6..0000000 --- a/preprocess/humanparsing/mhp_extension/scripts/parsing_fusion.sh 
+++ /dev/null @@ -1,6 +0,0 @@ -python logits_fusion.py \ ---test_json_path ./data/CIHP/crop.json \ ---global_output_dir ./data/CIHP/global_pic_parsing \ ---msrcnn_output_dir ./data/CIHP/crop_pic_parsing \ ---gt_output_dir ./data/CIHP/crop_pic_parsing \ ---save_dir ./data/CIHP/mhp_fusion_parsing diff --git a/preprocess/humanparsing/modules/__init__.py b/preprocess/humanparsing/modules/__init__.py deleted file mode 100644 index 8a098de..0000000 --- a/preprocess/humanparsing/modules/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .bn import ABN, InPlaceABN, InPlaceABNSync -from .functions import ACT_RELU, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE -from .misc import GlobalAvgPool2d, SingleGPU -from .residual import IdentityResidualBlock -from .dense import DenseModule diff --git a/preprocess/humanparsing/modules/bn.py b/preprocess/humanparsing/modules/bn.py deleted file mode 100644 index a794698..0000000 --- a/preprocess/humanparsing/modules/bn.py +++ /dev/null @@ -1,132 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as functional - -try: - from queue import Queue -except ImportError: - from Queue import Queue - -from .functions import * - - -class ABN(nn.Module): - """Activated Batch Normalization - - This gathers a `BatchNorm2d` and an activation function in a single module - """ - - def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01): - """Creates an Activated Batch Normalization module - - Parameters - ---------- - num_features : int - Number of feature channels in the input and output. - eps : float - Small constant to prevent numerical issues. - momentum : float - Momentum factor applied to compute running statistics as. - affine : bool - If `True` apply learned scale and shift transformation after normalization. - activation : str - Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. - slope : float - Negative slope for the `leaky_relu` activation. 
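ABN is meant as a drop-in replacement for the usual nn.BatchNorm2d followed by an activation, so a single module handles both the normalization and the non-linearity. A minimal usage sketch, assuming the package imports cleanly in your environment (importing modules JIT-compiles the inplace_abn extension declared in functions.py further below):

import torch
from modules import ABN  # exported by modules/__init__.py above

abn = ABN(num_features=64, activation="leaky_relu", slope=0.01)
x = torch.randn(2, 64, 32, 32)
y = abn(x)               # batch normalization followed by leaky ReLU
assert y.shape == x.shape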
- """ - super(ABN, self).__init__() - self.num_features = num_features - self.affine = affine - self.eps = eps - self.momentum = momentum - self.activation = activation - self.slope = slope - if self.affine: - self.weight = nn.Parameter(torch.ones(num_features)) - self.bias = nn.Parameter(torch.zeros(num_features)) - else: - self.register_parameter('weight', None) - self.register_parameter('bias', None) - self.register_buffer('running_mean', torch.zeros(num_features)) - self.register_buffer('running_var', torch.ones(num_features)) - self.reset_parameters() - - def reset_parameters(self): - nn.init.constant_(self.running_mean, 0) - nn.init.constant_(self.running_var, 1) - if self.affine: - nn.init.constant_(self.weight, 1) - nn.init.constant_(self.bias, 0) - - def forward(self, x): - x = functional.batch_norm(x, self.running_mean, self.running_var, self.weight, self.bias, - self.training, self.momentum, self.eps) - - if self.activation == ACT_RELU: - return functional.relu(x, inplace=True) - elif self.activation == ACT_LEAKY_RELU: - return functional.leaky_relu(x, negative_slope=self.slope, inplace=True) - elif self.activation == ACT_ELU: - return functional.elu(x, inplace=True) - else: - return x - - def __repr__(self): - rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ - ' affine={affine}, activation={activation}' - if self.activation == "leaky_relu": - rep += ', slope={slope})' - else: - rep += ')' - return rep.format(name=self.__class__.__name__, **self.__dict__) - - -class InPlaceABN(ABN): - """InPlace Activated Batch Normalization""" - - def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01): - """Creates an InPlace Activated Batch Normalization module - - Parameters - ---------- - num_features : int - Number of feature channels in the input and output. - eps : float - Small constant to prevent numerical issues. - momentum : float - Momentum factor applied to compute running statistics as. - affine : bool - If `True` apply learned scale and shift transformation after normalization. - activation : str - Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. - slope : float - Negative slope for the `leaky_relu` activation. - """ - super(InPlaceABN, self).__init__(num_features, eps, momentum, affine, activation, slope) - - def forward(self, x): - x, _, _ = inplace_abn(x, self.weight, self.bias, self.running_mean, self.running_var, - self.training, self.momentum, self.eps, self.activation, self.slope) - return x - - -class InPlaceABNSync(ABN): - """InPlace Activated Batch Normalization with cross-GPU synchronization - This assumes that it will be replicated across GPUs using the same mechanism as in `nn.DistributedDataParallel`. 
- """ - - def forward(self, x): - x, _, _ = inplace_abn_sync(x, self.weight, self.bias, self.running_mean, self.running_var, - self.training, self.momentum, self.eps, self.activation, self.slope) - return x - - def __repr__(self): - rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ - ' affine={affine}, activation={activation}' - if self.activation == "leaky_relu": - rep += ', slope={slope})' - else: - rep += ')' - return rep.format(name=self.__class__.__name__, **self.__dict__) - - diff --git a/preprocess/humanparsing/modules/deeplab.py b/preprocess/humanparsing/modules/deeplab.py deleted file mode 100644 index fd25b78..0000000 --- a/preprocess/humanparsing/modules/deeplab.py +++ /dev/null @@ -1,84 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as functional - -from models._util import try_index -from .bn import ABN - - -class DeeplabV3(nn.Module): - def __init__(self, - in_channels, - out_channels, - hidden_channels=256, - dilations=(12, 24, 36), - norm_act=ABN, - pooling_size=None): - super(DeeplabV3, self).__init__() - self.pooling_size = pooling_size - - self.map_convs = nn.ModuleList([ - nn.Conv2d(in_channels, hidden_channels, 1, bias=False), - nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[0], padding=dilations[0]), - nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[1], padding=dilations[1]), - nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[2], padding=dilations[2]) - ]) - self.map_bn = norm_act(hidden_channels * 4) - - self.global_pooling_conv = nn.Conv2d(in_channels, hidden_channels, 1, bias=False) - self.global_pooling_bn = norm_act(hidden_channels) - - self.red_conv = nn.Conv2d(hidden_channels * 4, out_channels, 1, bias=False) - self.pool_red_conv = nn.Conv2d(hidden_channels, out_channels, 1, bias=False) - self.red_bn = norm_act(out_channels) - - self.reset_parameters(self.map_bn.activation, self.map_bn.slope) - - def reset_parameters(self, activation, slope): - gain = nn.init.calculate_gain(activation, slope) - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.xavier_normal_(m.weight.data, gain) - if hasattr(m, "bias") and m.bias is not None: - nn.init.constant_(m.bias, 0) - elif isinstance(m, ABN): - if hasattr(m, "weight") and m.weight is not None: - nn.init.constant_(m.weight, 1) - if hasattr(m, "bias") and m.bias is not None: - nn.init.constant_(m.bias, 0) - - def forward(self, x): - # Map convolutions - out = torch.cat([m(x) for m in self.map_convs], dim=1) - out = self.map_bn(out) - out = self.red_conv(out) - - # Global pooling - pool = self._global_pooling(x) - pool = self.global_pooling_conv(pool) - pool = self.global_pooling_bn(pool) - pool = self.pool_red_conv(pool) - if self.training or self.pooling_size is None: - pool = pool.repeat(1, 1, x.size(2), x.size(3)) - - out += pool - out = self.red_bn(out) - return out - - def _global_pooling(self, x): - if self.training or self.pooling_size is None: - pool = x.view(x.size(0), x.size(1), -1).mean(dim=-1) - pool = pool.view(x.size(0), x.size(1), 1, 1) - else: - pooling_size = (min(try_index(self.pooling_size, 0), x.shape[2]), - min(try_index(self.pooling_size, 1), x.shape[3])) - padding = ( - (pooling_size[1] - 1) // 2, - (pooling_size[1] - 1) // 2 if pooling_size[1] % 2 == 1 else (pooling_size[1] - 1) // 2 + 1, - (pooling_size[0] - 1) // 2, - (pooling_size[0] - 1) // 2 if pooling_size[0] % 2 == 1 else (pooling_size[0] - 1) // 2 + 1 - ) - - pool = functional.avg_pool2d(x, pooling_size, 
stride=1) - pool = functional.pad(pool, pad=padding, mode="replicate") - return pool diff --git a/preprocess/humanparsing/modules/dense.py b/preprocess/humanparsing/modules/dense.py deleted file mode 100644 index 9638d6e..0000000 --- a/preprocess/humanparsing/modules/dense.py +++ /dev/null @@ -1,42 +0,0 @@ -from collections import OrderedDict - -import torch -import torch.nn as nn - -from .bn import ABN - - -class DenseModule(nn.Module): - def __init__(self, in_channels, growth, layers, bottleneck_factor=4, norm_act=ABN, dilation=1): - super(DenseModule, self).__init__() - self.in_channels = in_channels - self.growth = growth - self.layers = layers - - self.convs1 = nn.ModuleList() - self.convs3 = nn.ModuleList() - for i in range(self.layers): - self.convs1.append(nn.Sequential(OrderedDict([ - ("bn", norm_act(in_channels)), - ("conv", nn.Conv2d(in_channels, self.growth * bottleneck_factor, 1, bias=False)) - ]))) - self.convs3.append(nn.Sequential(OrderedDict([ - ("bn", norm_act(self.growth * bottleneck_factor)), - ("conv", nn.Conv2d(self.growth * bottleneck_factor, self.growth, 3, padding=dilation, bias=False, - dilation=dilation)) - ]))) - in_channels += self.growth - - @property - def out_channels(self): - return self.in_channels + self.growth * self.layers - - def forward(self, x): - inputs = [x] - for i in range(self.layers): - x = torch.cat(inputs, dim=1) - x = self.convs1[i](x) - x = self.convs3[i](x) - inputs += [x] - - return torch.cat(inputs, dim=1) diff --git a/preprocess/humanparsing/modules/functions.py b/preprocess/humanparsing/modules/functions.py deleted file mode 100644 index 4b28372..0000000 --- a/preprocess/humanparsing/modules/functions.py +++ /dev/null @@ -1,245 +0,0 @@ -import pdb -from os import path -import torch -import torch.distributed as dist -import torch.autograd as autograd -import torch.cuda.comm as comm -from torch.autograd.function import once_differentiable -from torch.utils.cpp_extension import load - -_src_path = path.join(path.dirname(path.abspath(__file__)), "src") -_backend = load(name="inplace_abn", - extra_cflags=["-O3"], - sources=[path.join(_src_path, f) for f in [ - "inplace_abn.cpp", - "inplace_abn_cpu.cpp", - "inplace_abn_cuda.cu", - "inplace_abn_cuda_half.cu" - ]], - extra_cuda_cflags=["--expt-extended-lambda"]) - -# Activation names -ACT_RELU = "relu" -ACT_LEAKY_RELU = "leaky_relu" -ACT_ELU = "elu" -ACT_NONE = "none" - - -def _check(fn, *args, **kwargs): - success = fn(*args, **kwargs) - if not success: - raise RuntimeError("CUDA Error encountered in {}".format(fn)) - - -def _broadcast_shape(x): - out_size = [] - for i, s in enumerate(x.size()): - if i != 1: - out_size.append(1) - else: - out_size.append(s) - return out_size - - -def _reduce(x): - if len(x.size()) == 2: - return x.sum(dim=0) - else: - n, c = x.size()[0:2] - return x.contiguous().view((n, c, -1)).sum(2).sum(0) - - -def _count_samples(x): - count = 1 - for i, s in enumerate(x.size()): - if i != 1: - count *= s - return count - - -def _act_forward(ctx, x): - if ctx.activation == ACT_LEAKY_RELU: - _backend.leaky_relu_forward(x, ctx.slope) - elif ctx.activation == ACT_ELU: - _backend.elu_forward(x) - elif ctx.activation == ACT_NONE: - pass - - -def _act_backward(ctx, x, dx): - if ctx.activation == ACT_LEAKY_RELU: - _backend.leaky_relu_backward(x, dx, ctx.slope) - elif ctx.activation == ACT_ELU: - _backend.elu_backward(x, dx) - elif ctx.activation == ACT_NONE: - pass - - -class InPlaceABN(autograd.Function): - @staticmethod - def forward(ctx, x, weight, bias, running_mean, 
running_var, - training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01): - # Save context - ctx.training = training - ctx.momentum = momentum - ctx.eps = eps - ctx.activation = activation - ctx.slope = slope - ctx.affine = weight is not None and bias is not None - - # Prepare inputs - count = _count_samples(x) - x = x.contiguous() - weight = weight.contiguous() if ctx.affine else x.new_empty(0) - bias = bias.contiguous() if ctx.affine else x.new_empty(0) - - if ctx.training: - mean, var = _backend.mean_var(x) - - # Update running stats - running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) - running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1)) - - # Mark in-place modified tensors - ctx.mark_dirty(x, running_mean, running_var) - else: - mean, var = running_mean.contiguous(), running_var.contiguous() - ctx.mark_dirty(x) - - # BN forward + activation - _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps) - _act_forward(ctx, x) - - # Output - ctx.var = var - ctx.save_for_backward(x, var, weight, bias) - ctx.mark_non_differentiable(running_mean, running_var) - return x, running_mean, running_var - - @staticmethod - @once_differentiable - def backward(ctx, dz, _drunning_mean, _drunning_var): - z, var, weight, bias = ctx.saved_tensors - dz = dz.contiguous() - - # Undo activation - _act_backward(ctx, z, dz) - - if ctx.training: - edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps) - else: - # TODO: implement simplified CUDA backward for inference mode - edz = dz.new_zeros(dz.size(1)) - eydz = dz.new_zeros(dz.size(1)) - - dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps) - # dweight = eydz * weight.sign() if ctx.affine else None - dweight = eydz if ctx.affine else None - if dweight is not None: - dweight[weight < 0] *= -1 - dbias = edz if ctx.affine else None - - return dx, dweight, dbias, None, None, None, None, None, None, None - - -class InPlaceABNSync(autograd.Function): - @classmethod - def forward(cls, ctx, x, weight, bias, running_mean, running_var, - training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01, equal_batches=True): - # Save context - ctx.training = training - ctx.momentum = momentum - ctx.eps = eps - ctx.activation = activation - ctx.slope = slope - ctx.affine = weight is not None and bias is not None - - # Prepare inputs - ctx.world_size = dist.get_world_size() if dist.is_initialized() else 1 - - # count = _count_samples(x) - batch_size = x.new_tensor([x.shape[0]], dtype=torch.long) - - x = x.contiguous() - weight = weight.contiguous() if ctx.affine else x.new_empty(0) - bias = bias.contiguous() if ctx.affine else x.new_empty(0) - - if ctx.training: - mean, var = _backend.mean_var(x) - if ctx.world_size > 1: - # get global batch size - if equal_batches: - batch_size *= ctx.world_size - else: - dist.all_reduce(batch_size, dist.ReduceOp.SUM) - - ctx.factor = x.shape[0] / float(batch_size.item()) - - mean_all = mean.clone() * ctx.factor - dist.all_reduce(mean_all, dist.ReduceOp.SUM) - - var_all = (var + (mean - mean_all) ** 2) * ctx.factor - dist.all_reduce(var_all, dist.ReduceOp.SUM) - - mean = mean_all - var = var_all - - # Update running stats - running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) - count = batch_size.item() * x.view(x.shape[0], x.shape[1], -1).shape[-1] - running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * (float(count) / (count - 1))) - - # Mark in-place modified tensors - ctx.mark_dirty(x, 
running_mean, running_var) - else: - mean, var = running_mean.contiguous(), running_var.contiguous() - ctx.mark_dirty(x) - - # BN forward + activation - _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps) - _act_forward(ctx, x) - - # Output - ctx.var = var - ctx.save_for_backward(x, var, weight, bias) - ctx.mark_non_differentiable(running_mean, running_var) - return x, running_mean, running_var - - @staticmethod - @once_differentiable - def backward(ctx, dz, _drunning_mean, _drunning_var): - z, var, weight, bias = ctx.saved_tensors - dz = dz.contiguous() - - # Undo activation - _act_backward(ctx, z, dz) - - if ctx.training: - edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps) - edz_local = edz.clone() - eydz_local = eydz.clone() - - if ctx.world_size > 1: - edz *= ctx.factor - dist.all_reduce(edz, dist.ReduceOp.SUM) - - eydz *= ctx.factor - dist.all_reduce(eydz, dist.ReduceOp.SUM) - else: - edz_local = edz = dz.new_zeros(dz.size(1)) - eydz_local = eydz = dz.new_zeros(dz.size(1)) - - dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps) - # dweight = eydz_local * weight.sign() if ctx.affine else None - dweight = eydz_local if ctx.affine else None - if dweight is not None: - dweight[weight < 0] *= -1 - dbias = edz_local if ctx.affine else None - - return dx, dweight, dbias, None, None, None, None, None, None, None - - -inplace_abn = InPlaceABN.apply -inplace_abn_sync = InPlaceABNSync.apply - -__all__ = ["inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU", "ACT_NONE"] diff --git a/preprocess/humanparsing/modules/misc.py b/preprocess/humanparsing/modules/misc.py deleted file mode 100644 index 3c50b69..0000000 --- a/preprocess/humanparsing/modules/misc.py +++ /dev/null @@ -1,21 +0,0 @@ -import torch.nn as nn -import torch -import torch.distributed as dist - -class GlobalAvgPool2d(nn.Module): - def __init__(self): - """Global average pooling over the input's spatial dimensions""" - super(GlobalAvgPool2d, self).__init__() - - def forward(self, inputs): - in_size = inputs.size() - return inputs.view((in_size[0], in_size[1], -1)).mean(dim=2) - -class SingleGPU(nn.Module): - def __init__(self, module): - super(SingleGPU, self).__init__() - self.module=module - - def forward(self, input): - return self.module(input.cuda(non_blocking=True)) - diff --git a/preprocess/humanparsing/modules/residual.py b/preprocess/humanparsing/modules/residual.py deleted file mode 100644 index 8a5c90e..0000000 --- a/preprocess/humanparsing/modules/residual.py +++ /dev/null @@ -1,182 +0,0 @@ -from collections import OrderedDict - -import torch.nn as nn - -from .bn import ABN, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE -import torch.nn.functional as functional - - -class ResidualBlock(nn.Module): - """Configurable residual block - - Parameters - ---------- - in_channels : int - Number of input channels. - channels : list of int - Number of channels in the internal feature maps. Can either have two or three elements: if three construct - a residual block with two `3 x 3` convolutions, otherwise construct a bottleneck block with `1 x 1`, then - `3 x 3` then `1 x 1` convolutions. - stride : int - Stride of the first `3 x 3` convolution - dilation : int - Dilation to apply to the `3 x 3` convolutions. - groups : int - Number of convolution groups. This is used to create ResNeXt-style blocks and is only compatible with - bottleneck blocks. - norm_act : callable - Function to create normalization / activation Module. 
- dropout: callable - Function to create Dropout Module. - """ - - def __init__(self, - in_channels, - channels, - stride=1, - dilation=1, - groups=1, - norm_act=ABN, - dropout=None): - super(ResidualBlock, self).__init__() - - # Check parameters for inconsistencies - if len(channels) != 2 and len(channels) != 3: - raise ValueError("channels must contain either two or three values") - if len(channels) == 2 and groups != 1: - raise ValueError("groups > 1 are only valid if len(channels) == 3") - - is_bottleneck = len(channels) == 3 - need_proj_conv = stride != 1 or in_channels != channels[-1] - - if not is_bottleneck: - bn2 = norm_act(channels[1]) - bn2.activation = ACT_NONE - layers = [ - ("conv1", nn.Conv2d(in_channels, channels[0], 3, stride=stride, padding=dilation, bias=False, - dilation=dilation)), - ("bn1", norm_act(channels[0])), - ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False, - dilation=dilation)), - ("bn2", bn2) - ] - if dropout is not None: - layers = layers[0:2] + [("dropout", dropout())] + layers[2:] - else: - bn3 = norm_act(channels[2]) - bn3.activation = ACT_NONE - layers = [ - ("conv1", nn.Conv2d(in_channels, channels[0], 1, stride=1, padding=0, bias=False)), - ("bn1", norm_act(channels[0])), - ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=stride, padding=dilation, bias=False, - groups=groups, dilation=dilation)), - ("bn2", norm_act(channels[1])), - ("conv3", nn.Conv2d(channels[1], channels[2], 1, stride=1, padding=0, bias=False)), - ("bn3", bn3) - ] - if dropout is not None: - layers = layers[0:4] + [("dropout", dropout())] + layers[4:] - self.convs = nn.Sequential(OrderedDict(layers)) - - if need_proj_conv: - self.proj_conv = nn.Conv2d(in_channels, channels[-1], 1, stride=stride, padding=0, bias=False) - self.proj_bn = norm_act(channels[-1]) - self.proj_bn.activation = ACT_NONE - - def forward(self, x): - if hasattr(self, "proj_conv"): - residual = self.proj_conv(x) - residual = self.proj_bn(residual) - else: - residual = x - x = self.convs(x) + residual - - if self.convs.bn1.activation == ACT_LEAKY_RELU: - return functional.leaky_relu(x, negative_slope=self.convs.bn1.slope, inplace=True) - elif self.convs.bn1.activation == ACT_ELU: - return functional.elu(x, inplace=True) - else: - return x - - -class IdentityResidualBlock(nn.Module): - def __init__(self, - in_channels, - channels, - stride=1, - dilation=1, - groups=1, - norm_act=ABN, - dropout=None): - """Configurable identity-mapping residual block - - Parameters - ---------- - in_channels : int - Number of input channels. - channels : list of int - Number of channels in the internal feature maps. Can either have two or three elements: if three construct - a residual block with two `3 x 3` convolutions, otherwise construct a bottleneck block with `1 x 1`, then - `3 x 3` then `1 x 1` convolutions. - stride : int - Stride of the first `3 x 3` convolution - dilation : int - Dilation to apply to the `3 x 3` convolutions. - groups : int - Number of convolution groups. This is used to create ResNeXt-style blocks and is only compatible with - bottleneck blocks. - norm_act : callable - Function to create normalization / activation Module. - dropout: callable - Function to create Dropout Module. 
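As the two docstrings above describe, both blocks switch between a basic form (two 3x3 convolutions when len(channels) == 2) and a bottleneck (1x1 -> 3x3 -> 1x1 when len(channels) == 3), and they add a 1x1 projection on the shortcut whenever the stride or the channel count changes. A hypothetical construction sketch, again assuming the package and its compiled extension import cleanly:

import torch
from modules import IdentityResidualBlock  # also exported by modules/__init__.py

# Basic form: 64 -> 64 through two 3x3 convolutions, identity shortcut
basic = IdentityResidualBlock(64, [64, 64])

# Bottleneck form: 64 -> 256 through 1x1/3x3/1x1; stride=2 forces a projection shortcut
bottleneck = IdentityResidualBlock(64, [64, 64, 256], stride=2)

x = torch.randn(1, 64, 56, 56)
print(basic(x).shape)        # torch.Size([1, 64, 56, 56])
print(bottleneck(x).shape)   # torch.Size([1, 256, 28, 28])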
- """ - super(IdentityResidualBlock, self).__init__() - - # Check parameters for inconsistencies - if len(channels) != 2 and len(channels) != 3: - raise ValueError("channels must contain either two or three values") - if len(channels) == 2 and groups != 1: - raise ValueError("groups > 1 are only valid if len(channels) == 3") - - is_bottleneck = len(channels) == 3 - need_proj_conv = stride != 1 or in_channels != channels[-1] - - self.bn1 = norm_act(in_channels) - if not is_bottleneck: - layers = [ - ("conv1", nn.Conv2d(in_channels, channels[0], 3, stride=stride, padding=dilation, bias=False, - dilation=dilation)), - ("bn2", norm_act(channels[0])), - ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False, - dilation=dilation)) - ] - if dropout is not None: - layers = layers[0:2] + [("dropout", dropout())] + layers[2:] - else: - layers = [ - ("conv1", nn.Conv2d(in_channels, channels[0], 1, stride=stride, padding=0, bias=False)), - ("bn2", norm_act(channels[0])), - ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False, - groups=groups, dilation=dilation)), - ("bn3", norm_act(channels[1])), - ("conv3", nn.Conv2d(channels[1], channels[2], 1, stride=1, padding=0, bias=False)) - ] - if dropout is not None: - layers = layers[0:4] + [("dropout", dropout())] + layers[4:] - self.convs = nn.Sequential(OrderedDict(layers)) - - if need_proj_conv: - self.proj_conv = nn.Conv2d(in_channels, channels[-1], 1, stride=stride, padding=0, bias=False) - - def forward(self, x): - if hasattr(self, "proj_conv"): - bn1 = self.bn1(x) - shortcut = self.proj_conv(bn1) - else: - shortcut = x.clone() - bn1 = self.bn1(x) - - out = self.convs(bn1) - out.add_(shortcut) - - return out diff --git a/preprocess/humanparsing/modules/src/checks.h b/preprocess/humanparsing/modules/src/checks.h deleted file mode 100644 index e761a6f..0000000 --- a/preprocess/humanparsing/modules/src/checks.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#include - -// Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT -#ifndef AT_CHECK -#define AT_CHECK AT_ASSERT -#endif - -#define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor") -#define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor") -#define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous") - -#define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) -#define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x) \ No newline at end of file diff --git a/preprocess/humanparsing/modules/src/inplace_abn.cpp b/preprocess/humanparsing/modules/src/inplace_abn.cpp deleted file mode 100644 index 0a6b112..0000000 --- a/preprocess/humanparsing/modules/src/inplace_abn.cpp +++ /dev/null @@ -1,95 +0,0 @@ -#include - -#include - -#include "inplace_abn.h" - -std::vector mean_var(at::Tensor x) { - if (x.is_cuda()) { - if (x.type().scalarType() == at::ScalarType::Half) { - return mean_var_cuda_h(x); - } else { - return mean_var_cuda(x); - } - } else { - return mean_var_cpu(x); - } -} - -at::Tensor forward(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, - bool affine, float eps) { - if (x.is_cuda()) { - if (x.type().scalarType() == at::ScalarType::Half) { - return forward_cuda_h(x, mean, var, weight, bias, affine, eps); - } else { - return forward_cuda(x, mean, var, weight, bias, affine, eps); - } - } else { - return forward_cpu(x, mean, var, weight, bias, affine, eps); - } -} - -std::vector 
edz_eydz(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, - bool affine, float eps) { - if (z.is_cuda()) { - if (z.type().scalarType() == at::ScalarType::Half) { - return edz_eydz_cuda_h(z, dz, weight, bias, affine, eps); - } else { - return edz_eydz_cuda(z, dz, weight, bias, affine, eps); - } - } else { - return edz_eydz_cpu(z, dz, weight, bias, affine, eps); - } -} - -at::Tensor backward(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, - at::Tensor edz, at::Tensor eydz, bool affine, float eps) { - if (z.is_cuda()) { - if (z.type().scalarType() == at::ScalarType::Half) { - return backward_cuda_h(z, dz, var, weight, bias, edz, eydz, affine, eps); - } else { - return backward_cuda(z, dz, var, weight, bias, edz, eydz, affine, eps); - } - } else { - return backward_cpu(z, dz, var, weight, bias, edz, eydz, affine, eps); - } -} - -void leaky_relu_forward(at::Tensor z, float slope) { - at::leaky_relu_(z, slope); -} - -void leaky_relu_backward(at::Tensor z, at::Tensor dz, float slope) { - if (z.is_cuda()) { - if (z.type().scalarType() == at::ScalarType::Half) { - return leaky_relu_backward_cuda_h(z, dz, slope); - } else { - return leaky_relu_backward_cuda(z, dz, slope); - } - } else { - return leaky_relu_backward_cpu(z, dz, slope); - } -} - -void elu_forward(at::Tensor z) { - at::elu_(z); -} - -void elu_backward(at::Tensor z, at::Tensor dz) { - if (z.is_cuda()) { - return elu_backward_cuda(z, dz); - } else { - return elu_backward_cpu(z, dz); - } -} - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("mean_var", &mean_var, "Mean and variance computation"); - m.def("forward", &forward, "In-place forward computation"); - m.def("edz_eydz", &edz_eydz, "First part of backward computation"); - m.def("backward", &backward, "Second part of backward computation"); - m.def("leaky_relu_forward", &leaky_relu_forward, "Leaky relu forward computation"); - m.def("leaky_relu_backward", &leaky_relu_backward, "Leaky relu backward computation and inversion"); - m.def("elu_forward", &elu_forward, "Elu forward computation"); - m.def("elu_backward", &elu_backward, "Elu backward computation and inversion"); -} diff --git a/preprocess/humanparsing/modules/src/inplace_abn.h b/preprocess/humanparsing/modules/src/inplace_abn.h deleted file mode 100644 index 17afd11..0000000 --- a/preprocess/humanparsing/modules/src/inplace_abn.h +++ /dev/null @@ -1,88 +0,0 @@ -#pragma once - -#include - -#include - -std::vector mean_var_cpu(at::Tensor x); -std::vector mean_var_cuda(at::Tensor x); -std::vector mean_var_cuda_h(at::Tensor x); - -at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, - bool affine, float eps); -at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, - bool affine, float eps); -at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, - bool affine, float eps); - -std::vector edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, - bool affine, float eps); -std::vector edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, - bool affine, float eps); -std::vector edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, - bool affine, float eps); - -at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, - at::Tensor edz, at::Tensor eydz, bool affine, float eps); -at::Tensor 
backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, - at::Tensor edz, at::Tensor eydz, bool affine, float eps); -at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, - at::Tensor edz, at::Tensor eydz, bool affine, float eps); - -void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope); -void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope); -void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope); - -void elu_backward_cpu(at::Tensor z, at::Tensor dz); -void elu_backward_cuda(at::Tensor z, at::Tensor dz); - -static void get_dims(at::Tensor x, int64_t& num, int64_t& chn, int64_t& sp) { - num = x.size(0); - chn = x.size(1); - sp = 1; - for (int64_t i = 2; i < x.ndimension(); ++i) - sp *= x.size(i); -} - -/* - * Specialized CUDA reduction functions for BN - */ -#ifdef __CUDACC__ - -#include "utils/cuda.cuh" - -template -__device__ T reduce(Op op, int plane, int N, int S) { - T sum = (T)0; - for (int batch = 0; batch < N; ++batch) { - for (int x = threadIdx.x; x < S; x += blockDim.x) { - sum += op(batch, plane, x); - } - } - - // sum over NumThreads within a warp - sum = warpSum(sum); - - // 'transpose', and reduce within warp again - __shared__ T shared[32]; - __syncthreads(); - if (threadIdx.x % WARP_SIZE == 0) { - shared[threadIdx.x / WARP_SIZE] = sum; - } - if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) { - // zero out the other entries in shared - shared[threadIdx.x] = (T)0; - } - __syncthreads(); - if (threadIdx.x / WARP_SIZE == 0) { - sum = warpSum(shared[threadIdx.x]); - if (threadIdx.x == 0) { - shared[0] = sum; - } - } - __syncthreads(); - - // Everyone picks it up, should be broadcast into the whole gradInput - return shared[0]; -} -#endif diff --git a/preprocess/humanparsing/modules/src/inplace_abn_cpu.cpp b/preprocess/humanparsing/modules/src/inplace_abn_cpu.cpp deleted file mode 100644 index ffc6d38..0000000 --- a/preprocess/humanparsing/modules/src/inplace_abn_cpu.cpp +++ /dev/null @@ -1,119 +0,0 @@ -#include - -#include - -#include "utils/checks.h" -#include "inplace_abn.h" - -at::Tensor reduce_sum(at::Tensor x) { - if (x.ndimension() == 2) { - return x.sum(0); - } else { - auto x_view = x.view({x.size(0), x.size(1), -1}); - return x_view.sum(-1).sum(0); - } -} - -at::Tensor broadcast_to(at::Tensor v, at::Tensor x) { - if (x.ndimension() == 2) { - return v; - } else { - std::vector broadcast_size = {1, -1}; - for (int64_t i = 2; i < x.ndimension(); ++i) - broadcast_size.push_back(1); - - return v.view(broadcast_size); - } -} - -int64_t count(at::Tensor x) { - int64_t count = x.size(0); - for (int64_t i = 2; i < x.ndimension(); ++i) - count *= x.size(i); - - return count; -} - -at::Tensor invert_affine(at::Tensor z, at::Tensor weight, at::Tensor bias, bool affine, float eps) { - if (affine) { - return (z - broadcast_to(bias, z)) / broadcast_to(at::abs(weight) + eps, z); - } else { - return z; - } -} - -std::vector mean_var_cpu(at::Tensor x) { - auto num = count(x); - auto mean = reduce_sum(x) / num; - auto diff = x - broadcast_to(mean, x); - auto var = reduce_sum(diff.pow(2)) / num; - - return {mean, var}; -} - -at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, - bool affine, float eps) { - auto gamma = affine ? 
at::abs(weight) + eps : at::ones_like(var); - auto mul = at::rsqrt(var + eps) * gamma; - - x.sub_(broadcast_to(mean, x)); - x.mul_(broadcast_to(mul, x)); - if (affine) x.add_(broadcast_to(bias, x)); - - return x; -} - -std::vector edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, - bool affine, float eps) { - auto edz = reduce_sum(dz); - auto y = invert_affine(z, weight, bias, affine, eps); - auto eydz = reduce_sum(y * dz); - - return {edz, eydz}; -} - -at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, - at::Tensor edz, at::Tensor eydz, bool affine, float eps) { - auto y = invert_affine(z, weight, bias, affine, eps); - auto mul = affine ? at::rsqrt(var + eps) * (at::abs(weight) + eps) : at::rsqrt(var + eps); - - auto num = count(z); - auto dx = (dz - broadcast_to(edz / num, dz) - y * broadcast_to(eydz / num, dz)) * broadcast_to(mul, dz); - return dx; -} - -void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope) { - CHECK_CPU_INPUT(z); - CHECK_CPU_INPUT(dz); - - AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cpu", ([&] { - int64_t count = z.numel(); - auto *_z = z.data(); - auto *_dz = dz.data(); - - for (int64_t i = 0; i < count; ++i) { - if (_z[i] < 0) { - _z[i] *= 1 / slope; - _dz[i] *= slope; - } - } - })); -} - -void elu_backward_cpu(at::Tensor z, at::Tensor dz) { - CHECK_CPU_INPUT(z); - CHECK_CPU_INPUT(dz); - - AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cpu", ([&] { - int64_t count = z.numel(); - auto *_z = z.data(); - auto *_dz = dz.data(); - - for (int64_t i = 0; i < count; ++i) { - if (_z[i] < 0) { - _z[i] = log1p(_z[i]); - _dz[i] *= (_z[i] + 1.f); - } - } - })); -} diff --git a/preprocess/humanparsing/modules/src/inplace_abn_cuda.cu b/preprocess/humanparsing/modules/src/inplace_abn_cuda.cu deleted file mode 100644 index b157b06..0000000 --- a/preprocess/humanparsing/modules/src/inplace_abn_cuda.cu +++ /dev/null @@ -1,333 +0,0 @@ -#include - -#include -#include - -#include - -#include "utils/checks.h" -#include "utils/cuda.cuh" -#include "inplace_abn.h" - -#include - -// Operations for reduce -template -struct SumOp { - __device__ SumOp(const T *t, int c, int s) - : tensor(t), chn(c), sp(s) {} - __device__ __forceinline__ T operator()(int batch, int plane, int n) { - return tensor[(batch * chn + plane) * sp + n]; - } - const T *tensor; - const int chn; - const int sp; -}; - -template -struct VarOp { - __device__ VarOp(T m, const T *t, int c, int s) - : mean(m), tensor(t), chn(c), sp(s) {} - __device__ __forceinline__ T operator()(int batch, int plane, int n) { - T val = tensor[(batch * chn + plane) * sp + n]; - return (val - mean) * (val - mean); - } - const T mean; - const T *tensor; - const int chn; - const int sp; -}; - -template -struct GradOp { - __device__ GradOp(T _weight, T _bias, const T *_z, const T *_dz, int c, int s) - : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {} - __device__ __forceinline__ Pair operator()(int batch, int plane, int n) { - T _y = (z[(batch * chn + plane) * sp + n] - bias) / weight; - T _dz = dz[(batch * chn + plane) * sp + n]; - return Pair(_dz, _y * _dz); - } - const T weight; - const T bias; - const T *z; - const T *dz; - const int chn; - const int sp; -}; - -/*********** - * mean_var - ***********/ - -template -__global__ void mean_var_kernel(const T *x, T *mean, T *var, int num, int chn, int sp) { - int plane = blockIdx.x; - T norm = T(1) / T(num * sp); - - T _mean = reduce>(SumOp(x, chn, sp), plane, num, sp) * 
norm; - __syncthreads(); - T _var = reduce>(VarOp(_mean, x, chn, sp), plane, num, sp) * norm; - - if (threadIdx.x == 0) { - mean[plane] = _mean; - var[plane] = _var; - } -} - -std::vector mean_var_cuda(at::Tensor x) { - CHECK_CUDA_INPUT(x); - - // Extract dimensions - int64_t num, chn, sp; - get_dims(x, num, chn, sp); - - // Prepare output tensors - auto mean = at::empty({chn}, x.options()); - auto var = at::empty({chn}, x.options()); - - // Run kernel - dim3 blocks(chn); - dim3 threads(getNumThreads(sp)); - auto stream = at::cuda::getCurrentCUDAStream(); - AT_DISPATCH_FLOATING_TYPES(x.type(), "mean_var_cuda", ([&] { - mean_var_kernel<<>>( - x.data(), - mean.data(), - var.data(), - num, chn, sp); - })); - - return {mean, var}; -} - -/********** - * forward - **********/ - -template -__global__ void forward_kernel(T *x, const T *mean, const T *var, const T *weight, const T *bias, - bool affine, float eps, int num, int chn, int sp) { - int plane = blockIdx.x; - - T _mean = mean[plane]; - T _var = var[plane]; - T _weight = affine ? abs(weight[plane]) + eps : T(1); - T _bias = affine ? bias[plane] : T(0); - - T mul = rsqrt(_var + eps) * _weight; - - for (int batch = 0; batch < num; ++batch) { - for (int n = threadIdx.x; n < sp; n += blockDim.x) { - T _x = x[(batch * chn + plane) * sp + n]; - T _y = (_x - _mean) * mul + _bias; - - x[(batch * chn + plane) * sp + n] = _y; - } - } -} - -at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, - bool affine, float eps) { - CHECK_CUDA_INPUT(x); - CHECK_CUDA_INPUT(mean); - CHECK_CUDA_INPUT(var); - CHECK_CUDA_INPUT(weight); - CHECK_CUDA_INPUT(bias); - - // Extract dimensions - int64_t num, chn, sp; - get_dims(x, num, chn, sp); - - // Run kernel - dim3 blocks(chn); - dim3 threads(getNumThreads(sp)); - auto stream = at::cuda::getCurrentCUDAStream(); - AT_DISPATCH_FLOATING_TYPES(x.type(), "forward_cuda", ([&] { - forward_kernel<<>>( - x.data(), - mean.data(), - var.data(), - weight.data(), - bias.data(), - affine, eps, num, chn, sp); - })); - - return x; -} - -/*********** - * edz_eydz - ***********/ - -template -__global__ void edz_eydz_kernel(const T *z, const T *dz, const T *weight, const T *bias, - T *edz, T *eydz, bool affine, float eps, int num, int chn, int sp) { - int plane = blockIdx.x; - - T _weight = affine ? abs(weight[plane]) + eps : 1.f; - T _bias = affine ? 
bias[plane] : 0.f; - - Pair res = reduce, GradOp>(GradOp(_weight, _bias, z, dz, chn, sp), plane, num, sp); - __syncthreads(); - - if (threadIdx.x == 0) { - edz[plane] = res.v1; - eydz[plane] = res.v2; - } -} - -std::vector edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, - bool affine, float eps) { - CHECK_CUDA_INPUT(z); - CHECK_CUDA_INPUT(dz); - CHECK_CUDA_INPUT(weight); - CHECK_CUDA_INPUT(bias); - - // Extract dimensions - int64_t num, chn, sp; - get_dims(z, num, chn, sp); - - auto edz = at::empty({chn}, z.options()); - auto eydz = at::empty({chn}, z.options()); - - // Run kernel - dim3 blocks(chn); - dim3 threads(getNumThreads(sp)); - auto stream = at::cuda::getCurrentCUDAStream(); - AT_DISPATCH_FLOATING_TYPES(z.type(), "edz_eydz_cuda", ([&] { - edz_eydz_kernel<<>>( - z.data(), - dz.data(), - weight.data(), - bias.data(), - edz.data(), - eydz.data(), - affine, eps, num, chn, sp); - })); - - return {edz, eydz}; -} - -/*********** - * backward - ***********/ - -template -__global__ void backward_kernel(const T *z, const T *dz, const T *var, const T *weight, const T *bias, const T *edz, - const T *eydz, T *dx, bool affine, float eps, int num, int chn, int sp) { - int plane = blockIdx.x; - - T _weight = affine ? abs(weight[plane]) + eps : 1.f; - T _bias = affine ? bias[plane] : 0.f; - T _var = var[plane]; - T _edz = edz[plane]; - T _eydz = eydz[plane]; - - T _mul = _weight * rsqrt(_var + eps); - T count = T(num * sp); - - for (int batch = 0; batch < num; ++batch) { - for (int n = threadIdx.x; n < sp; n += blockDim.x) { - T _dz = dz[(batch * chn + plane) * sp + n]; - T _y = (z[(batch * chn + plane) * sp + n] - _bias) / _weight; - - dx[(batch * chn + plane) * sp + n] = (_dz - _edz / count - _y * _eydz / count) * _mul; - } - } -} - -at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, - at::Tensor edz, at::Tensor eydz, bool affine, float eps) { - CHECK_CUDA_INPUT(z); - CHECK_CUDA_INPUT(dz); - CHECK_CUDA_INPUT(var); - CHECK_CUDA_INPUT(weight); - CHECK_CUDA_INPUT(bias); - CHECK_CUDA_INPUT(edz); - CHECK_CUDA_INPUT(eydz); - - // Extract dimensions - int64_t num, chn, sp; - get_dims(z, num, chn, sp); - - auto dx = at::zeros_like(z); - - // Run kernel - dim3 blocks(chn); - dim3 threads(getNumThreads(sp)); - auto stream = at::cuda::getCurrentCUDAStream(); - AT_DISPATCH_FLOATING_TYPES(z.type(), "backward_cuda", ([&] { - backward_kernel<<>>( - z.data(), - dz.data(), - var.data(), - weight.data(), - bias.data(), - edz.data(), - eydz.data(), - dx.data(), - affine, eps, num, chn, sp); - })); - - return dx; -} - -/************** - * activations - **************/ - -template -inline void leaky_relu_backward_impl(T *z, T *dz, float slope, int64_t count) { - // Create thrust pointers - thrust::device_ptr th_z = thrust::device_pointer_cast(z); - thrust::device_ptr th_dz = thrust::device_pointer_cast(dz); - - auto stream = at::cuda::getCurrentCUDAStream(); - thrust::transform_if(thrust::cuda::par.on(stream), - th_dz, th_dz + count, th_z, th_dz, - [slope] __device__ (const T& dz) { return dz * slope; }, - [] __device__ (const T& z) { return z < 0; }); - thrust::transform_if(thrust::cuda::par.on(stream), - th_z, th_z + count, th_z, - [slope] __device__ (const T& z) { return z / slope; }, - [] __device__ (const T& z) { return z < 0; }); -} - -void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope) { - CHECK_CUDA_INPUT(z); - CHECK_CUDA_INPUT(dz); - - int64_t count = z.numel(); - - AT_DISPATCH_FLOATING_TYPES(z.type(), 
"leaky_relu_backward_cuda", ([&] { - leaky_relu_backward_impl(z.data(), dz.data(), slope, count); - })); -} - -template -inline void elu_backward_impl(T *z, T *dz, int64_t count) { - // Create thrust pointers - thrust::device_ptr th_z = thrust::device_pointer_cast(z); - thrust::device_ptr th_dz = thrust::device_pointer_cast(dz); - - auto stream = at::cuda::getCurrentCUDAStream(); - thrust::transform_if(thrust::cuda::par.on(stream), - th_dz, th_dz + count, th_z, th_z, th_dz, - [] __device__ (const T& dz, const T& z) { return dz * (z + 1.); }, - [] __device__ (const T& z) { return z < 0; }); - thrust::transform_if(thrust::cuda::par.on(stream), - th_z, th_z + count, th_z, - [] __device__ (const T& z) { return log1p(z); }, - [] __device__ (const T& z) { return z < 0; }); -} - -void elu_backward_cuda(at::Tensor z, at::Tensor dz) { - CHECK_CUDA_INPUT(z); - CHECK_CUDA_INPUT(dz); - - int64_t count = z.numel(); - - AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cuda", ([&] { - elu_backward_impl(z.data(), dz.data(), count); - })); -} diff --git a/preprocess/humanparsing/modules/src/inplace_abn_cuda_half.cu b/preprocess/humanparsing/modules/src/inplace_abn_cuda_half.cu deleted file mode 100644 index bb63e73..0000000 --- a/preprocess/humanparsing/modules/src/inplace_abn_cuda_half.cu +++ /dev/null @@ -1,275 +0,0 @@ -#include - -#include - -#include - -#include "utils/checks.h" -#include "utils/cuda.cuh" -#include "inplace_abn.h" - -#include - -// Operations for reduce -struct SumOpH { - __device__ SumOpH(const half *t, int c, int s) - : tensor(t), chn(c), sp(s) {} - __device__ __forceinline__ float operator()(int batch, int plane, int n) { - return __half2float(tensor[(batch * chn + plane) * sp + n]); - } - const half *tensor; - const int chn; - const int sp; -}; - -struct VarOpH { - __device__ VarOpH(float m, const half *t, int c, int s) - : mean(m), tensor(t), chn(c), sp(s) {} - __device__ __forceinline__ float operator()(int batch, int plane, int n) { - const auto t = __half2float(tensor[(batch * chn + plane) * sp + n]); - return (t - mean) * (t - mean); - } - const float mean; - const half *tensor; - const int chn; - const int sp; -}; - -struct GradOpH { - __device__ GradOpH(float _weight, float _bias, const half *_z, const half *_dz, int c, int s) - : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {} - __device__ __forceinline__ Pair operator()(int batch, int plane, int n) { - float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - bias) / weight; - float _dz = __half2float(dz[(batch * chn + plane) * sp + n]); - return Pair(_dz, _y * _dz); - } - const float weight; - const float bias; - const half *z; - const half *dz; - const int chn; - const int sp; -}; - -/*********** - * mean_var - ***********/ - -__global__ void mean_var_kernel_h(const half *x, float *mean, float *var, int num, int chn, int sp) { - int plane = blockIdx.x; - float norm = 1.f / static_cast(num * sp); - - float _mean = reduce(SumOpH(x, chn, sp), plane, num, sp) * norm; - __syncthreads(); - float _var = reduce(VarOpH(_mean, x, chn, sp), plane, num, sp) * norm; - - if (threadIdx.x == 0) { - mean[plane] = _mean; - var[plane] = _var; - } -} - -std::vector mean_var_cuda_h(at::Tensor x) { - CHECK_CUDA_INPUT(x); - - // Extract dimensions - int64_t num, chn, sp; - get_dims(x, num, chn, sp); - - // Prepare output tensors - auto mean = at::empty({chn},x.options().dtype(at::kFloat)); - auto var = at::empty({chn},x.options().dtype(at::kFloat)); - - // Run kernel - dim3 blocks(chn); - dim3 
threads(getNumThreads(sp)); - auto stream = at::cuda::getCurrentCUDAStream(); - mean_var_kernel_h<<>>( - reinterpret_cast(x.data()), - mean.data(), - var.data(), - num, chn, sp); - - return {mean, var}; -} - -/********** - * forward - **********/ - -__global__ void forward_kernel_h(half *x, const float *mean, const float *var, const float *weight, const float *bias, - bool affine, float eps, int num, int chn, int sp) { - int plane = blockIdx.x; - - const float _mean = mean[plane]; - const float _var = var[plane]; - const float _weight = affine ? abs(weight[plane]) + eps : 1.f; - const float _bias = affine ? bias[plane] : 0.f; - - const float mul = rsqrt(_var + eps) * _weight; - - for (int batch = 0; batch < num; ++batch) { - for (int n = threadIdx.x; n < sp; n += blockDim.x) { - half *x_ptr = x + (batch * chn + plane) * sp + n; - float _x = __half2float(*x_ptr); - float _y = (_x - _mean) * mul + _bias; - - *x_ptr = __float2half(_y); - } - } -} - -at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, - bool affine, float eps) { - CHECK_CUDA_INPUT(x); - CHECK_CUDA_INPUT(mean); - CHECK_CUDA_INPUT(var); - CHECK_CUDA_INPUT(weight); - CHECK_CUDA_INPUT(bias); - - // Extract dimensions - int64_t num, chn, sp; - get_dims(x, num, chn, sp); - - // Run kernel - dim3 blocks(chn); - dim3 threads(getNumThreads(sp)); - auto stream = at::cuda::getCurrentCUDAStream(); - forward_kernel_h<<>>( - reinterpret_cast(x.data()), - mean.data(), - var.data(), - weight.data(), - bias.data(), - affine, eps, num, chn, sp); - - return x; -} - -__global__ void edz_eydz_kernel_h(const half *z, const half *dz, const float *weight, const float *bias, - float *edz, float *eydz, bool affine, float eps, int num, int chn, int sp) { - int plane = blockIdx.x; - - float _weight = affine ? abs(weight[plane]) + eps : 1.f; - float _bias = affine ? bias[plane] : 0.f; - - Pair res = reduce, GradOpH>(GradOpH(_weight, _bias, z, dz, chn, sp), plane, num, sp); - __syncthreads(); - - if (threadIdx.x == 0) { - edz[plane] = res.v1; - eydz[plane] = res.v2; - } -} - -std::vector edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, - bool affine, float eps) { - CHECK_CUDA_INPUT(z); - CHECK_CUDA_INPUT(dz); - CHECK_CUDA_INPUT(weight); - CHECK_CUDA_INPUT(bias); - - // Extract dimensions - int64_t num, chn, sp; - get_dims(z, num, chn, sp); - - auto edz = at::empty({chn},z.options().dtype(at::kFloat)); - auto eydz = at::empty({chn},z.options().dtype(at::kFloat)); - - // Run kernel - dim3 blocks(chn); - dim3 threads(getNumThreads(sp)); - auto stream = at::cuda::getCurrentCUDAStream(); - edz_eydz_kernel_h<<>>( - reinterpret_cast(z.data()), - reinterpret_cast(dz.data()), - weight.data(), - bias.data(), - edz.data(), - eydz.data(), - affine, eps, num, chn, sp); - - return {edz, eydz}; -} - -__global__ void backward_kernel_h(const half *z, const half *dz, const float *var, const float *weight, const float *bias, const float *edz, - const float *eydz, half *dx, bool affine, float eps, int num, int chn, int sp) { - int plane = blockIdx.x; - - float _weight = affine ? abs(weight[plane]) + eps : 1.f; - float _bias = affine ? 
bias[plane] : 0.f; - float _var = var[plane]; - float _edz = edz[plane]; - float _eydz = eydz[plane]; - - float _mul = _weight * rsqrt(_var + eps); - float count = float(num * sp); - - for (int batch = 0; batch < num; ++batch) { - for (int n = threadIdx.x; n < sp; n += blockDim.x) { - float _dz = __half2float(dz[(batch * chn + plane) * sp + n]); - float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - _bias) / _weight; - - dx[(batch * chn + plane) * sp + n] = __float2half((_dz - _edz / count - _y * _eydz / count) * _mul); - } - } -} - -at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, - at::Tensor edz, at::Tensor eydz, bool affine, float eps) { - CHECK_CUDA_INPUT(z); - CHECK_CUDA_INPUT(dz); - CHECK_CUDA_INPUT(var); - CHECK_CUDA_INPUT(weight); - CHECK_CUDA_INPUT(bias); - CHECK_CUDA_INPUT(edz); - CHECK_CUDA_INPUT(eydz); - - // Extract dimensions - int64_t num, chn, sp; - get_dims(z, num, chn, sp); - - auto dx = at::zeros_like(z); - - // Run kernel - dim3 blocks(chn); - dim3 threads(getNumThreads(sp)); - auto stream = at::cuda::getCurrentCUDAStream(); - backward_kernel_h<<>>( - reinterpret_cast(z.data()), - reinterpret_cast(dz.data()), - var.data(), - weight.data(), - bias.data(), - edz.data(), - eydz.data(), - reinterpret_cast(dx.data()), - affine, eps, num, chn, sp); - - return dx; -} - -__global__ void leaky_relu_backward_impl_h(half *z, half *dz, float slope, int64_t count) { - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += blockDim.x * gridDim.x){ - float _z = __half2float(z[i]); - if (_z < 0) { - dz[i] = __float2half(__half2float(dz[i]) * slope); - z[i] = __float2half(_z / slope); - } - } -} - -void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope) { - CHECK_CUDA_INPUT(z); - CHECK_CUDA_INPUT(dz); - - int64_t count = z.numel(); - dim3 threads(getNumThreads(count)); - dim3 blocks = (count + threads.x - 1) / threads.x; - auto stream = at::cuda::getCurrentCUDAStream(); - leaky_relu_backward_impl_h<<>>( - reinterpret_cast(z.data()), - reinterpret_cast(dz.data()), - slope, count); -} - diff --git a/preprocess/humanparsing/modules/src/utils/checks.h b/preprocess/humanparsing/modules/src/utils/checks.h deleted file mode 100644 index e761a6f..0000000 --- a/preprocess/humanparsing/modules/src/utils/checks.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#include - -// Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT -#ifndef AT_CHECK -#define AT_CHECK AT_ASSERT -#endif - -#define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor") -#define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor") -#define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous") - -#define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) -#define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x) \ No newline at end of file diff --git a/preprocess/humanparsing/modules/src/utils/common.h b/preprocess/humanparsing/modules/src/utils/common.h deleted file mode 100644 index e8403ee..0000000 --- a/preprocess/humanparsing/modules/src/utils/common.h +++ /dev/null @@ -1,49 +0,0 @@ -#pragma once - -#include - -/* - * Functions to share code between CPU and GPU - */ - -#ifdef __CUDACC__ -// CUDA versions - -#define HOST_DEVICE __host__ __device__ -#define INLINE_HOST_DEVICE __host__ __device__ inline -#define FLOOR(x) floor(x) - -#if __CUDA_ARCH__ >= 600 -// Recent compute capabilities have block-level atomicAdd for all 
data types, so we use that -#define ACCUM(x,y) atomicAdd_block(&(x),(y)) -#else -// Older architectures don't have block-level atomicAdd, nor atomicAdd for doubles, so we defer to atomicAdd for float -// and use the known atomicCAS-based implementation for double -template -__device__ inline data_t atomic_add(data_t *address, data_t val) { - return atomicAdd(address, val); -} - -template<> -__device__ inline double atomic_add(double *address, double val) { - unsigned long long int* address_as_ull = (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); - } while (assumed != old); - return __longlong_as_double(old); -} - -#define ACCUM(x,y) atomic_add(&(x),(y)) -#endif // #if __CUDA_ARCH__ >= 600 - -#else -// CPU versions - -#define HOST_DEVICE -#define INLINE_HOST_DEVICE inline -#define FLOOR(x) std::floor(x) -#define ACCUM(x,y) (x) += (y) - -#endif // #ifdef __CUDACC__ \ No newline at end of file diff --git a/preprocess/humanparsing/modules/src/utils/cuda.cuh b/preprocess/humanparsing/modules/src/utils/cuda.cuh deleted file mode 100644 index 60c0023..0000000 --- a/preprocess/humanparsing/modules/src/utils/cuda.cuh +++ /dev/null @@ -1,71 +0,0 @@ -#pragma once - -/* - * General settings and functions - */ -const int WARP_SIZE = 32; -const int MAX_BLOCK_SIZE = 1024; - -static int getNumThreads(int nElem) { - int threadSizes[6] = {32, 64, 128, 256, 512, MAX_BLOCK_SIZE}; - for (int i = 0; i < 6; ++i) { - if (nElem <= threadSizes[i]) { - return threadSizes[i]; - } - } - return MAX_BLOCK_SIZE; -} - -/* - * Reduction utilities - */ -template -__device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize, - unsigned int mask = 0xffffffff) { -#if CUDART_VERSION >= 9000 - return __shfl_xor_sync(mask, value, laneMask, width); -#else - return __shfl_xor(value, laneMask, width); -#endif -} - -__device__ __forceinline__ int getMSB(int val) { return 31 - __clz(val); } - -template -struct Pair { - T v1, v2; - __device__ Pair() {} - __device__ Pair(T _v1, T _v2) : v1(_v1), v2(_v2) {} - __device__ Pair(T v) : v1(v), v2(v) {} - __device__ Pair(int v) : v1(v), v2(v) {} - __device__ Pair &operator+=(const Pair &a) { - v1 += a.v1; - v2 += a.v2; - return *this; - } -}; - -template -static __device__ __forceinline__ T warpSum(T val) { -#if __CUDA_ARCH__ >= 300 - for (int i = 0; i < getMSB(WARP_SIZE); ++i) { - val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE); - } -#else - __shared__ T values[MAX_BLOCK_SIZE]; - values[threadIdx.x] = val; - __threadfence_block(); - const int base = (threadIdx.x / WARP_SIZE) * WARP_SIZE; - for (int i = 1; i < WARP_SIZE; i++) { - val += values[base + ((i + threadIdx.x) % WARP_SIZE)]; - } -#endif - return val; -} - -template -static __device__ __forceinline__ Pair warpSum(Pair value) { - value.v1 = warpSum(value.v1); - value.v2 = warpSum(value.v2); - return value; -} \ No newline at end of file diff --git a/preprocess/humanparsing/networks/AugmentCE2P.py b/preprocess/humanparsing/networks/AugmentCE2P.py deleted file mode 100644 index ce32f78..0000000 --- a/preprocess/humanparsing/networks/AugmentCE2P.py +++ /dev/null @@ -1,388 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : AugmentCE2P.py -@Time : 8/4/19 3:35 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory 
of this source tree. -""" - -import functools -import pdb - -import torch -import torch.nn as nn -from torch.nn import functional as F -# Note here we adopt the InplaceABNSync implementation from https://github.com/mapillary/inplace_abn -# By default, the InplaceABNSync module contains a BatchNorm Layer and a LeakyReLu layer -from modules import InPlaceABNSync -import numpy as np - -BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') - -affine_par = True - -pretrained_settings = { - 'resnet101': { - 'imagenet': { - 'input_space': 'BGR', - 'input_size': [3, 224, 224], - 'input_range': [0, 1], - 'mean': [0.406, 0.456, 0.485], - 'std': [0.225, 0.224, 0.229], - 'num_classes': 1000 - } - }, -} - - -def conv3x3(in_planes, out_planes, stride=1): - "3x3 convolution with padding" - return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, - padding=1, bias=False) - - -class Bottleneck(nn.Module): - expansion = 4 - - def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, fist_dilation=1, multi_grid=1): - super(Bottleneck, self).__init__() - self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) - self.bn1 = BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, - padding=dilation * multi_grid, dilation=dilation * multi_grid, bias=False) - self.bn2 = BatchNorm2d(planes) - self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) - self.bn3 = BatchNorm2d(planes * 4) - self.relu = nn.ReLU(inplace=False) - self.relu_inplace = nn.ReLU(inplace=True) - self.downsample = downsample - self.dilation = dilation - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - out = self.relu(out) - - out = self.conv3(out) - out = self.bn3(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out = out + residual - out = self.relu_inplace(out) - - return out - - -class CostomAdaptiveAvgPool2D(nn.Module): - - def __init__(self, output_size): - - super(CostomAdaptiveAvgPool2D, self).__init__() - - self.output_size = output_size - - def forward(self, x): - - H_in, W_in = x.shape[-2:] - H_out, W_out = self.output_size - - out_i = [] - for i in range(H_out): - out_j = [] - for j in range(W_out): - hs = int(np.floor(i * H_in / H_out)) - he = int(np.ceil((i + 1) * H_in / H_out)) - - ws = int(np.floor(j * W_in / W_out)) - we = int(np.ceil((j + 1) * W_in / W_out)) - - # print(hs, he, ws, we) - kernel_size = [he - hs, we - ws] - - out = F.avg_pool2d(x[:, :, hs:he, ws:we], kernel_size) - out_j.append(out) - - out_j = torch.concat(out_j, -1) - out_i.append(out_j) - - out_i = torch.concat(out_i, -2) - return out_i - - -class PSPModule(nn.Module): - """ - Reference: - Zhao, Hengshuang, et al. 
*"Pyramid scene parsing network."* - """ - - def __init__(self, features, out_features=512, sizes=(1, 2, 3, 6)): - super(PSPModule, self).__init__() - - self.stages = [] - tmp = [] - for size in sizes: - if size == 3 or size == 6: - tmp.append(self._make_stage_custom(features, out_features, size)) - else: - tmp.append(self._make_stage(features, out_features, size)) - self.stages = nn.ModuleList(tmp) - # self.stages = nn.ModuleList([self._make_stage(features, out_features, size) for size in sizes]) - self.bottleneck = nn.Sequential( - nn.Conv2d(features + len(sizes) * out_features, out_features, kernel_size=3, padding=1, dilation=1, - bias=False), - InPlaceABNSync(out_features), - ) - - def _make_stage(self, features, out_features, size): - prior = nn.AdaptiveAvgPool2d(output_size=(size, size)) - conv = nn.Conv2d(features, out_features, kernel_size=1, bias=False) - bn = InPlaceABNSync(out_features) - return nn.Sequential(prior, conv, bn) - - def _make_stage_custom(self, features, out_features, size): - prior = CostomAdaptiveAvgPool2D(output_size=(size, size)) - conv = nn.Conv2d(features, out_features, kernel_size=1, bias=False) - bn = InPlaceABNSync(out_features) - return nn.Sequential(prior, conv, bn) - - def forward(self, feats): - h, w = feats.size(2), feats.size(3) - priors = [F.interpolate(input=stage(feats), size=(h, w), mode='bilinear', align_corners=True) for stage in - self.stages] + [feats] - bottle = self.bottleneck(torch.cat(priors, 1)) - return bottle - - -class ASPPModule(nn.Module): - """ - Reference: - Chen, Liang-Chieh, et al. *"Rethinking Atrous Convolution for Semantic Image Segmentation."* - """ - - def __init__(self, features, inner_features=256, out_features=512, dilations=(12, 24, 36)): - super(ASPPModule, self).__init__() - - self.conv1 = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)), - nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, - bias=False), - InPlaceABNSync(inner_features)) - self.conv2 = nn.Sequential( - nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(inner_features)) - self.conv3 = nn.Sequential( - nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False), - InPlaceABNSync(inner_features)) - self.conv4 = nn.Sequential( - nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False), - InPlaceABNSync(inner_features)) - self.conv5 = nn.Sequential( - nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False), - InPlaceABNSync(inner_features)) - - self.bottleneck = nn.Sequential( - nn.Conv2d(inner_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(out_features), - nn.Dropout2d(0.1) - ) - - def forward(self, x): - _, _, h, w = x.size() - - feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True) - - feat2 = self.conv2(x) - feat3 = self.conv3(x) - feat4 = self.conv4(x) - feat5 = self.conv5(x) - out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1) - - bottle = self.bottleneck(out) - return bottle - - -class Edge_Module(nn.Module): - """ - Edge Learning Branch - """ - - def __init__(self, in_fea=[256, 512, 1024], mid_fea=256, out_fea=2): - super(Edge_Module, self).__init__() - - self.conv1 = nn.Sequential( - nn.Conv2d(in_fea[0], mid_fea, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(mid_fea) - ) - self.conv2 = nn.Sequential( - nn.Conv2d(in_fea[1], 
mid_fea, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(mid_fea) - ) - self.conv3 = nn.Sequential( - nn.Conv2d(in_fea[2], mid_fea, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(mid_fea) - ) - self.conv4 = nn.Conv2d(mid_fea, out_fea, kernel_size=3, padding=1, dilation=1, bias=True) - self.conv5 = nn.Conv2d(out_fea * 3, out_fea, kernel_size=1, padding=0, dilation=1, bias=True) - - def forward(self, x1, x2, x3): - _, _, h, w = x1.size() - - edge1_fea = self.conv1(x1) - edge1 = self.conv4(edge1_fea) - edge2_fea = self.conv2(x2) - edge2 = self.conv4(edge2_fea) - edge3_fea = self.conv3(x3) - edge3 = self.conv4(edge3_fea) - - edge2_fea = F.interpolate(edge2_fea, size=(h, w), mode='bilinear', align_corners=True) - edge3_fea = F.interpolate(edge3_fea, size=(h, w), mode='bilinear', align_corners=True) - edge2 = F.interpolate(edge2, size=(h, w), mode='bilinear', align_corners=True) - edge3 = F.interpolate(edge3, size=(h, w), mode='bilinear', align_corners=True) - - edge = torch.cat([edge1, edge2, edge3], dim=1) - edge_fea = torch.cat([edge1_fea, edge2_fea, edge3_fea], dim=1) - edge = self.conv5(edge) - - return edge, edge_fea - - -class Decoder_Module(nn.Module): - """ - Parsing Branch Decoder Module. - """ - - def __init__(self, num_classes): - super(Decoder_Module, self).__init__() - self.conv1 = nn.Sequential( - nn.Conv2d(512, 256, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(256) - ) - self.conv2 = nn.Sequential( - nn.Conv2d(256, 48, kernel_size=1, stride=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(48) - ) - self.conv3 = nn.Sequential( - nn.Conv2d(304, 256, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(256), - nn.Conv2d(256, 256, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(256) - ) - - self.conv4 = nn.Conv2d(256, num_classes, kernel_size=1, padding=0, dilation=1, bias=True) - - def forward(self, xt, xl): - _, _, h, w = xl.size() - xt = F.interpolate(self.conv1(xt), size=(h, w), mode='bilinear', align_corners=True) - xl = self.conv2(xl) - x = torch.cat([xt, xl], dim=1) - x = self.conv3(x) - seg = self.conv4(x) - return seg, x - - -class ResNet(nn.Module): - def __init__(self, block, layers, num_classes): - self.inplanes = 128 - super(ResNet, self).__init__() - self.conv1 = conv3x3(3, 64, stride=2) - self.bn1 = BatchNorm2d(64) - self.relu1 = nn.ReLU(inplace=False) - self.conv2 = conv3x3(64, 64) - self.bn2 = BatchNorm2d(64) - self.relu2 = nn.ReLU(inplace=False) - self.conv3 = conv3x3(64, 128) - self.bn3 = BatchNorm2d(128) - self.relu3 = nn.ReLU(inplace=False) - - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.layer1 = self._make_layer(block, 64, layers[0]) - self.layer2 = self._make_layer(block, 128, layers[1], stride=2) - self.layer3 = self._make_layer(block, 256, layers[2], stride=2) - self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=2, multi_grid=(1, 1, 1)) - - self.context_encoding = PSPModule(2048, 512) - - self.edge = Edge_Module() - self.decoder = Decoder_Module(num_classes) - - self.fushion = nn.Sequential( - nn.Conv2d(1024, 256, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(256), - nn.Dropout2d(0.1), - nn.Conv2d(256, num_classes, kernel_size=1, padding=0, dilation=1, bias=True) - ) - - def _make_layer(self, block, planes, blocks, stride=1, dilation=1, multi_grid=1): - downsample = None - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - 
nn.Conv2d(self.inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias=False), - BatchNorm2d(planes * block.expansion, affine=affine_par)) - - layers = [] - generate_multi_grid = lambda index, grids: grids[index % len(grids)] if isinstance(grids, tuple) else 1 - layers.append(block(self.inplanes, planes, stride, dilation=dilation, downsample=downsample, - multi_grid=generate_multi_grid(0, multi_grid))) - self.inplanes = planes * block.expansion - for i in range(1, blocks): - layers.append( - block(self.inplanes, planes, dilation=dilation, multi_grid=generate_multi_grid(i, multi_grid))) - - return nn.Sequential(*layers) - - def forward(self, x): - x = self.relu1(self.bn1(self.conv1(x))) - x = self.relu2(self.bn2(self.conv2(x))) - x = self.relu3(self.bn3(self.conv3(x))) - x = self.maxpool(x) - x2 = self.layer1(x) - x3 = self.layer2(x2) - x4 = self.layer3(x3) - x5 = self.layer4(x4) - x = self.context_encoding(x5) - parsing_result, parsing_fea = self.decoder(x, x2) - # Edge Branch - edge_result, edge_fea = self.edge(x2, x3, x4) - # Fusion Branch - x = torch.cat([parsing_fea, edge_fea], dim=1) - fusion_result = self.fushion(x) - return [[parsing_result, fusion_result], edge_result] - - -def initialize_pretrained_model(model, settings, pretrained='./models/resnet101-imagenet.pth'): - model.input_space = settings['input_space'] - model.input_size = settings['input_size'] - model.input_range = settings['input_range'] - model.mean = settings['mean'] - model.std = settings['std'] - - if pretrained is not None: - saved_state_dict = torch.load(pretrained) - new_params = model.state_dict().copy() - for i in saved_state_dict: - i_parts = i.split('.') - if not i_parts[0] == 'fc': - new_params['.'.join(i_parts[0:])] = saved_state_dict[i] - model.load_state_dict(new_params) - - -def resnet101(num_classes=20, pretrained='./models/resnet101-imagenet.pth'): - model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes) - settings = pretrained_settings['resnet101']['imagenet'] - initialize_pretrained_model(model, settings, pretrained) - return model diff --git a/preprocess/humanparsing/networks/__init__.py b/preprocess/humanparsing/networks/__init__.py deleted file mode 100644 index 3d5d384..0000000 --- a/preprocess/humanparsing/networks/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -from __future__ import absolute_import -from networks.AugmentCE2P import resnet101 - -__factory = { - 'resnet101': resnet101, -} - - -def init_model(name, *args, **kwargs): - if name not in __factory.keys(): - raise KeyError("Unknown model arch: {}".format(name)) - return __factory[name](*args, **kwargs) \ No newline at end of file diff --git a/preprocess/humanparsing/networks/backbone/mobilenetv2.py b/preprocess/humanparsing/networks/backbone/mobilenetv2.py deleted file mode 100644 index 6f2fe34..0000000 --- a/preprocess/humanparsing/networks/backbone/mobilenetv2.py +++ /dev/null @@ -1,156 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : mobilenetv2.py -@Time : 8/4/19 3:35 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. 
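For orientation, a minimal usage sketch of the init_model factory above. This is an illustration, not code from the repository: it assumes the humanparsing package root is on sys.path, the InPlaceABN CUDA extension under modules/ has been built, and a GPU is available; pretrained=None is passed to skip the ./models/resnet101-imagenet.pth checkpoint.

# Hypothetical driver for the factory in networks/__init__.py (assumptions noted above).
import torch
from networks import init_model

model = init_model('resnet101', num_classes=20, pretrained=None).cuda().eval()

with torch.no_grad():
    x = torch.randn(1, 3, 473, 473, device='cuda')   # LIP-style crop size
    (parsing, fusion), edge = model(x)               # forward returns [[parsing, fusion], edge]

# All three outputs come out at 1/4 of the input resolution.
print(parsing.shape, fusion.shape, edge.shape)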
-""" - -import torch.nn as nn -import math -import functools - -from modules import InPlaceABN, InPlaceABNSync - -BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') - -__all__ = ['mobilenetv2'] - - -def conv_bn(inp, oup, stride): - return nn.Sequential( - nn.Conv2d(inp, oup, 3, stride, 1, bias=False), - BatchNorm2d(oup), - nn.ReLU6(inplace=True) - ) - - -def conv_1x1_bn(inp, oup): - return nn.Sequential( - nn.Conv2d(inp, oup, 1, 1, 0, bias=False), - BatchNorm2d(oup), - nn.ReLU6(inplace=True) - ) - - -class InvertedResidual(nn.Module): - def __init__(self, inp, oup, stride, expand_ratio): - super(InvertedResidual, self).__init__() - self.stride = stride - assert stride in [1, 2] - - hidden_dim = round(inp * expand_ratio) - self.use_res_connect = self.stride == 1 and inp == oup - - if expand_ratio == 1: - self.conv = nn.Sequential( - # dw - nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), - BatchNorm2d(hidden_dim), - nn.ReLU6(inplace=True), - # pw-linear - nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), - BatchNorm2d(oup), - ) - else: - self.conv = nn.Sequential( - # pw - nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), - BatchNorm2d(hidden_dim), - nn.ReLU6(inplace=True), - # dw - nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), - BatchNorm2d(hidden_dim), - nn.ReLU6(inplace=True), - # pw-linear - nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), - BatchNorm2d(oup), - ) - - def forward(self, x): - if self.use_res_connect: - return x + self.conv(x) - else: - return self.conv(x) - - -class MobileNetV2(nn.Module): - def __init__(self, n_class=1000, input_size=224, width_mult=1.): - super(MobileNetV2, self).__init__() - block = InvertedResidual - input_channel = 32 - last_channel = 1280 - interverted_residual_setting = [ - # t, c, n, s - [1, 16, 1, 1], - [6, 24, 2, 2], # layer 2 - [6, 32, 3, 2], # layer 3 - [6, 64, 4, 2], - [6, 96, 3, 1], # layer 4 - [6, 160, 3, 2], - [6, 320, 1, 1], # layer 5 - ] - - # building first layer - assert input_size % 32 == 0 - input_channel = int(input_channel * width_mult) - self.last_channel = int(last_channel * width_mult) if width_mult > 1.0 else last_channel - self.features = [conv_bn(3, input_channel, 2)] - # building inverted residual blocks - for t, c, n, s in interverted_residual_setting: - output_channel = int(c * width_mult) - for i in range(n): - if i == 0: - self.features.append(block(input_channel, output_channel, s, expand_ratio=t)) - else: - self.features.append(block(input_channel, output_channel, 1, expand_ratio=t)) - input_channel = output_channel - # building last several layers - self.features.append(conv_1x1_bn(input_channel, self.last_channel)) - # make it nn.Sequential - self.features = nn.Sequential(*self.features) - - # building classifier - self.classifier = nn.Sequential( - nn.Dropout(0.2), - nn.Linear(self.last_channel, n_class), - ) - - self._initialize_weights() - - def forward(self, x): - x = self.features(x) - x = x.mean(3).mean(2) - x = self.classifier(x) - return x - - def _initialize_weights(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - m.weight.data.normal_(0, math.sqrt(2. 
/ n)) - if m.bias is not None: - m.bias.data.zero_() - elif isinstance(m, BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - elif isinstance(m, nn.Linear): - n = m.weight.size(1) - m.weight.data.normal_(0, 0.01) - m.bias.data.zero_() - - -def mobilenetv2(pretrained=False, **kwargs): - """Constructs a MobileNet_V2 model. - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - """ - model = MobileNetV2(n_class=1000, **kwargs) - if pretrained: - model.load_state_dict(load_url(model_urls['mobilenetv2']), strict=False) - return model diff --git a/preprocess/humanparsing/networks/backbone/resnet.py b/preprocess/humanparsing/networks/backbone/resnet.py deleted file mode 100644 index 88d6f73..0000000 --- a/preprocess/humanparsing/networks/backbone/resnet.py +++ /dev/null @@ -1,205 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : resnet.py -@Time : 8/4/19 3:35 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. -""" - -import functools -import torch.nn as nn -import math -from torch.utils.model_zoo import load_url - -from modules import InPlaceABNSync - -BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') - -__all__ = ['ResNet', 'resnet18', 'resnet50', 'resnet101'] # resnet101 is coming soon! - -model_urls = { - 'resnet18': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet18-imagenet.pth', - 'resnet50': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet50-imagenet.pth', - 'resnet101': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet101-imagenet.pth' -} - - -def conv3x3(in_planes, out_planes, stride=1): - "3x3 convolution with padding" - return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, - padding=1, bias=False) - - -class BasicBlock(nn.Module): - expansion = 1 - - def __init__(self, inplanes, planes, stride=1, downsample=None): - super(BasicBlock, self).__init__() - self.conv1 = conv3x3(inplanes, planes, stride) - self.bn1 = BatchNorm2d(planes) - self.relu = nn.ReLU(inplace=True) - self.conv2 = conv3x3(planes, planes) - self.bn2 = BatchNorm2d(planes) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class Bottleneck(nn.Module): - expansion = 4 - - def __init__(self, inplanes, planes, stride=1, downsample=None): - super(Bottleneck, self).__init__() - self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) - self.bn1 = BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, - padding=1, bias=False) - self.bn2 = BatchNorm2d(planes) - self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) - self.bn3 = BatchNorm2d(planes * 4) - self.relu = nn.ReLU(inplace=True) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - out = self.relu(out) - - out = self.conv3(out) - out = self.bn3(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class 
ResNet(nn.Module): - - def __init__(self, block, layers, num_classes=1000): - self.inplanes = 128 - super(ResNet, self).__init__() - self.conv1 = conv3x3(3, 64, stride=2) - self.bn1 = BatchNorm2d(64) - self.relu1 = nn.ReLU(inplace=True) - self.conv2 = conv3x3(64, 64) - self.bn2 = BatchNorm2d(64) - self.relu2 = nn.ReLU(inplace=True) - self.conv3 = conv3x3(64, 128) - self.bn3 = BatchNorm2d(128) - self.relu3 = nn.ReLU(inplace=True) - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.layer1 = self._make_layer(block, 64, layers[0]) - self.layer2 = self._make_layer(block, 128, layers[1], stride=2) - self.layer3 = self._make_layer(block, 256, layers[2], stride=2) - self.layer4 = self._make_layer(block, 512, layers[3], stride=2) - self.avgpool = nn.AvgPool2d(7, stride=1) - self.fc = nn.Linear(512 * block.expansion, num_classes) - - for m in self.modules(): - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - m.weight.data.normal_(0, math.sqrt(2. / n)) - elif isinstance(m, BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - - def _make_layer(self, block, planes, blocks, stride=1): - downsample = None - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - nn.Conv2d(self.inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias=False), - BatchNorm2d(planes * block.expansion), - ) - - layers = [] - layers.append(block(self.inplanes, planes, stride, downsample)) - self.inplanes = planes * block.expansion - for i in range(1, blocks): - layers.append(block(self.inplanes, planes)) - - return nn.Sequential(*layers) - - def forward(self, x): - x = self.relu1(self.bn1(self.conv1(x))) - x = self.relu2(self.bn2(self.conv2(x))) - x = self.relu3(self.bn3(self.conv3(x))) - x = self.maxpool(x) - - x = self.layer1(x) - x = self.layer2(x) - x = self.layer3(x) - x = self.layer4(x) - - x = self.avgpool(x) - x = x.view(x.size(0), -1) - x = self.fc(x) - - return x - - -def resnet18(pretrained=False, **kwargs): - """Constructs a ResNet-18 model. - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - """ - model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) - if pretrained: - model.load_state_dict(load_url(model_urls['resnet18'])) - return model - - -def resnet50(pretrained=False, **kwargs): - """Constructs a ResNet-50 model. - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - """ - model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) - if pretrained: - model.load_state_dict(load_url(model_urls['resnet50']), strict=False) - return model - - -def resnet101(pretrained=False, **kwargs): - """Constructs a ResNet-101 model. - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - """ - model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) - if pretrained: - model.load_state_dict(load_url(model_urls['resnet101']), strict=False) - return model diff --git a/preprocess/humanparsing/networks/backbone/resnext.py b/preprocess/humanparsing/networks/backbone/resnext.py deleted file mode 100644 index 96adb54..0000000 --- a/preprocess/humanparsing/networks/backbone/resnext.py +++ /dev/null @@ -1,149 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : resnext.py.py -@Time : 8/11/19 8:58 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. 
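The resnet18/50/101 constructors above are plain ImageNet classifiers that later serve as parsing backbones. A small stand-alone sketch follows; it is illustrative only: pretrained=False avoids the sceneparsing.csail.mit.edu download, and the InPlaceABNSync-backed BatchNorm2d must be importable with a GPU present.

# Hypothetical smoke test for networks/backbone/resnet.py under the assumptions above.
import torch
from networks.backbone.resnet import resnet50

net = resnet50(pretrained=False).cuda().eval()
with torch.no_grad():
    logits = net(torch.randn(2, 3, 224, 224, device='cuda'))

# The deep 3x3 stem + maxpool give 1/4 resolution and layer2-layer4 reach 1/32,
# so a 224x224 input is 7x7 before the fixed AvgPool2d(7) and the classifier.
print(logits.shape)   # torch.Size([2, 1000])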
-""" -import functools -import torch.nn as nn -import math -from torch.utils.model_zoo import load_url - -from modules import InPlaceABNSync - -BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') - -__all__ = ['ResNeXt', 'resnext101'] # support resnext 101 - -model_urls = { - 'resnext50': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnext50-imagenet.pth', - 'resnext101': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnext101-imagenet.pth' -} - - -def conv3x3(in_planes, out_planes, stride=1): - "3x3 convolution with padding" - return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, - padding=1, bias=False) - - -class GroupBottleneck(nn.Module): - expansion = 2 - - def __init__(self, inplanes, planes, stride=1, groups=1, downsample=None): - super(GroupBottleneck, self).__init__() - self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) - self.bn1 = BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, - padding=1, groups=groups, bias=False) - self.bn2 = BatchNorm2d(planes) - self.conv3 = nn.Conv2d(planes, planes * 2, kernel_size=1, bias=False) - self.bn3 = BatchNorm2d(planes * 2) - self.relu = nn.ReLU(inplace=True) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - out = self.relu(out) - - out = self.conv3(out) - out = self.bn3(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class ResNeXt(nn.Module): - - def __init__(self, block, layers, groups=32, num_classes=1000): - self.inplanes = 128 - super(ResNeXt, self).__init__() - self.conv1 = conv3x3(3, 64, stride=2) - self.bn1 = BatchNorm2d(64) - self.relu1 = nn.ReLU(inplace=True) - self.conv2 = conv3x3(64, 64) - self.bn2 = BatchNorm2d(64) - self.relu2 = nn.ReLU(inplace=True) - self.conv3 = conv3x3(64, 128) - self.bn3 = BatchNorm2d(128) - self.relu3 = nn.ReLU(inplace=True) - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.layer1 = self._make_layer(block, 128, layers[0], groups=groups) - self.layer2 = self._make_layer(block, 256, layers[1], stride=2, groups=groups) - self.layer3 = self._make_layer(block, 512, layers[2], stride=2, groups=groups) - self.layer4 = self._make_layer(block, 1024, layers[3], stride=2, groups=groups) - self.avgpool = nn.AvgPool2d(7, stride=1) - self.fc = nn.Linear(1024 * block.expansion, num_classes) - - for m in self.modules(): - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels // m.groups - m.weight.data.normal_(0, math.sqrt(2. 
/ n)) - elif isinstance(m, BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - - def _make_layer(self, block, planes, blocks, stride=1, groups=1): - downsample = None - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - nn.Conv2d(self.inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias=False), - BatchNorm2d(planes * block.expansion), - ) - - layers = [] - layers.append(block(self.inplanes, planes, stride, groups, downsample)) - self.inplanes = planes * block.expansion - for i in range(1, blocks): - layers.append(block(self.inplanes, planes, groups=groups)) - - return nn.Sequential(*layers) - - def forward(self, x): - x = self.relu1(self.bn1(self.conv1(x))) - x = self.relu2(self.bn2(self.conv2(x))) - x = self.relu3(self.bn3(self.conv3(x))) - x = self.maxpool(x) - - x = self.layer1(x) - x = self.layer2(x) - x = self.layer3(x) - x = self.layer4(x) - - x = self.avgpool(x) - x = x.view(x.size(0), -1) - x = self.fc(x) - - return x - - -def resnext101(pretrained=False, **kwargs): - """Constructs a ResNet-101 model. - Args: - pretrained (bool): If True, returns a model pre-trained on Places - """ - model = ResNeXt(GroupBottleneck, [3, 4, 23, 3], **kwargs) - if pretrained: - model.load_state_dict(load_url(model_urls['resnext101']), strict=False) - return model diff --git a/preprocess/humanparsing/networks/context_encoding/aspp.py b/preprocess/humanparsing/networks/context_encoding/aspp.py deleted file mode 100644 index d0ba531..0000000 --- a/preprocess/humanparsing/networks/context_encoding/aspp.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : aspp.py -@Time : 8/4/19 3:36 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. -""" - -import torch -import torch.nn as nn -from torch.nn import functional as F - -from modules import InPlaceABNSync - - -class ASPPModule(nn.Module): - """ - Reference: - Chen, Liang-Chieh, et al. 
*"Rethinking Atrous Convolution for Semantic Image Segmentation."* - """ - def __init__(self, features, out_features=512, inner_features=256, dilations=(12, 24, 36)): - super(ASPPModule, self).__init__() - - self.conv1 = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)), - nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, - bias=False), - InPlaceABNSync(inner_features)) - self.conv2 = nn.Sequential( - nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(inner_features)) - self.conv3 = nn.Sequential( - nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False), - InPlaceABNSync(inner_features)) - self.conv4 = nn.Sequential( - nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False), - InPlaceABNSync(inner_features)) - self.conv5 = nn.Sequential( - nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False), - InPlaceABNSync(inner_features)) - - self.bottleneck = nn.Sequential( - nn.Conv2d(inner_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(out_features), - nn.Dropout2d(0.1) - ) - - def forward(self, x): - _, _, h, w = x.size() - - feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True) - - feat2 = self.conv2(x) - feat3 = self.conv3(x) - feat4 = self.conv4(x) - feat5 = self.conv5(x) - out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1) - - bottle = self.bottleneck(out) - return bottle \ No newline at end of file diff --git a/preprocess/humanparsing/networks/context_encoding/ocnet.py b/preprocess/humanparsing/networks/context_encoding/ocnet.py deleted file mode 100644 index ac43ebf..0000000 --- a/preprocess/humanparsing/networks/context_encoding/ocnet.py +++ /dev/null @@ -1,226 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : ocnet.py -@Time : 8/4/19 3:36 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. 
-""" - -import functools - -import torch -import torch.nn as nn -from torch.autograd import Variable -from torch.nn import functional as F - -from modules import InPlaceABNSync -BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') - - -class _SelfAttentionBlock(nn.Module): - ''' - The basic implementation for self-attention block/non-local block - Input: - N X C X H X W - Parameters: - in_channels : the dimension of the input feature map - key_channels : the dimension after the key/query transform - value_channels : the dimension after the value transform - scale : choose the scale to downsample the input feature maps (save memory cost) - Return: - N X C X H X W - position-aware context features.(w/o concate or add with the input) - ''' - - def __init__(self, in_channels, key_channels, value_channels, out_channels=None, scale=1): - super(_SelfAttentionBlock, self).__init__() - self.scale = scale - self.in_channels = in_channels - self.out_channels = out_channels - self.key_channels = key_channels - self.value_channels = value_channels - if out_channels == None: - self.out_channels = in_channels - self.pool = nn.MaxPool2d(kernel_size=(scale, scale)) - self.f_key = nn.Sequential( - nn.Conv2d(in_channels=self.in_channels, out_channels=self.key_channels, - kernel_size=1, stride=1, padding=0), - InPlaceABNSync(self.key_channels), - ) - self.f_query = self.f_key - self.f_value = nn.Conv2d(in_channels=self.in_channels, out_channels=self.value_channels, - kernel_size=1, stride=1, padding=0) - self.W = nn.Conv2d(in_channels=self.value_channels, out_channels=self.out_channels, - kernel_size=1, stride=1, padding=0) - nn.init.constant(self.W.weight, 0) - nn.init.constant(self.W.bias, 0) - - def forward(self, x): - batch_size, h, w = x.size(0), x.size(2), x.size(3) - if self.scale > 1: - x = self.pool(x) - - value = self.f_value(x).view(batch_size, self.value_channels, -1) - value = value.permute(0, 2, 1) - query = self.f_query(x).view(batch_size, self.key_channels, -1) - query = query.permute(0, 2, 1) - key = self.f_key(x).view(batch_size, self.key_channels, -1) - - sim_map = torch.matmul(query, key) - sim_map = (self.key_channels ** -.5) * sim_map - sim_map = F.softmax(sim_map, dim=-1) - - context = torch.matmul(sim_map, value) - context = context.permute(0, 2, 1).contiguous() - context = context.view(batch_size, self.value_channels, *x.size()[2:]) - context = self.W(context) - if self.scale > 1: - context = F.upsample(input=context, size=(h, w), mode='bilinear', align_corners=True) - return context - - -class SelfAttentionBlock2D(_SelfAttentionBlock): - def __init__(self, in_channels, key_channels, value_channels, out_channels=None, scale=1): - super(SelfAttentionBlock2D, self).__init__(in_channels, - key_channels, - value_channels, - out_channels, - scale) - - -class BaseOC_Module(nn.Module): - """ - Implementation of the BaseOC module - Parameters: - in_features / out_features: the channels of the input / output feature maps. - dropout: we choose 0.05 as the default value. - size: you can apply multiple sizes. Here we only use one size. - Return: - features fused with Object context information. 
- """ - - def __init__(self, in_channels, out_channels, key_channels, value_channels, dropout, sizes=([1])): - super(BaseOC_Module, self).__init__() - self.stages = [] - self.stages = nn.ModuleList( - [self._make_stage(in_channels, out_channels, key_channels, value_channels, size) for size in sizes]) - self.conv_bn_dropout = nn.Sequential( - nn.Conv2d(2 * in_channels, out_channels, kernel_size=1, padding=0), - InPlaceABNSync(out_channels), - nn.Dropout2d(dropout) - ) - - def _make_stage(self, in_channels, output_channels, key_channels, value_channels, size): - return SelfAttentionBlock2D(in_channels, - key_channels, - value_channels, - output_channels, - size) - - def forward(self, feats): - priors = [stage(feats) for stage in self.stages] - context = priors[0] - for i in range(1, len(priors)): - context += priors[i] - output = self.conv_bn_dropout(torch.cat([context, feats], 1)) - return output - - -class BaseOC_Context_Module(nn.Module): - """ - Output only the context features. - Parameters: - in_features / out_features: the channels of the input / output feature maps. - dropout: specify the dropout ratio - fusion: We provide two different fusion method, "concat" or "add" - size: we find that directly learn the attention weights on even 1/8 feature maps is hard. - Return: - features after "concat" or "add" - """ - - def __init__(self, in_channels, out_channels, key_channels, value_channels, dropout, sizes=([1])): - super(BaseOC_Context_Module, self).__init__() - self.stages = [] - self.stages = nn.ModuleList( - [self._make_stage(in_channels, out_channels, key_channels, value_channels, size) for size in sizes]) - self.conv_bn_dropout = nn.Sequential( - nn.Conv2d(in_channels, out_channels, kernel_size=1, padding=0), - InPlaceABNSync(out_channels), - ) - - def _make_stage(self, in_channels, output_channels, key_channels, value_channels, size): - return SelfAttentionBlock2D(in_channels, - key_channels, - value_channels, - output_channels, - size) - - def forward(self, feats): - priors = [stage(feats) for stage in self.stages] - context = priors[0] - for i in range(1, len(priors)): - context += priors[i] - output = self.conv_bn_dropout(context) - return output - - -class ASP_OC_Module(nn.Module): - def __init__(self, features, out_features=256, dilations=(12, 24, 36)): - super(ASP_OC_Module, self).__init__() - self.context = nn.Sequential(nn.Conv2d(features, out_features, kernel_size=3, padding=1, dilation=1, bias=True), - InPlaceABNSync(out_features), - BaseOC_Context_Module(in_channels=out_features, out_channels=out_features, - key_channels=out_features // 2, value_channels=out_features, - dropout=0, sizes=([2]))) - self.conv2 = nn.Sequential(nn.Conv2d(features, out_features, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(out_features)) - self.conv3 = nn.Sequential( - nn.Conv2d(features, out_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False), - InPlaceABNSync(out_features)) - self.conv4 = nn.Sequential( - nn.Conv2d(features, out_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False), - InPlaceABNSync(out_features)) - self.conv5 = nn.Sequential( - nn.Conv2d(features, out_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False), - InPlaceABNSync(out_features)) - - self.conv_bn_dropout = nn.Sequential( - nn.Conv2d(out_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(out_features), - nn.Dropout2d(0.1) - ) - - def _cat_each(self, feat1, feat2, 
feat3, feat4, feat5): - assert (len(feat1) == len(feat2)) - z = [] - for i in range(len(feat1)): - z.append(torch.cat((feat1[i], feat2[i], feat3[i], feat4[i], feat5[i]), 1)) - return z - - def forward(self, x): - if isinstance(x, Variable): - _, _, h, w = x.size() - elif isinstance(x, tuple) or isinstance(x, list): - _, _, h, w = x[0].size() - else: - raise RuntimeError('unknown input type') - - feat1 = self.context(x) - feat2 = self.conv2(x) - feat3 = self.conv3(x) - feat4 = self.conv4(x) - feat5 = self.conv5(x) - - if isinstance(x, Variable): - out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1) - elif isinstance(x, tuple) or isinstance(x, list): - out = self._cat_each(feat1, feat2, feat3, feat4, feat5) - else: - raise RuntimeError('unknown input type') - output = self.conv_bn_dropout(out) - return output diff --git a/preprocess/humanparsing/networks/context_encoding/psp.py b/preprocess/humanparsing/networks/context_encoding/psp.py deleted file mode 100644 index 47181dc..0000000 --- a/preprocess/humanparsing/networks/context_encoding/psp.py +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : psp.py -@Time : 8/4/19 3:36 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. -""" - -import torch -import torch.nn as nn -from torch.nn import functional as F - -from modules import InPlaceABNSync - - -class PSPModule(nn.Module): - """ - Reference: - Zhao, Hengshuang, et al. *"Pyramid scene parsing network."* - """ - def __init__(self, features, out_features=512, sizes=(1, 2, 3, 6)): - super(PSPModule, self).__init__() - - self.stages = [] - self.stages = nn.ModuleList([self._make_stage(features, out_features, size) for size in sizes]) - self.bottleneck = nn.Sequential( - nn.Conv2d(features + len(sizes) * out_features, out_features, kernel_size=3, padding=1, dilation=1, - bias=False), - InPlaceABNSync(out_features), - ) - - def _make_stage(self, features, out_features, size): - prior = nn.AdaptiveAvgPool2d(output_size=(size, size)) - conv = nn.Conv2d(features, out_features, kernel_size=1, bias=False) - bn = InPlaceABNSync(out_features) - return nn.Sequential(prior, conv, bn) - - def forward(self, feats): - h, w = feats.size(2), feats.size(3) - priors = [F.interpolate(input=stage(feats), size=(h, w), mode='bilinear', align_corners=True) for stage in - self.stages] + [feats] - bottle = self.bottleneck(torch.cat(priors, 1)) - return bottle \ No newline at end of file diff --git a/preprocess/humanparsing/utils/consistency_loss.py b/preprocess/humanparsing/utils/consistency_loss.py deleted file mode 100644 index b872fdc..0000000 --- a/preprocess/humanparsing/utils/consistency_loss.py +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : kl_loss.py -@Time : 7/23/19 4:02 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. 
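Before moving on to the losses, a CPU-runnable skeleton of the pyramid pooling performed by PSPModule above, with the 1x1 convolutions and InPlaceABNSync stages stripped out so only the pool-and-upsample shapes remain; the 2048x30x30 input is illustrative.

# Pool the feature map to 1x1, 2x2, 3x3 and 6x6 grids, upsample each back, and concatenate.
import torch
import torch.nn.functional as F

feats = torch.randn(1, 2048, 30, 30)
priors = []
for size in (1, 2, 3, 6):
    pooled = F.adaptive_avg_pool2d(feats, output_size=(size, size))   # (1, 2048, size, size)
    priors.append(F.interpolate(pooled, size=feats.shape[-2:],
                                mode='bilinear', align_corners=True))
stacked = torch.cat(priors + [feats], dim=1)
print(stacked.shape)   # torch.Size([1, 10240, 30, 30]); the real module first projects each
                       # prior to out_features=512, so its bottleneck sees 2048 + 4*512 = 4096 channels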
-""" -import torch -import torch.nn.functional as F -from torch import nn -from datasets.target_generation import generate_edge_tensor - - -class ConsistencyLoss(nn.Module): - def __init__(self, ignore_index=255): - super(ConsistencyLoss, self).__init__() - self.ignore_index=ignore_index - - def forward(self, parsing, edge, label): - parsing_pre = torch.argmax(parsing, dim=1) - parsing_pre[label==self.ignore_index]=self.ignore_index - generated_edge = generate_edge_tensor(parsing_pre) - edge_pre = torch.argmax(edge, dim=1) - v_generate_edge = generated_edge[label!=255] - v_edge_pre = edge_pre[label!=255] - v_edge_pre = v_edge_pre.type(torch.cuda.FloatTensor) - positive_union = (v_generate_edge==1)&(v_edge_pre==1) # only the positive values count - return F.smooth_l1_loss(v_generate_edge[positive_union].squeeze(0), v_edge_pre[positive_union].squeeze(0)) diff --git a/preprocess/humanparsing/utils/criterion.py b/preprocess/humanparsing/utils/criterion.py deleted file mode 100644 index 9688943..0000000 --- a/preprocess/humanparsing/utils/criterion.py +++ /dev/null @@ -1,142 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : criterion.py -@Time : 8/30/19 8:59 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. -""" - -import torch.nn as nn -import torch -import numpy as np -from torch.nn import functional as F -from .lovasz_softmax import LovaszSoftmax -from .kl_loss import KLDivergenceLoss -from .consistency_loss import ConsistencyLoss - -NUM_CLASSES = 20 - - -class CriterionAll(nn.Module): - def __init__(self, use_class_weight=False, ignore_index=255, lambda_1=1, lambda_2=1, lambda_3=1, - num_classes=20): - super(CriterionAll, self).__init__() - self.ignore_index = ignore_index - self.use_class_weight = use_class_weight - self.criterion = torch.nn.CrossEntropyLoss(ignore_index=ignore_index) - self.lovasz = LovaszSoftmax(ignore_index=ignore_index) - self.kldiv = KLDivergenceLoss(ignore_index=ignore_index) - self.reg = ConsistencyLoss(ignore_index=ignore_index) - self.lamda_1 = lambda_1 - self.lamda_2 = lambda_2 - self.lamda_3 = lambda_3 - self.num_classes = num_classes - - def parsing_loss(self, preds, target, cycle_n=None): - """ - Loss function definition. - - Args: - preds: [[parsing result1, parsing result2],[edge result]] - target: [parsing label, egde label] - soft_preds: [[parsing result1, parsing result2],[edge result]] - Returns: - Calculated Loss. 
- """ - h, w = target[0].size(1), target[0].size(2) - - pos_num = torch.sum(target[1] == 1, dtype=torch.float) - neg_num = torch.sum(target[1] == 0, dtype=torch.float) - - weight_pos = neg_num / (pos_num + neg_num) - weight_neg = pos_num / (pos_num + neg_num) - weights = torch.tensor([weight_neg, weight_pos]) # edge loss weight - - loss = 0 - - # loss for segmentation - preds_parsing = preds[0] - for pred_parsing in preds_parsing: - scale_pred = F.interpolate(input=pred_parsing, size=(h, w), - mode='bilinear', align_corners=True) - - loss += 0.5 * self.lamda_1 * self.lovasz(scale_pred, target[0]) - if target[2] is None: - loss += 0.5 * self.lamda_1 * self.criterion(scale_pred, target[0]) - else: - soft_scale_pred = F.interpolate(input=target[2], size=(h, w), - mode='bilinear', align_corners=True) - soft_scale_pred = moving_average(soft_scale_pred, to_one_hot(target[0], num_cls=self.num_classes), - 1.0 / (cycle_n + 1.0)) - loss += 0.5 * self.lamda_1 * self.kldiv(scale_pred, soft_scale_pred, target[0]) - - # loss for edge - preds_edge = preds[1] - for pred_edge in preds_edge: - scale_pred = F.interpolate(input=pred_edge, size=(h, w), - mode='bilinear', align_corners=True) - if target[3] is None: - loss += self.lamda_2 * F.cross_entropy(scale_pred, target[1], - weights.cuda(), ignore_index=self.ignore_index) - else: - soft_scale_edge = F.interpolate(input=target[3], size=(h, w), - mode='bilinear', align_corners=True) - soft_scale_edge = moving_average(soft_scale_edge, to_one_hot(target[1], num_cls=2), - 1.0 / (cycle_n + 1.0)) - loss += self.lamda_2 * self.kldiv(scale_pred, soft_scale_edge, target[0]) - - # consistency regularization - preds_parsing = preds[0] - preds_edge = preds[1] - for pred_parsing in preds_parsing: - scale_pred = F.interpolate(input=pred_parsing, size=(h, w), - mode='bilinear', align_corners=True) - scale_edge = F.interpolate(input=preds_edge[0], size=(h, w), - mode='bilinear', align_corners=True) - loss += self.lamda_3 * self.reg(scale_pred, scale_edge, target[0]) - - return loss - - def forward(self, preds, target, cycle_n=None): - loss = self.parsing_loss(preds, target, cycle_n) - return loss - - def _generate_weights(self, masks, num_classes): - """ - masks: torch.Tensor with shape [B, H, W] - """ - masks_label = masks.data.cpu().numpy().astype(np.int64) - pixel_nums = [] - tot_pixels = 0 - for i in range(num_classes): - pixel_num_of_cls_i = np.sum(masks_label == i).astype(np.float) - pixel_nums.append(pixel_num_of_cls_i) - tot_pixels += pixel_num_of_cls_i - weights = [] - for i in range(num_classes): - weights.append( - (tot_pixels - pixel_nums[i]) / tot_pixels / (num_classes - 1) - ) - weights = np.array(weights, dtype=np.float) - # weights = torch.from_numpy(weights).float().to(masks.device) - return weights - - -def moving_average(target1, target2, alpha=1.0): - target = 0 - target += (1.0 - alpha) * target1 - target += target2 * alpha - return target - - -def to_one_hot(tensor, num_cls, dim=1, ignore_index=255): - b, h, w = tensor.shape - tensor[tensor == ignore_index] = 0 - onehot_tensor = torch.zeros(b, num_cls, h, w).cuda() - onehot_tensor.scatter_(dim, tensor.unsqueeze(dim), 1) - return onehot_tensor diff --git a/preprocess/humanparsing/utils/encoding.py b/preprocess/humanparsing/utils/encoding.py deleted file mode 100644 index e865470..0000000 --- a/preprocess/humanparsing/utils/encoding.py +++ /dev/null @@ -1,188 +0,0 @@ -##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -## Created by: Hang Zhang -## ECE Department, Rutgers 
University -## Email: zhang.hang@rutgers.edu -## Copyright (c) 2017 -## -## This source code is licensed under the MIT-style license found in the -## LICENSE file in the root directory of this source tree -##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -"""Encoding Data Parallel""" -import threading -import functools -import torch -from torch.autograd import Variable, Function -import torch.cuda.comm as comm -from torch.nn.parallel.data_parallel import DataParallel -from torch.nn.parallel.parallel_apply import get_a_var -from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast - -torch_ver = torch.__version__[:3] - -__all__ = ['allreduce', 'DataParallelModel', 'DataParallelCriterion', 'patch_replication_callback'] - -def allreduce(*inputs): - """Cross GPU all reduce autograd operation for calculate mean and - variance in SyncBN. - """ - return AllReduce.apply(*inputs) - -class AllReduce(Function): - @staticmethod - def forward(ctx, num_inputs, *inputs): - ctx.num_inputs = num_inputs - ctx.target_gpus = [inputs[i].get_device() for i in range(0, len(inputs), num_inputs)] - inputs = [inputs[i:i + num_inputs] - for i in range(0, len(inputs), num_inputs)] - # sort before reduce sum - inputs = sorted(inputs, key=lambda i: i[0].get_device()) - results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0]) - outputs = comm.broadcast_coalesced(results, ctx.target_gpus) - return tuple([t for tensors in outputs for t in tensors]) - - @staticmethod - def backward(ctx, *inputs): - inputs = [i.data for i in inputs] - inputs = [inputs[i:i + ctx.num_inputs] - for i in range(0, len(inputs), ctx.num_inputs)] - results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0]) - outputs = comm.broadcast_coalesced(results, ctx.target_gpus) - return (None,) + tuple([Variable(t) for tensors in outputs for t in tensors]) - -class Reduce(Function): - @staticmethod - def forward(ctx, *inputs): - ctx.target_gpus = [inputs[i].get_device() for i in range(len(inputs))] - inputs = sorted(inputs, key=lambda i: i.get_device()) - return comm.reduce_add(inputs) - - @staticmethod - def backward(ctx, gradOutput): - return Broadcast.apply(ctx.target_gpus, gradOutput) - - -class DataParallelModel(DataParallel): - """Implements data parallelism at the module level. - - This container parallelizes the application of the given module by - splitting the input across the specified devices by chunking in the - batch dimension. - In the forward pass, the module is replicated on each device, - and each replica handles a portion of the input. During the backwards pass, gradients from each replica are summed into the original module. - Note that the outputs are not gathered, please use compatible - :class:`encoding.parallel.DataParallelCriterion`. - - The batch size should be larger than the number of GPUs used. It should - also be an integer multiple of the number of GPUs so that each chunk is - the same size (so that each GPU processes the same number of samples). - - Args: - module: module to be parallelized - device_ids: CUDA devices (default: all devices) - - Reference: - Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, - Amit Agrawal. “Context Encoding for Semantic Segmentation. 
- *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018* - - Example:: - - >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2]) - >>> y = net(x) - """ - def gather(self, outputs, output_device): - return outputs - - def replicate(self, module, device_ids): - modules = super(DataParallelModel, self).replicate(module, device_ids) - return modules - - -class DataParallelCriterion(DataParallel): - """ - Calculate loss in multiple-GPUs, which balance the memory usage for - Semantic Segmentation. - - The targets are splitted across the specified devices by chunking in - the batch dimension. Please use together with :class:`encoding.parallel.DataParallelModel`. - - Reference: - Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, - Amit Agrawal. “Context Encoding for Semantic Segmentation. - *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018* - - Example:: - - >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2]) - >>> criterion = encoding.nn.DataParallelCriterion(criterion, device_ids=[0, 1, 2]) - >>> y = net(x) - >>> loss = criterion(y, target) - """ - def forward(self, inputs, *targets, **kwargs): - # input should be already scatterd - # scattering the targets instead - if not self.device_ids: - return self.module(inputs, *targets, **kwargs) - targets, kwargs = self.scatter(targets, kwargs, self.device_ids) - if len(self.device_ids) == 1: - return self.module(inputs, *targets[0], **kwargs[0]) - replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) - outputs = _criterion_parallel_apply(replicas, inputs, targets, kwargs) - return Reduce.apply(*outputs) / len(outputs) - - -def _criterion_parallel_apply(modules, inputs, targets, kwargs_tup=None, devices=None): - assert len(modules) == len(inputs) - assert len(targets) == len(inputs) - if kwargs_tup: - assert len(modules) == len(kwargs_tup) - else: - kwargs_tup = ({},) * len(modules) - if devices is not None: - assert len(modules) == len(devices) - else: - devices = [None] * len(modules) - - lock = threading.Lock() - results = {} - if torch_ver != "0.3": - grad_enabled = torch.is_grad_enabled() - - def _worker(i, module, input, target, kwargs, device=None): - if torch_ver != "0.3": - torch.set_grad_enabled(grad_enabled) - if device is None: - device = get_a_var(input).get_device() - try: - if not isinstance(input, tuple): - input = (input,) - with torch.cuda.device(device): - output = module(*(input + target), **kwargs) - with lock: - results[i] = output - except Exception as e: - with lock: - results[i] = e - - if len(modules) > 1: - threads = [threading.Thread(target=_worker, - args=(i, module, input, target, - kwargs, device),) - for i, (module, input, target, kwargs, device) in - enumerate(zip(modules, inputs, targets, kwargs_tup, devices))] - - for thread in threads: - thread.start() - for thread in threads: - thread.join() - else: - _worker(0, modules[0], inputs[0], kwargs_tup[0], devices[0]) - - outputs = [] - for i in range(len(inputs)): - output = results[i] - if isinstance(output, Exception): - raise output - outputs.append(output) - return outputs diff --git a/preprocess/humanparsing/utils/kl_loss.py b/preprocess/humanparsing/utils/kl_loss.py deleted file mode 100644 index 9a685d9..0000000 --- a/preprocess/humanparsing/utils/kl_loss.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : kl_loss.py -@Time : 7/23/19 4:02 
PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. -""" -import torch -import torch.nn.functional as F -from torch import nn - - -def flatten_probas(input, target, labels, ignore=255): - """ - Flattens predictions in the batch. - """ - B, C, H, W = input.size() - input = input.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C - target = target.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C - labels = labels.view(-1) - if ignore is None: - return input, target - valid = (labels != ignore) - vinput = input[valid.nonzero().squeeze()] - vtarget = target[valid.nonzero().squeeze()] - return vinput, vtarget - - -class KLDivergenceLoss(nn.Module): - def __init__(self, ignore_index=255, T=1): - super(KLDivergenceLoss, self).__init__() - self.ignore_index=ignore_index - self.T = T - - def forward(self, input, target, label): - log_input_prob = F.log_softmax(input / self.T, dim=1) - target_porb = F.softmax(target / self.T, dim=1) - loss = F.kl_div(*flatten_probas(log_input_prob, target_porb, label, ignore=self.ignore_index)) - return self.T*self.T*loss # balanced diff --git a/preprocess/humanparsing/utils/lovasz_softmax.py b/preprocess/humanparsing/utils/lovasz_softmax.py deleted file mode 100644 index b6e444f..0000000 --- a/preprocess/humanparsing/utils/lovasz_softmax.py +++ /dev/null @@ -1,279 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : lovasz_softmax.py -@Time : 8/30/19 7:12 PM -@Desc : Lovasz-Softmax and Jaccard hinge loss in PyTorch - Maxim Berman 2018 ESAT-PSI KU Leuven (MIT License) -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. -""" - -from __future__ import print_function, division - -import torch -from torch.autograd import Variable -import torch.nn.functional as F -import numpy as np -from torch import nn - -try: - from itertools import ifilterfalse -except ImportError: # py3k - from itertools import filterfalse as ifilterfalse - - -def lovasz_grad(gt_sorted): - """ - Computes gradient of the Lovasz extension w.r.t sorted errors - See Alg. 1 in paper - """ - p = len(gt_sorted) - gts = gt_sorted.sum() - intersection = gts - gt_sorted.float().cumsum(0) - union = gts + (1 - gt_sorted).float().cumsum(0) - jaccard = 1. 
- intersection / union - if p > 1: # cover 1-pixel case - jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] - return jaccard - - -def iou_binary(preds, labels, EMPTY=1., ignore=None, per_image=True): - """ - IoU for foreground class - binary: 1 foreground, 0 background - """ - if not per_image: - preds, labels = (preds,), (labels,) - ious = [] - for pred, label in zip(preds, labels): - intersection = ((label == 1) & (pred == 1)).sum() - union = ((label == 1) | ((pred == 1) & (label != ignore))).sum() - if not union: - iou = EMPTY - else: - iou = float(intersection) / float(union) - ious.append(iou) - iou = mean(ious) # mean accross images if per_image - return 100 * iou - - -def iou(preds, labels, C, EMPTY=1., ignore=None, per_image=False): - """ - Array of IoU for each (non ignored) class - """ - if not per_image: - preds, labels = (preds,), (labels,) - ious = [] - for pred, label in zip(preds, labels): - iou = [] - for i in range(C): - if i != ignore: # The ignored label is sometimes among predicted classes (ENet - CityScapes) - intersection = ((label == i) & (pred == i)).sum() - union = ((label == i) | ((pred == i) & (label != ignore))).sum() - if not union: - iou.append(EMPTY) - else: - iou.append(float(intersection) / float(union)) - ious.append(iou) - ious = [mean(iou) for iou in zip(*ious)] # mean accross images if per_image - return 100 * np.array(ious) - - -# --------------------------- BINARY LOSSES --------------------------- - - -def lovasz_hinge(logits, labels, per_image=True, ignore=None): - """ - Binary Lovasz hinge loss - logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty) - labels: [B, H, W] Tensor, binary ground truth masks (0 or 1) - per_image: compute the loss per image instead of per batch - ignore: void class id - """ - if per_image: - loss = mean(lovasz_hinge_flat(*flatten_binary_scores(log.unsqueeze(0), lab.unsqueeze(0), ignore)) - for log, lab in zip(logits, labels)) - else: - loss = lovasz_hinge_flat(*flatten_binary_scores(logits, labels, ignore)) - return loss - - -def lovasz_hinge_flat(logits, labels): - """ - Binary Lovasz hinge loss - logits: [P] Variable, logits at each prediction (between -\infty and +\infty) - labels: [P] Tensor, binary ground truth labels (0 or 1) - ignore: label to ignore - """ - if len(labels) == 0: - # only void pixels, the gradients should be 0 - return logits.sum() * 0. - signs = 2. * labels.float() - 1. - errors = (1. 
- logits * Variable(signs)) - errors_sorted, perm = torch.sort(errors, dim=0, descending=True) - perm = perm.data - gt_sorted = labels[perm] - grad = lovasz_grad(gt_sorted) - loss = torch.dot(F.relu(errors_sorted), Variable(grad)) - return loss - - -def flatten_binary_scores(scores, labels, ignore=None): - """ - Flattens predictions in the batch (binary case) - Remove labels equal to 'ignore' - """ - scores = scores.view(-1) - labels = labels.view(-1) - if ignore is None: - return scores, labels - valid = (labels != ignore) - vscores = scores[valid] - vlabels = labels[valid] - return vscores, vlabels - - -class StableBCELoss(torch.nn.modules.Module): - def __init__(self): - super(StableBCELoss, self).__init__() - - def forward(self, input, target): - neg_abs = - input.abs() - loss = input.clamp(min=0) - input * target + (1 + neg_abs.exp()).log() - return loss.mean() - - -def binary_xloss(logits, labels, ignore=None): - """ - Binary Cross entropy loss - logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty) - labels: [B, H, W] Tensor, binary ground truth masks (0 or 1) - ignore: void class id - """ - logits, labels = flatten_binary_scores(logits, labels, ignore) - loss = StableBCELoss()(logits, Variable(labels.float())) - return loss - - -# --------------------------- MULTICLASS LOSSES --------------------------- - - -def lovasz_softmax(probas, labels, classes='present', per_image=False, ignore=255, weighted=None): - """ - Multi-class Lovasz-Softmax loss - probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1). - Interpreted as binary (sigmoid) output with outputs of size [B, H, W]. - labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1) - classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. - per_image: compute the loss per image instead of per batch - ignore: void class labels - """ - if per_image: - loss = mean(lovasz_softmax_flat(*flatten_probas(prob.unsqueeze(0), lab.unsqueeze(0), ignore), classes=classes, weighted=weighted) - for prob, lab in zip(probas, labels)) - else: - loss = lovasz_softmax_flat(*flatten_probas(probas, labels, ignore), classes=classes, weighted=weighted ) - return loss - - -def lovasz_softmax_flat(probas, labels, classes='present', weighted=None): - """ - Multi-class Lovasz-Softmax loss - probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1) - labels: [P] Tensor, ground truth labels (between 0 and C - 1) - classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. - """ - if probas.numel() == 0: - # only void pixels, the gradients should be 0 - return probas * 0. 
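# A self-contained toy sketch of the Lovasz construction this module builds on
# (illustrative names and values, not part of the original file): sort the
# per-pixel errors in decreasing order, turn the sorted ground truth into
# per-position Jaccard increments via the lovasz_grad recurrence, then take the
# dot product of the two.
import torch

def toy_lovasz_grad(gt_sorted):
    # same algebra as lovasz_grad above, restated for a 1-D toy tensor
    gts = gt_sorted.sum()
    intersection = gts - gt_sorted.cumsum(0)
    union = gts + (1 - gt_sorted).cumsum(0)
    jaccard = 1. - intersection / union
    if len(gt_sorted) > 1:
        jaccard[1:] = jaccard[1:] - jaccard[:-1]
    return jaccard

fg = torch.tensor([1., 0., 1., 0.])         # foreground mask for one class
pred = torch.tensor([0.9, 0.8, 0.3, 0.1])   # predicted class probabilities
errors = (fg - pred).abs()
errors_sorted, perm = torch.sort(errors, 0, descending=True)
loss = torch.dot(errors_sorted, toy_lovasz_grad(fg[perm]))  # surrogate for 1 - IoU
print(float(loss))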
- C = probas.size(1) - losses = [] - class_to_sum = list(range(C)) if classes in ['all', 'present'] else classes - for c in class_to_sum: - fg = (labels == c).float() # foreground for class c - if (classes is 'present' and fg.sum() == 0): - continue - if C == 1: - if len(classes) > 1: - raise ValueError('Sigmoid output possible only with 1 class') - class_pred = probas[:, 0] - else: - class_pred = probas[:, c] - errors = (Variable(fg) - class_pred).abs() - errors_sorted, perm = torch.sort(errors, 0, descending=True) - perm = perm.data - fg_sorted = fg[perm] - if weighted is not None: - losses.append(weighted[c]*torch.dot(errors_sorted, Variable(lovasz_grad(fg_sorted)))) - else: - losses.append(torch.dot(errors_sorted, Variable(lovasz_grad(fg_sorted)))) - return mean(losses) - - -def flatten_probas(probas, labels, ignore=None): - """ - Flattens predictions in the batch - """ - if probas.dim() == 3: - # assumes output of a sigmoid layer - B, H, W = probas.size() - probas = probas.view(B, 1, H, W) - B, C, H, W = probas.size() - probas = probas.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C - labels = labels.view(-1) - if ignore is None: - return probas, labels - valid = (labels != ignore) - vprobas = probas[valid.nonzero().squeeze()] - vlabels = labels[valid] - return vprobas, vlabels - - -def xloss(logits, labels, ignore=None): - """ - Cross entropy loss - """ - return F.cross_entropy(logits, Variable(labels), ignore_index=255) - - -# --------------------------- HELPER FUNCTIONS --------------------------- -def isnan(x): - return x != x - - -def mean(l, ignore_nan=False, empty=0): - """ - nanmean compatible with generators. - """ - l = iter(l) - if ignore_nan: - l = ifilterfalse(isnan, l) - try: - n = 1 - acc = next(l) - except StopIteration: - if empty == 'raise': - raise ValueError('Empty mean') - return empty - for n, v in enumerate(l, 2): - acc += v - if n == 1: - return acc - return acc / n - -# --------------------------- Class --------------------------- -class LovaszSoftmax(nn.Module): - def __init__(self, per_image=False, ignore_index=255, weighted=None): - super(LovaszSoftmax, self).__init__() - self.lovasz_softmax = lovasz_softmax - self.per_image = per_image - self.ignore_index=ignore_index - self.weighted = weighted - - def forward(self, pred, label): - pred = F.softmax(pred, dim=1) - return self.lovasz_softmax(pred, label, per_image=self.per_image, ignore=self.ignore_index, weighted=self.weighted) \ No newline at end of file diff --git a/preprocess/humanparsing/utils/miou.py b/preprocess/humanparsing/utils/miou.py deleted file mode 100644 index 51a2cc9..0000000 --- a/preprocess/humanparsing/utils/miou.py +++ /dev/null @@ -1,155 +0,0 @@ -import cv2 -import os -import numpy as np - -from collections import OrderedDict -from PIL import Image as PILImage -from utils.transforms import transform_parsing - -LABELS = ['Background', 'Hat', 'Hair', 'Glove', 'Sunglasses', 'Upper-clothes', 'Dress', 'Coat', \ - 'Socks', 'Pants', 'Jumpsuits', 'Scarf', 'Skirt', 'Face', 'Left-arm', 'Right-arm', 'Left-leg', - 'Right-leg', 'Left-shoe', 'Right-shoe'] - - -# LABELS = ['Background', 'Head', 'Torso', 'Upper Arms', 'Lower Arms', 'Upper Legs', 'Lower Legs'] - -def get_palette(num_cls): - """ Returns the color map for visualizing the segmentation mask. 
- Args: - num_cls: Number of classes - Returns: - The color map - """ - - n = num_cls - palette = [0] * (n * 3) - for j in range(0, n): - lab = j - palette[j * 3 + 0] = 0 - palette[j * 3 + 1] = 0 - palette[j * 3 + 2] = 0 - i = 0 - while lab: - palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) - palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) - palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) - i += 1 - lab >>= 3 - return palette - - -def get_confusion_matrix(gt_label, pred_label, num_classes): - """ - Calcute the confusion matrix by given label and pred - :param gt_label: the ground truth label - :param pred_label: the pred label - :param num_classes: the nunber of class - :return: the confusion matrix - """ - index = (gt_label * num_classes + pred_label).astype('int32') - label_count = np.bincount(index) - confusion_matrix = np.zeros((num_classes, num_classes)) - - for i_label in range(num_classes): - for i_pred_label in range(num_classes): - cur_index = i_label * num_classes + i_pred_label - if cur_index < len(label_count): - confusion_matrix[i_label, i_pred_label] = label_count[cur_index] - - return confusion_matrix - - -def compute_mean_ioU(preds, scales, centers, num_classes, datadir, input_size=[473, 473], dataset='val'): - val_file = os.path.join(datadir, dataset + '_id.txt') - val_id = [i_id.strip() for i_id in open(val_file)] - - confusion_matrix = np.zeros((num_classes, num_classes)) - - for i, pred_out in enumerate(preds): - im_name = val_id[i] - gt_path = os.path.join(datadir, dataset + '_segmentations', im_name + '.png') - gt = np.array(PILImage.open(gt_path)) - h, w = gt.shape - s = scales[i] - c = centers[i] - pred = transform_parsing(pred_out, c, s, w, h, input_size) - - gt = np.asarray(gt, dtype=np.int32) - pred = np.asarray(pred, dtype=np.int32) - - ignore_index = gt != 255 - - gt = gt[ignore_index] - pred = pred[ignore_index] - - confusion_matrix += get_confusion_matrix(gt, pred, num_classes) - - pos = confusion_matrix.sum(1) - res = confusion_matrix.sum(0) - tp = np.diag(confusion_matrix) - - pixel_accuracy = (tp.sum() / pos.sum()) * 100 - mean_accuracy = ((tp / np.maximum(1.0, pos)).mean()) * 100 - IoU_array = (tp / np.maximum(1.0, pos + res - tp)) - IoU_array = IoU_array * 100 - mean_IoU = IoU_array.mean() - print('Pixel accuracy: %f \n' % pixel_accuracy) - print('Mean accuracy: %f \n' % mean_accuracy) - print('Mean IU: %f \n' % mean_IoU) - name_value = [] - - for i, (label, iou) in enumerate(zip(LABELS, IoU_array)): - name_value.append((label, iou)) - - name_value.append(('Pixel accuracy', pixel_accuracy)) - name_value.append(('Mean accuracy', mean_accuracy)) - name_value.append(('Mean IU', mean_IoU)) - name_value = OrderedDict(name_value) - return name_value - - -def compute_mean_ioU_file(preds_dir, num_classes, datadir, dataset='val'): - list_path = os.path.join(datadir, dataset + '_id.txt') - val_id = [i_id.strip() for i_id in open(list_path)] - - confusion_matrix = np.zeros((num_classes, num_classes)) - - for i, im_name in enumerate(val_id): - gt_path = os.path.join(datadir, 'segmentations', im_name + '.png') - gt = cv2.imread(gt_path, cv2.IMREAD_GRAYSCALE) - - pred_path = os.path.join(preds_dir, im_name + '.png') - pred = np.asarray(PILImage.open(pred_path)) - - gt = np.asarray(gt, dtype=np.int32) - pred = np.asarray(pred, dtype=np.int32) - - ignore_index = gt != 255 - - gt = gt[ignore_index] - pred = pred[ignore_index] - - confusion_matrix += get_confusion_matrix(gt, pred, num_classes) - - pos = confusion_matrix.sum(1) - res = confusion_matrix.sum(0) - 
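# A minimal, self-contained illustration (toy arrays, not part of the original
# file) of how the confusion-matrix bookkeeping here becomes pixel accuracy and
# mean IoU: rows index ground-truth classes, columns index predictions, pos and
# res are the row/column sums, and tp is the diagonal.
import numpy as np

num_classes = 3
gt = np.array([0, 0, 1, 1, 2, 2, 2, 1], dtype=np.int32)
pred = np.array([0, 1, 1, 1, 2, 0, 2, 2], dtype=np.int32)

index = gt * num_classes + pred              # same indexing trick as get_confusion_matrix
label_count = np.bincount(index, minlength=num_classes ** 2)
confusion_matrix = label_count.reshape(num_classes, num_classes).astype(np.float64)

pos = confusion_matrix.sum(1)                # ground-truth pixels per class
res = confusion_matrix.sum(0)                # predicted pixels per class
tp = np.diag(confusion_matrix)               # correctly classified pixels

pixel_accuracy = tp.sum() / pos.sum() * 100
IoU_array = tp / np.maximum(1.0, pos + res - tp) * 100
print(pixel_accuracy, IoU_array.mean())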
tp = np.diag(confusion_matrix) - - pixel_accuracy = (tp.sum() / pos.sum()) * 100 - mean_accuracy = ((tp / np.maximum(1.0, pos)).mean()) * 100 - IoU_array = (tp / np.maximum(1.0, pos + res - tp)) - IoU_array = IoU_array * 100 - mean_IoU = IoU_array.mean() - print('Pixel accuracy: %f \n' % pixel_accuracy) - print('Mean accuracy: %f \n' % mean_accuracy) - print('Mean IU: %f \n' % mean_IoU) - name_value = [] - - for i, (label, iou) in enumerate(zip(LABELS, IoU_array)): - name_value.append((label, iou)) - - name_value.append(('Pixel accuracy', pixel_accuracy)) - name_value.append(('Mean accuracy', mean_accuracy)) - name_value.append(('Mean IU', mean_IoU)) - name_value = OrderedDict(name_value) - return name_value diff --git a/preprocess/humanparsing/utils/schp.py b/preprocess/humanparsing/utils/schp.py deleted file mode 100644 index f574704..0000000 --- a/preprocess/humanparsing/utils/schp.py +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : schp.py -@Time : 4/8/19 2:11 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. -""" - -import os -import torch -import modules - -def moving_average(net1, net2, alpha=1): - for param1, param2 in zip(net1.parameters(), net2.parameters()): - param1.data *= (1.0 - alpha) - param1.data += param2.data * alpha - - -def _check_bn(module, flag): - if issubclass(module.__class__, modules.bn.InPlaceABNSync): - flag[0] = True - - -def check_bn(model): - flag = [False] - model.apply(lambda module: _check_bn(module, flag)) - return flag[0] - - -def reset_bn(module): - if issubclass(module.__class__, modules.bn.InPlaceABNSync): - module.running_mean = torch.zeros_like(module.running_mean) - module.running_var = torch.ones_like(module.running_var) - - -def _get_momenta(module, momenta): - if issubclass(module.__class__, modules.bn.InPlaceABNSync): - momenta[module] = module.momentum - - -def _set_momenta(module, momenta): - if issubclass(module.__class__, modules.bn.InPlaceABNSync): - module.momentum = momenta[module] - - -def bn_re_estimate(loader, model): - if not check_bn(model): - print('No batch norm layer detected') - return - model.train() - momenta = {} - model.apply(reset_bn) - model.apply(lambda module: _get_momenta(module, momenta)) - n = 0 - for i_iter, batch in enumerate(loader): - images, labels, _ = batch - b = images.data.size(0) - momentum = b / (n + b) - for module in momenta.keys(): - module.momentum = momentum - model(images) - n += b - model.apply(lambda module: _set_momenta(module, momenta)) - - -def save_schp_checkpoint(states, is_best_parsing, output_dir, filename='schp_checkpoint.pth.tar'): - save_path = os.path.join(output_dir, filename) - if os.path.exists(save_path): - os.remove(save_path) - torch.save(states, save_path) - if is_best_parsing and 'state_dict' in states: - best_save_path = os.path.join(output_dir, 'model_parsing_best.pth.tar') - if os.path.exists(best_save_path): - os.remove(best_save_path) - torch.save(states, best_save_path) diff --git a/preprocess/humanparsing/utils/soft_dice_loss.py b/preprocess/humanparsing/utils/soft_dice_loss.py deleted file mode 100644 index cb5895f..0000000 --- a/preprocess/humanparsing/utils/soft_dice_loss.py +++ /dev/null @@ -1,111 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : soft_dice_loss.py -@Time : 8/13/19 5:09 PM -@Desc : -@License 
: This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. -""" - -from __future__ import print_function, division - -import torch -import torch.nn.functional as F -from torch import nn - -try: - from itertools import ifilterfalse -except ImportError: # py3k - from itertools import filterfalse as ifilterfalse - - -def tversky_loss(probas, labels, alpha=0.5, beta=0.5, epsilon=1e-6): - ''' - Tversky loss function. - probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1) - labels: [P] Tensor, ground truth labels (between 0 and C - 1) - - Same as soft dice loss when alpha=beta=0.5. - Same as Jaccord loss when alpha=beta=1.0. - See `Tversky loss function for image segmentation using 3D fully convolutional deep networks` - https://arxiv.org/pdf/1706.05721.pdf - ''' - C = probas.size(1) - losses = [] - for c in list(range(C)): - fg = (labels == c).float() - if fg.sum() == 0: - continue - class_pred = probas[:, c] - p0 = class_pred - p1 = 1 - class_pred - g0 = fg - g1 = 1 - fg - numerator = torch.sum(p0 * g0) - denominator = numerator + alpha * torch.sum(p0 * g1) + beta * torch.sum(p1 * g0) - losses.append(1 - ((numerator) / (denominator + epsilon))) - return mean(losses) - - -def flatten_probas(probas, labels, ignore=255): - """ - Flattens predictions in the batch - """ - B, C, H, W = probas.size() - probas = probas.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C - labels = labels.view(-1) - if ignore is None: - return probas, labels - valid = (labels != ignore) - vprobas = probas[valid.nonzero().squeeze()] - vlabels = labels[valid] - return vprobas, vlabels - - -def isnan(x): - return x != x - - -def mean(l, ignore_nan=False, empty=0): - """ - nanmean compatible with generators. - """ - l = iter(l) - if ignore_nan: - l = ifilterfalse(isnan, l) - try: - n = 1 - acc = next(l) - except StopIteration: - if empty == 'raise': - raise ValueError('Empty mean') - return empty - for n, v in enumerate(l, 2): - acc += v - if n == 1: - return acc - return acc / n - - -class SoftDiceLoss(nn.Module): - def __init__(self, ignore_index=255): - super(SoftDiceLoss, self).__init__() - self.ignore_index = ignore_index - - def forward(self, pred, label): - pred = F.softmax(pred, dim=1) - return tversky_loss(*flatten_probas(pred, label, ignore=self.ignore_index), alpha=0.5, beta=0.5) - - -class SoftJaccordLoss(nn.Module): - def __init__(self, ignore_index=255): - super(SoftJaccordLoss, self).__init__() - self.ignore_index = ignore_index - - def forward(self, pred, label): - pred = F.softmax(pred, dim=1) - return tversky_loss(*flatten_probas(pred, label, ignore=self.ignore_index), alpha=1.0, beta=1.0) diff --git a/preprocess/humanparsing/utils/warmup_scheduler.py b/preprocess/humanparsing/utils/warmup_scheduler.py deleted file mode 100644 index 2528a9c..0000000 --- a/preprocess/humanparsing/utils/warmup_scheduler.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : warmup_scheduler.py -@Time : 3/28/19 2:24 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. -""" - -import math -from torch.optim.lr_scheduler import _LRScheduler - - -class GradualWarmupScheduler(_LRScheduler): - """ Gradually warm-up learning rate with cosine annealing in optimizer. - Proposed in 'Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour'. 
- """ - - def __init__(self, optimizer, total_epoch, eta_min=0, warmup_epoch=10, last_epoch=-1): - self.total_epoch = total_epoch - self.eta_min = eta_min - self.warmup_epoch = warmup_epoch - super(GradualWarmupScheduler, self).__init__(optimizer, last_epoch) - - def get_lr(self): - if self.last_epoch <= self.warmup_epoch: - return [self.eta_min + self.last_epoch*(base_lr - self.eta_min)/self.warmup_epoch for base_lr in self.base_lrs] - else: - return [self.eta_min + (base_lr-self.eta_min)*(1+math.cos(math.pi*(self.last_epoch-self.warmup_epoch)/(self.total_epoch-self.warmup_epoch))) / 2 for base_lr in self.base_lrs] - - -class SGDRScheduler(_LRScheduler): - """ Consine annealing with warm up and restarts. - Proposed in `SGDR: Stochastic Gradient Descent with Warm Restarts`. - """ - def __init__(self, optimizer, total_epoch=150, start_cyclical=100, cyclical_base_lr=7e-4, cyclical_epoch=10, eta_min=0, warmup_epoch=10, last_epoch=-1): - self.total_epoch = total_epoch - self.start_cyclical = start_cyclical - self.cyclical_epoch = cyclical_epoch - self.cyclical_base_lr = cyclical_base_lr - self.eta_min = eta_min - self.warmup_epoch = warmup_epoch - super(SGDRScheduler, self).__init__(optimizer, last_epoch) - - def get_lr(self): - if self.last_epoch < self.warmup_epoch: - return [self.eta_min + self.last_epoch*(base_lr - self.eta_min)/self.warmup_epoch for base_lr in self.base_lrs] - elif self.last_epoch < self.start_cyclical: - return [self.eta_min + (base_lr-self.eta_min)*(1+math.cos(math.pi*(self.last_epoch-self.warmup_epoch)/(self.start_cyclical-self.warmup_epoch))) / 2 for base_lr in self.base_lrs] - else: - return [self.eta_min + (self.cyclical_base_lr-self.eta_min)*(1+math.cos(math.pi* ((self.last_epoch-self.start_cyclical)% self.cyclical_epoch)/self.cyclical_epoch)) / 2 for base_lr in self.base_lrs] - - -if __name__ == '__main__': - import matplotlib.pyplot as plt - import torch - model = torch.nn.Linear(10, 2) - optimizer = torch.optim.SGD(params=model.parameters(), lr=7e-3, momentum=0.9, weight_decay=5e-4) - scheduler_warmup = SGDRScheduler(optimizer, total_epoch=150, eta_min=7e-5, warmup_epoch=10, start_cyclical=100, cyclical_base_lr=3.5e-3, cyclical_epoch=10) - lr = [] - for epoch in range(0,150): - scheduler_warmup.step(epoch) - lr.append(scheduler_warmup.get_lr()) - plt.style.use('ggplot') - plt.plot(list(range(0,150)), lr) - plt.show() -