diff --git a/new_preprocess/ReadMe.md b/new_preprocess/ReadMe.md new file mode 100644 index 0000000..1fdae5d --- /dev/null +++ b/new_preprocess/ReadMe.md @@ -0,0 +1,159 @@ +# iSAID Preprocessing and YOLO Conversion Toolkit + +A complete, step-by-step toolkit to preprocess the iSAID (Instance Segmentation in Aerial Images Dataset) dataset. The scripts guide you through splitting the large aerial images into smaller patches, generating annotations in the standard COCO format, and optionally converting the entire dataset into the segmentation format required by Ultralytics YOLO. + +**Author:** Mridankan Mandal + +## Features + +- **Image Patching**: Splits the large source images into smaller, overlapping patches suitable for training. +- **COCO Annotation Generation**: Creates COCO-style JSON annotation files from the iSAID instance masks for the training and validation sets. +- **Test Set Handling**: Generates a COCO-compliant JSON file for the test set images (without labels). +- **YOLO Format Conversion**: Provides a script to convert the COCO-formatted dataset into the YOLO segmentation format, including `.txt` label files and the required `data.yaml`. + +## Prerequisites + +1. **Python 3.6+ (Tested on Python 3.6 and Python 3.11)** +2. The iSAID dataset. Download it from the [official website](https://captain-whu.github.io/iSAID/). +3. Install the required Python libraries: + ```bash + pip install -r requirements.txt + ``` +**Note**: This toolkit has been tested most extensively on Windows 11 with Python 3.11. + +## Directory Setup + +Before you begin, you must organize your downloaded iSAID dataset into the following structure: + +``` +iSAID_dataset/ +├── train/ +│ └── images/ +│ ├── P0002.png +│ ├── P0002_instance_color_RGB.png +│ ├── P0002_instance_id_RGB.png +│ └── ... +├── val/ +│ └── images/ +│ ├── P0001.png +│ ├── P0001_instance_color_RGB.png +│ ├── P0001_instance_id_RGB.png +│ └── ... +└── test/ + └── images/ + ├── P0003.png + └── ... 
+``` + +For the default commands to work, download and place the `iSAID_dataset` folder inside this project's root directory, as shown below: + +``` +new_iSAID_Toolkit/ +├── iSAID_dataset/ <- Place raw dataset here +│ ├── train/ +│ │ └── images/ +│ ├── val/ +│ │ └── images/ +│ └── test/ +│ └── images/ +├── split.py +├── preprocess.py +├── generate_test_json.py +├── convert_to_yolo.py +├── requirements.txt +└── README.md +``` + +## Usage Workflow + +Follow these steps in order to process the dataset. Each step includes a simple command that relies on the default directory structure, and a second, more explicit command that shows all parameters. + +### Step 1: Split Large Images into Patches + +This script creates smaller, overlapping patches and places them in a new `iSAID_patches` directory. + +* **Command (using defaults):** + ```bash + python split.py + ``` + +* **Command (with explicit arguments):** + ```bash + python split.py --src ./iSAID_dataset --tar ./iSAID_patches --patch_width 800 --patch_height 800 --overlap_area 200 --set train,val,test + ``` + +### Step 2: Generate COCO Annotations + +This step creates COCO-style JSON annotation files for the `train` and `val` sets. + +* **Command (using defaults):** + ```bash + python preprocess.py + ``` + +* **Command (with explicit arguments):** + ```bash + python preprocess.py --datadir ./iSAID_patches --outdir ./iSAID_patches --set train,val + ``` + +### Step 3: Generate Test Set JSON File + +This creates a JSON file for the test images, which is useful for a consistent dataset structure. + +* **Command (using defaults):** + ```bash + python generate_test_json.py + ``` + +* **Command (with explicit arguments):** + ```bash + python generate_test_json.py --datadir ./iSAID_patches --outdir ./iSAID_patches --set test + ``` + +### Step 4 (Optional): Convert to YOLO Segmentation Format + +If you intend to train a YOLO segmentation model, this final script converts the COCO-formatted data into the required YOLO format. 
#Convert the preprocessed iSAID Dataset to YOLO Segmentation Format
#This program converts the preprocessed iSAID dataset into a format suitable for YOLO segmentation
#tasks, including copying images and converting annotations to YOLO format.

import os
import json
import shutil
from pathlib import Path
import argparse


def parse_args():
    """Parse the command line arguments.

    Returns:
        argparse.Namespace with ``datadir`` (COCO-style input root) and
        ``outdir`` (root of the YOLO dataset to be written).
    """
    parser = argparse.ArgumentParser(description="Convert preprocessed iSAID dataset to YOLO segmentation format.")
    parser.add_argument('--datadir', type=str, default='./iSAID_patches',
                        help="Path to the root directory of the preprocessed iSAID dataset (input).")
    parser.add_argument('--outdir', type=str, default='./iSAID_YOLO_Dataset',
                        help="Path to the root directory where the YOLO formatted dataset will be saved (output).")
    return parser.parse_args()


def _polygon_area(flat_coords):
    """Return the absolute shoelace area of a flat [x0, y0, x1, y1, ...] polygon."""
    xs = flat_coords[0::2]
    ys = flat_coords[1::2]
    twice_area = sum(
        x0 * y1 - x1 * y0
        for x0, y0, x1, y1 in zip(xs, ys, xs[1:] + xs[:1], ys[1:] + ys[:1])
    )
    return abs(twice_area) / 2.0


def _pick_polygon(segmentation):
    """Return the largest usable polygon of a COCO segmentation, or None.

    A usable polygon is a flat list with an even number of values and at
    least three points. A YOLO label line holds a single polygon, so when an
    instance has several (fragments or holes) the largest one is kept rather
    than whichever happens to come first. Ties keep the earliest polygon.
    """
    if not isinstance(segmentation, list):
        #RLE (dict) segmentations cannot be expressed as a YOLO polygon.
        return None
    candidates = [
        poly for poly in segmentation
        if isinstance(poly, list) and len(poly) >= 6 and len(poly) % 2 == 0
    ]
    if not candidates:
        return None
    return max(candidates, key=_polygon_area)


def _yolo_label_line(cls_idx, polygon, width, height):
    """Format one YOLO segmentation line: class index plus normalized x/y pairs."""
    values = []
    for i, coord in enumerate(polygon):
        #Even positions are x (scaled by width), odd positions are y (scaled by height).
        scaled = coord / width if i % 2 == 0 else coord / height
        #Clamp so that slightly out-of-image vertices still yield coordinates in [0, 1].
        values.append(f"{min(max(scaled, 0.0), 1.0):.6f}")
    return " ".join([str(cls_idx)] + values) + "\n"


def _copy_image(src_path, dst_path):
    """Copy an image into the YOLO tree, warning (not failing) when it is missing."""
    if src_path.exists():
        shutil.copy(src_path, dst_path)
    else:
        print(f"Warning: Source image not found: {src_path}")


def convert_isaid_to_yolo_seg(input_root: str, output_root: str) -> None:
    """Convert a COCO-style iSAID patch dataset into the YOLO segmentation layout.

    Reads ``<input_root>/<split>/instancesonly_filtered_<split>.json`` and the
    images under ``<input_root>/<split>/images``. Writes
    ``<output_root>/images/<split>``, ``<output_root>/labels/<split>`` and
    ``<output_root>/data.yaml``.

    Args:
        input_root: Root directory of the preprocessed (COCO-style) dataset.
        output_root: Root directory of the YOLO dataset to create.

    Splits whose JSON file is missing are skipped with a warning. Annotations
    with an unknown category or without a usable polygon are skipped; images
    with a non-positive size get an empty label file.
    """
    print(f"Starting conversion from '{input_root}' to YOLO format at '{output_root}'...")
    in_root = Path(input_root)
    out_root = Path(output_root)

    #Create YOLO directory structure for images and labels.
    for split in ("train", "val", "test"):
        (out_root / "images" / split).mkdir(parents=True, exist_ok=True)
        (out_root / "labels" / split).mkdir(parents=True, exist_ok=True)

    #Process train and validation splits with annotations.
    for split in ("train", "val"):
        json_path = in_root / split / f"instancesonly_filtered_{split}.json"
        if not json_path.exists():
            print(f"Warning: JSON file not found for '{split}' split. Skipping: {json_path}")
            continue

        print(f"Processing '{split}' split...")
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        #Index images by id and group annotations by the image they belong to.
        images = {img["id"]: img for img in data["images"]}
        annos = {}
        for ann in data["annotations"]:
            annos.setdefault(ann["image_id"], []).append(ann)

        #YOLO class indices are the positions of the sorted COCO category ids.
        cat_ids = sorted(c["id"] for c in data["categories"])
        catid2idx = {cid: idx for idx, cid in enumerate(cat_ids)}

        for img_id, img_info in images.items():
            fname = img_info["file_name"]
            w, h = img_info["width"], img_info["height"]

            _copy_image(in_root / split / "images" / fname,
                        out_root / "images" / split / fname)

            lines = []
            if w <= 0 or h <= 0:
                #Normalizing by a zero size would raise; keep the image with no labels.
                print(f"Warning: Invalid image size {w}x{h} for {fname}; writing empty label file")
            else:
                for ann in annos.get(img_id, []):
                    cls_idx = catid2idx.get(ann["category_id"])
                    if cls_idx is None:
                        print(f"Warning: Unknown category_id {ann['category_id']} in {fname}; annotation skipped")
                        continue
                    polygon = _pick_polygon(ann.get("segmentation"))
                    if polygon is not None:
                        lines.append(_yolo_label_line(cls_idx, polygon, w, h))

            #A label file is always written, even when it ends up empty.
            label_path = out_root / "labels" / split / f"{Path(fname).stem}.txt"
            with open(label_path, "w", encoding='utf-8') as f:
                f.writelines(lines)

    #Process test split which only has images, and no annotations.
    print("Processing 'test' split...")
    test_json_path = in_root / "test" / "instancesonly_filtered_test.json"
    if test_json_path.exists():
        with open(test_json_path, 'r', encoding='utf-8') as f:
            test_data = json.load(f)
        for img in test_data["images"]:
            fname = img["file_name"]
            _copy_image(in_root / "test" / "images" / fname,
                        out_root / "images" / "test" / fname)

            #Create empty label files for test images.
            label_path = out_root / "labels" / "test" / f"{Path(fname).stem}.txt"
            with open(label_path, "w", encoding='utf-8'):
                pass

    #Create YOLO dataset .yml configuration file; class names come from the train JSON.
    print("Writing data.yaml file...")
    train_json_path = in_root / "train" / "instancesonly_filtered_train.json"
    if train_json_path.exists():
        with open(train_json_path, 'r', encoding='utf-8') as f:
            train_data = json.load(f)
        names = [c["name"] for c in sorted(train_data["categories"], key=lambda x: x["id"])]

        yaml_path = out_root / "data.yaml"
        with open(yaml_path, "w", encoding='utf-8') as f:
            f.write("train: images/train\n")
            f.write("val: images/val\n")
            f.write("test: images/test\n\n")
            f.write(f"nc: {len(names)}\n")
            f.write(f"names: {names}\n")
    else:
        print("Warning: Could not find train JSON file to extract class names for data.yaml")

    print("Conversion complete.")


if __name__ == "__main__":
    args = parse_args()
    convert_isaid_to_yolo_seg(args.datadir, args.outdir)
def get_category_info():
    """Return the COCO ``categories`` table used for the iSAID JSON files.

    The table has 16 entries: id 0 is ``unlabeled`` and ids 1-15 are the
    object classes. Ids equal list positions.
    """
    names = (
        'unlabeled', 'ship', 'storage_tank', 'baseball_diamond',
        'tennis_court', 'basketball_court', 'Ground_Track_Field', 'Bridge',
        'Large_Vehicle', 'Small_Vehicle', 'Helicopter', 'Swimming_pool',
        'Roundabout', 'Soccer_ball_field', 'plane', 'Harbor',
    )
    return [{'id': idx, 'name': name} for idx, name in enumerate(names)]


def main(args):
    """Write a COCO-style JSON (images only, no annotations) for the test split.

    Args:
        args: Namespace with ``set`` (comma-separated split names; only
            ``test`` is processed), ``datadir`` (root containing
            ``test/images``) and ``outdir`` (root that receives
            ``test/instancesonly_filtered_test.json``).

    Image ids are assigned in natural-sort order. ``file_name`` is the path
    relative to the images directory, which is the bare file name for the
    usual flat layout.
    """
    image_exts = ('.png', '.jpg', '.jpeg')
    mask_markers = ('_instance_color_RGB', '_instance_id_RGB')

    #Strip entries so that "train, test" is handled like "train,test".
    for data_set in (name.strip() for name in args.set.split(',')):
        if data_set != 'test':
            print(f"Skipping non-test set: {data_set}")
            continue

        #Scan the test images directory.
        ann_dir = os.path.join(args.datadir, data_set, 'images')
        print(f"Scanning {ann_dir} for test images...")

        if not os.path.exists(ann_dir):
            print(f"Directory not found: {ann_dir}")
            continue

        #Collect all test image information.
        images = []
        for root, dirs, files in os.walk(ann_dir):
            #Sorting in place fixes the traversal order, so ids are reproducible.
            dirs[:] = natsorted(dirs)
            for filename in natsorted(files):
                if not filename.lower().endswith(image_exts):
                    continue

                #Skip annotation/mask images, keep only original images.
                if any(marker in filename for marker in mask_markers):
                    continue

                #Read image to get dimensions.
                img_path = os.path.join(root, filename)
                img = cv2.imread(img_path)
                if img is None:
                    print(f"Warning: failed to read {filename}")
                    continue
                h, w = img.shape[:2]

                #Keep the sub-directory part so nested images stay resolvable and unique.
                rel_name = os.path.relpath(img_path, ann_dir).replace(os.sep, '/')
                images.append({
                    'id': len(images),
                    'width': w,
                    'height': h,
                    'file_name': rel_name
                })

        #Create COCO format JSON structure for test images without annotations.
        ann_dict = {
            'images': images,
            'categories': get_category_info(),
            'annotations': []
        }

        #Write the test JSON file.
        os.makedirs(os.path.join(args.outdir, data_set), exist_ok=True)
        out_file = os.path.join(args.outdir, data_set, 'instancesonly_filtered_test.json')
        with open(out_file, 'w') as f:
            json.dump(ann_dict, f, indent=4)
        print(f"Wrote {len(images)} test image entries to {out_file}")


if __name__ == '__main__':
    #Parse command line arguments for directory paths.
    parser = argparse.ArgumentParser(description='Generate test image IDs JSON')
    parser.add_argument('--outdir', default='./iSAID_patches', type=str)
    parser.add_argument('--datadir', default='./iSAID_patches', type=str)
    parser.add_argument('--set', default="test", type=str)
    args = parser.parse_args()
    main(args)
+ return [ + {'id': 0, 'name': 'unlabeled'}, {'id': 1, 'name': 'ship'}, + {'id': 2, 'name': 'storage_tank'}, {'id': 3, 'name': 'baseball_diamond'}, + {'id': 4, 'name': 'tennis_court'}, {'id': 5, 'name': 'basketball_court'}, + {'id': 6, 'name': 'Ground_Track_Field'}, {'id': 7, 'name': 'Bridge'}, + {'id': 8, 'name': 'Large_Vehicle'}, {'id': 9, 'name': 'Small_Vehicle'}, + {'id': 10, 'name': 'Helicopter'}, {'id': 11, 'name': 'Swimming_pool'}, + {'id': 12, 'name': 'Roundabout'}, {'id': 13, 'name': 'Soccer_ball_field'}, + {'id': 14, 'name': 'plane'}, {'id': 15, 'name': 'Harbor'} + ] + +def main(args): + categories = get_category_info() + + #Process each dataset split (train/val). + for split in args.set.split(','): + print(f"Processing split: {split}") + patch_dir = os.path.join(args.datadir, split, 'images') + if not os.path.exists(patch_dir): + print(f"Directory not found: {patch_dir}") + continue + + #Initialize lists for COCO format data. + images = [] + annotations = [] + ann_id = 0 + img_id = 0 + + #Get all image files (excluding instance mask files). + all_files = natsorted(os.listdir(patch_dir)) + image_files = [f for f in all_files if f.lower().endswith(('.png', '.jpg', '.jpeg')) and '_instance_' not in f] + + #Process each image and its corresponding instance mask. + for img_file in image_files: + base_name, img_ext = os.path.splitext(img_file) + ins_file = f"{base_name}_instance_id_RGB.png" + ins_path = os.path.join(patch_dir, ins_file) + img_path = os.path.join(patch_dir, img_file) + + #Skip if instance mask does not exist. + if not os.path.exists(ins_path): + continue + + #Read the original image. + img = cv2.imread(img_path) + if img is None: + print(f"Could not read image: {img_path}") + continue + h, w, _ = img.shape + + #Add image info to COCO format. + images.append({ + 'id': img_id, + 'width': w, + 'height': h, + 'file_name': img_file, + }) + + #Read the instance ID mask. 
+ instance_img = cv2.imread(ins_path) + if instance_img is None: + print(f"Could not read instance image: {ins_path}") + img_id += 1 + continue + + #Decode instance IDs from RGB channels (iSAID specific encoding). + r_channel = instance_img[:, :, 2].astype(np.int32) + g_channel = instance_img[:, :, 1].astype(np.int32) + instance_map = (r_channel // 10 * 256) + g_channel + + #Find all unique instance IDs in the image. + unique_instances = np.unique(instance_map) + + #Process each instance to create annotations. + for instance_id in unique_instances: + if instance_id == 0: continue + + #Extract class ID from instance ID. + class_id = instance_id // 1000 + if class_id == 0 or class_id > len(categories) - 1: continue + + #Create binary mask for this instance. + binary_mask = (instance_map == instance_id).astype(np.uint8) + + #Skip very small instances. + if binary_mask.sum() < 10: continue + + #Extract contours for segmentation polygons. + contours = measure.find_contours(binary_mask, 0.5) + + segmentation = [] + for contour in contours: + contour = np.flip(contour, axis=1) + segmentation.append(contour.ravel().tolist()) + + if not segmentation: continue + + #Calculate area and bounding box using COCO tools. + rle = maskUtils.encode(np.asfortranarray(binary_mask)) + area = float(maskUtils.area(rle)) + bbox = maskUtils.toBbox(rle).tolist() + + #Add annotation in COCO format. + annotations.append({ + 'id': ann_id, + 'image_id': img_id, + 'category_id': int(class_id), + 'segmentation': segmentation, + 'area': area, + 'bbox': bbox, + 'iscrowd': 0 + }) + ann_id += 1 + img_id += 1 + + #Create final COCO format dictionary. + coco_dict = { + 'images': images, + 'annotations': annotations, + 'categories': categories + } + + #Write the COCO format JSON file. 
+ out_json_path = os.path.join(args.outdir, split, f'instancesonly_filtered_{split}.json') + os.makedirs(os.path.dirname(out_json_path), exist_ok=True) + with open(out_json_path, 'w') as f: + json.dump(coco_dict, f, indent=4) + + print(f"Wrote {len(images)} images and {len(annotations)} annotations to {out_json_path}") + +if __name__ == '__main__': + args = parse_args() + main(args) \ No newline at end of file diff --git a/new_preprocess/requirements.txt b/new_preprocess/requirements.txt new file mode 100644 index 0000000..c3add85 --- /dev/null +++ b/new_preprocess/requirements.txt @@ -0,0 +1,5 @@ +opencv-python +numpy +natsort +pycocotools +scikit-image diff --git a/new_preprocess/split.py b/new_preprocess/split.py new file mode 100644 index 0000000..f6e544f --- /dev/null +++ b/new_preprocess/split.py @@ -0,0 +1,114 @@ +#This program splits large images from the iSAID dataset into smaller patches with specified overlap. +#It processes images from specified dataset splits (train, val, test) and saves the patches +#into a target directory, maintaining the original image structure. + +import cv2 +import os +import numpy as np +from natsort import natsorted +from glob import glob +from shutil import copyfile +import argparse + +def main(args): + #Extract command line arguments for processing. + src_root = args.src + tar_root = args.tar + splits = args.set.split(',') + subfolder = args.image_sub_folder + patch_h, patch_w = args.patch_height, args.patch_width + overlap = args.overlap_area + exts = ['.png', '.jpg', '.jpeg'] + + #Process each dataset split (train/val/test). 
+ for split in splits: + if split not in ('train', 'val', 'test'): + print(f"Skipping invalid split: {split}") + continue + + print(f"\n>> Processing split: {split}") + + #Set up source and destination directories + src_dir = os.path.join(src_root, split, subfolder) + dst_dir = os.path.join(tar_root, split, subfolder) + + os.makedirs(dst_dir, exist_ok=True) + + #Define file suffixes for different image types. + suffixes = [''] + if split in ('train', 'val'): + suffixes.extend(['_instance_color_RGB', '_instance_id_RGB']) + + #Find all base image names in the source directory. + base_ids = [] + if not os.path.exists(src_dir): + print(f" [ERROR] Source directory not found: {src_dir}") + continue + else: + for ext in exts: + for fpath in glob(os.path.join(src_dir, f'*{ext}')): + name = os.path.splitext(os.path.basename(fpath))[0] + if '_' not in name: + base_ids.append(name) + base_ids = natsorted(list(set(base_ids))) + + print(f"Found {len(base_ids)} raw images in {src_dir}") + + #Process each base image with all its variants. + for base in base_ids: + for suf in suffixes: + #Find the image file with current suffix. + fpath = None + for ext in exts: + candidate = os.path.join(src_dir, f"{base}{suf}{ext}") + if os.path.exists(candidate): + fpath = candidate + break + + if fpath is None: + if not (split == 'test' and suf != ''): + print(f" [WARN] missing file: {base}{suf} (searched exts: {exts})") + continue + + #Read the image. + img = cv2.imread(fpath) + if img is None: + print(f" [ERROR] could not read: {os.path.basename(fpath)}") + continue + h, w = img.shape[:2] + + #Split large images into patches with overlap. + if h > patch_h and w > patch_w: + for y0 in range(0, h, patch_h - overlap): + for x0 in range(0, w, patch_w - overlap): + y1 = min(y0 + patch_h, h) + x1 = min(x0 + patch_w, w) + + #Ensure patch has exact dimensions by adjusting start coordinates. + final_y0 = y1 - patch_h + final_x0 = x1 - patch_w + + #Extract and save the patch. 
+ patch = img[final_y0:y1, final_x0:x1] + + out_ext = os.path.splitext(fpath)[1] + out_name = f"{base}_{final_y0}_{y1}_{final_x0}_{x1}{suf}{out_ext}" + out_path = os.path.join(dst_dir, out_name) + cv2.imwrite(out_path, patch) + else: + #Copy small images as they are without splitting. + out_name = os.path.basename(fpath) + copyfile(fpath, os.path.join(dst_dir, out_name)) + +if __name__ == '__main__': + #Parse command line arguments for image splitting parameters. + parser = argparse.ArgumentParser(description='Splitting the iSAID Images') + parser.add_argument('--src', default='./iSAID_dataset', type=str) + parser.add_argument('--tar', default='./iSAID_patches', type=str) + parser.add_argument('--image_sub_folder', default='images', type=str) + parser.add_argument('--set', default="train,val,test", type=str) + parser.add_argument('--patch_width', default=800, type=int) + parser.add_argument('--patch_height', default=800, type=int) + parser.add_argument('--overlap_area', default=200, type=int) + args = parser.parse_args() + main(args) \ No newline at end of file