GAN-Based-Synthetic-MRI-Augmentation/filter_synthetic_cn.py at main · Moses-Mk/GAN-Based-Synthetic-MRI-Augmentation · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# ============================================================
# AUTOMATIC FILTERING OF SYNTHETIC CN SLICES
# ============================================================

"""
This script filters out low-quality synthetic CN MRI slices generated
by a GAN. The goal is to retain only high-quality images suitable
for data augmentation in the classifier training.

Filtering criteria:
1. Mean intensity threshold: removes overly dark or collapsed images.
2. Foreground area: ensures sufficient brain coverage.
3. Edge strength (Laplacian variance): removes blurry images.
"""

import os
import cv2
import numpy as np
from tqdm import tqdm

# -----------------------------
# CONFIGURATION
# -----------------------------
# Each entry pairs a directory of raw GAN outputs with the directory that
# receives the slices passing the quality filter.  These used to be three
# successive assignments to SYN_DIR / FILTERED_DIR, which meant only the
# last pair was ever filtered while the other output folders stayed empty.
DATASET_DIRS = [
    ("slices_256/augmented_10/CN_synthetic",
     "slices_256/augmented_10/CN_synthetic_filtered"),
    ("slices_256/augmented_20/CN_synthetic",
     "slices_256/augmented_20/CN_synthetic_filtered"),
    ("slices_256/train_augmented/CN_synthetic",
     "slices_256/train_augmented/CN_synthetic_filtered"),
]

# Quality thresholds: a slice is kept only if it exceeds all three minimums.
MIN_MEAN_INTENSITY = 0.05    # mean of the pixels scaled to [0, 1]
FOREGROUND_LEVEL = 0.1       # scaled intensity above which a pixel is foreground
MIN_FOREGROUND_AREA = 0.05   # fraction of pixels that must be foreground
MIN_LAPLACIAN_VAR = 15       # variance of the Laplacian of the 8-bit image


def _passes_quality_filter(raw):
    """Return True if an 8-bit grayscale slice meets all three criteria.

    Args:
        raw: 2-D uint8 array as returned by
            ``cv2.imread(..., cv2.IMREAD_GRAYSCALE)``.

    Returns:
        True when the mean intensity, the foreground fraction and the
        Laplacian variance are all strictly above their thresholds.
    """
    # Intensity statistics are taken on the [0, 1] scale the thresholds
    # were chosen for.
    scaled = raw / 255.0

    # 1) Mean intensity: rejects very dark or collapsed images.
    if not scaled.mean() > MIN_MEAN_INTENSITY:
        return False

    # 2) Foreground area: proportion of pixels above FOREGROUND_LEVEL.
    foreground_area = np.count_nonzero(scaled > FOREGROUND_LEVEL) / scaled.size
    if not foreground_area > MIN_FOREGROUND_AREA:
        return False

    # 3) Edge strength: Laplacian variance, rejects blurry images.  The
    #    original uint8 pixels are used directly; rebuilding them from the
    #    scaled floats with astype() truncates and is not guaranteed to
    #    reproduce the input exactly.
    laplacian_var = cv2.Laplacian(raw, cv2.CV_64F).var()
    return bool(laplacian_var > MIN_LAPLACIAN_VAR)


def _filter_directory(syn_dir, filtered_dir):
    """Copy every PNG in ``syn_dir`` that passes the filter to ``filtered_dir``.

    Args:
        syn_dir: Directory containing the raw synthetic slices.
        filtered_dir: Output directory; created if it does not exist.

    Returns:
        A ``(kept, removed, failed)`` tuple of counts, where ``failed`` is
        the number of files that could not be read or written.
    """
    os.makedirs(filtered_dir, exist_ok=True)

    if not os.path.isdir(syn_dir):
        # A dataset variant that has not been generated should not abort
        # the filtering of the remaining ones.
        print("Skipping missing directory:", syn_dir)
        return 0, 0, 0

    # List all PNG images in the synthetic directory
    files = [f for f in os.listdir(syn_dir) if f.endswith(".png")]

    kept = 0
    removed = 0
    failed = 0
    for fname in tqdm(files, desc=syn_dir):
        src_path = os.path.join(syn_dir, fname)

        # Load as grayscale image; imread signals failure by returning None.
        raw = cv2.imread(src_path, cv2.IMREAD_GRAYSCALE)
        if raw is None:
            tqdm.write("Could not read " + src_path + "; skipping")
            failed += 1
            continue

        if not _passes_quality_filter(raw):
            removed += 1
            continue

        # Save the untouched pixels so accepted slices are exact copies.
        dst_path = os.path.join(filtered_dir, fname)
        if cv2.imwrite(dst_path, raw):
            kept += 1
        else:
            tqdm.write("Could not write " + dst_path)
            failed += 1

    return kept, removed, failed


# -----------------------------
# FILTERING LOOP
# -----------------------------
# Counters for reporting (totals over all configured directories)
kept = 0
removed = 0
failed = 0

for syn_dir, filtered_dir in DATASET_DIRS:
    dir_kept, dir_removed, dir_failed = _filter_directory(syn_dir, filtered_dir)
    print(syn_dir, "-> kept:", dir_kept, "removed:", dir_removed)
    kept += dir_kept
    removed += dir_removed
    failed += dir_failed

# -----------------------------
# REPORT RESULTS
# -----------------------------
print("Filtering complete.")
print("Kept:", kept)
print("Removed:", removed)
if failed:
    # Only shown when something went wrong, so a clean run prints the
    # same summary lines as before.
    print("Unreadable or unwritable:", failed)