-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfilter_synthetic_cn.py
More file actions
85 lines (67 loc) · 2.67 KB
/
filter_synthetic_cn.py
File metadata and controls
85 lines (67 loc) · 2.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# ============================================================
# AUTOMATIC FILTERING OF SYNTHETIC CN SLICES
# ============================================================
"""
This script filters out low-quality synthetic CN MRI slices generated
by a GAN. The goal is to retain only high-quality images suitable
for data augmentation in the classifier training.
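Filtering criteria (an image is kept only if it passes all three):
1. Mean intensity (> 0.05 on a [0, 1] scale): removes overly dark or collapsed images.
2. Foreground area (> 5% of pixels brighter than 0.1): ensures sufficient brain coverage.
3. Edge strength (Laplacian variance > 15): removes blurry images.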
"""
import os
import cv2
import numpy as np
from tqdm import tqdm
# -----------------------------
# CONFIGURATION
# -----------------------------
# Make sure the filtered-output directory exists for every dataset variant.
for _out_dir in (
    "slices_256/augmented_10/CN_synthetic_filtered",
    "slices_256/augmented_20/CN_synthetic_filtered",
    "slices_256/train_augmented/CN_synthetic_filtered",
):
    os.makedirs(_out_dir, exist_ok=True)

# Active input/output pair. Only this variant is read and filtered by the
# rest of the script; the other output directories above are created but
# left untouched.
SYN_DIR = "slices_256/train_augmented/CN_synthetic"  # Directory with raw GAN outputs
FILTERED_DIR = "slices_256/train_augmented/CN_synthetic_filtered"  # Directory for filtered outputs
# Names of the entries in SYN_DIR whose filename ends with ".png"
# (case-sensitive match, in whatever order os.listdir returns them).
files = list(filter(lambda name: name.endswith(".png"), os.listdir(SYN_DIR)))

# Running totals of accepted / rejected images for the final report.
kept, removed = 0, 0
# -----------------------------
# FILTERING LOOP
# -----------------------------
# Quality thresholds; an image must exceed all three to be kept.
FOREGROUND_PIXEL_LEVEL = 0.1   # pixel counts as foreground above this [0, 1] intensity
MIN_MEAN_INTENSITY = 0.05      # rejects very dark / collapsed images
MIN_FOREGROUND_FRACTION = 0.05  # rejects images with too little bright area
MIN_LAPLACIAN_VARIANCE = 15    # rejects blurry images

for fname in tqdm(files):
    path = os.path.join(SYN_DIR, fname)

    # Load as a single-channel 8-bit image; cv2.imread returns None on failure.
    raw = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if raw is None:
        # Report unreadable files instead of dropping them silently.
        # tqdm.write prints without corrupting the progress bar.
        tqdm.write(f"Warning: could not read {path}; skipping")
        continue

    # Normalized [0, 1] copy, used only for the intensity statistics.
    img = raw / 255.0

    # 1. Mean intensity of the normalized image.
    mean_intensity = img.mean()

    # 2. Foreground area: fraction of pixels brighter than the foreground level.
    foreground_area = np.sum(img > FOREGROUND_PIXEL_LEVEL) / img.size

    # 3. Edge strength: variance of the Laplacian response. Computed on the
    #    original uint8 pixels rather than on a re-scaled, truncated copy of
    #    the normalized image.
    laplacian_var = cv2.Laplacian(raw, cv2.CV_64F).var()

    # ---- Apply filtering thresholds ----
    if (
        mean_intensity > MIN_MEAN_INTENSITY
        and foreground_area > MIN_FOREGROUND_FRACTION
        and laplacian_var > MIN_LAPLACIAN_VARIANCE
    ):
        # Save the untouched pixels under the same filename, so the output
        # never goes through a float round trip and truncating cast.
        cv2.imwrite(os.path.join(FILTERED_DIR, fname), raw)
        kept += 1
    else:
        removed += 1
# -----------------------------
# REPORT RESULTS
# -----------------------------
# Summarize how many images passed and failed the quality filter.
print("Filtering complete.")
for _label, _count in (("Kept:", kept), ("Removed:", removed)):
    print(_label, _count)