Skip to content

Commit ea7279f

Browse files
authored
Merge pull request #2 from nidhinthomas-ai/nidhin_opencv_detection
ENH: Nidhin opencv detection
2 parents 85edfa2 + e0038ea commit ea7279f

File tree

15 files changed

+978
-47
lines changed

15 files changed

+978
-47
lines changed

.gitlab-ci.yml

Lines changed: 0 additions & 41 deletions
This file was deleted.

README.md

Lines changed: 61 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,72 @@
22

33
## Installing the project
44

5-
To install the project, clone the repository and install
6-
the package, core dependencies, and optional dependencies
7-
by calling:
5+
Download the package from the following GitHub repository:
6+
7+
```bash
8+
git clone git@github.com:lanl/ldrd_neat_ml.git
9+
```
10+
11+
Install the project, core dependencies,
12+
and optional dependencies by calling:
813

914
```
1015
python -m pip install -v ".[dev]"
1116
```
1217

18+
## Writing a `.yaml` input file for OpenCV detection
19+
20+
The workflow takes as input a `.yaml` configuration file with information
21+
on where to find the input image data for blob detection and where to save the
22+
output images.
23+
24+
The `.yaml` file should follow the format below (an example
25+
can be found at `neat_ml/data/opencv_detection_test.yaml`).
26+
Input paths can be provided as either absolute or relative
27+
file paths.
28+
29+
```yaml
30+
roots:
31+
work: path/to/save/output
32+
33+
datasets:
34+
- id: name_of_save_folder
35+
method: Currently only supports ``OpenCV`` (or ``opencv``) as input
36+
class: subfolder_for_image_class
37+
time_label: subfolder_for_timestamp
38+
39+
detection:
40+
img_dir: path/to/image/data (Can be a directory of ``.tiff`` images or a path to a single ``.tiff`` image.)
41+
debug: True/False for debug (`True` will save side-by-side figure
42+
of raw image next to bounding box overlay.)
43+
```
44+
45+
## Running OpenCV detection
46+
47+
To run the workflow with a given `.yaml` file:
48+
49+
`python run_workflow.py --config <YAML file> --steps detect`
50+
51+
To run the workflow using ``opencv_detection_test.yaml``:
52+
53+
1. download and install the project
54+
2. run the test-suite to download the test images from `pooch`
55+
3. run the following command to get the path where the images are stored
56+
57+
```
58+
python -c "import pooch; print(pooch.os_cache('test_images'))"
59+
```
60+
61+
4. replace the ``datasets:detection:img_dir`` value `path/to/pooch/images` in the `.yaml` with the local filepath
62+
5. call the `run_workflow` command with `--config neat_ml/data/opencv_detection_test.yaml`
63+
64+
This should process and detect bubbles from the image file `images_raw.tiff` and
65+
place the outputs under the ``roots:work`` filepath from the `.yaml` file.
66+
67+
For information relevant to running the workflow:
68+
69+
`python run_workflow.py --help`
70+
1371
## Running the Main ML workflow
1472
1573
Note that the first incantation of the main ML
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
roots:
2+
work: neat_ml/tests/data/
3+
4+
datasets:
5+
- id: Test
6+
method: OpenCV
7+
class: ''
8+
time_label: ''
9+
10+
detection:
11+
img_dir: 'path/to/pooch/images/images_raw.tiff'
12+
debug: True

neat_ml/opencv/__init__.py

Whitespace-only changes.

neat_ml/opencv/detection.py

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
from pathlib import Path
2+
from typing import Sequence
3+
4+
import cv2
5+
import matplotlib.pyplot as plt
6+
import numpy as np
7+
import pandas as pd
8+
import skimage.color
9+
from joblib import Memory
10+
from tqdm.auto import tqdm
11+
12+
13+
__all__: Sequence[str] = [
14+
"run_opencv"
15+
]
16+
17+
18+
def _detect_single_image(
    img_path: Path
) -> tuple[int, float, pd.DataFrame]:
    """
    Detect bubbles in a single image using OpenCV's SimpleBlobDetector.

    Parameters
    ----------
    img_path : Path
        File path to the image to process. The image is loaded as
        grayscale.

    Returns
    -------
    tuple[int, float, pd.DataFrame]
        num_blobs : int
            Number of blobs detected in the image.
        median_radius : float
            Median radius of detected blobs (NaN if none).
        bubble_data : pd.DataFrame
            DataFrame with one row per detected blob, each containing:
              - 'bubble_number' (1-based index of the blob)
              - 'center' (tuple[float, float]), the keypoint ``(x, y)``
              - 'radius' (float), half of the keypoint size
              - 'area' (float), ``pi * radius**2``
              - 'bbox' (tuple[float, float, float, float]) as
                ``(x_min, y_min, x_max, y_max)``; the values are NOT
                rounded to integers here.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read ``img_path`` (``cv2.imread`` returned None).
    """
    # Pass a plain string: not every OpenCV release accepts
    # ``pathlib.Path`` objects as the filename argument.
    image = cv2.imread(str(img_path), cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise FileNotFoundError(f"Unable to read image file: {img_path}")

    # the parameters for ``SimpleBlobDetector`` were determined manually
    # by hand-tuning via visual inspection NOT by using a standardized
    # hyperparameter optimization method (see: issue #13)
    params = cv2.SimpleBlobDetector_Params()  # type: ignore[attr-defined]
    params.minThreshold = 10
    params.maxThreshold = 200
    params.thresholdStep = 10
    params.filterByColor = False
    params.filterByArea = True
    params.minArea = 20
    params.maxArea = 50000
    params.filterByCircularity = True
    params.minCircularity = 0.75
    params.filterByConvexity = True
    params.minConvexity = 0.80
    params.filterByInertia = True
    params.minInertiaRatio = 0.75

    detector = cv2.SimpleBlobDetector_create(params)  # type: ignore[attr-defined]
    keypoints = detector.detect(image)
    num_blobs = len(keypoints)

    # NOTE(review): the frame is pre-allocated with NaN and then filled row
    # by row. On pandas 2.x ``fillna`` appears to downcast the all-NaN object
    # columns to float64 (with a FutureWarning), which is presumably why
    # 'center' and 'bbox' are cast back to object so they can hold tuples.
    # Confirm the resulting dtypes on newer pandas before relying on them.
    bubble_data = pd.DataFrame(
        index=range(num_blobs),
        columns=["bubble_number", "center", "radius", "area", "bbox"],
    ).fillna(np.nan)
    bubble_data[['center', 'bbox']] = bubble_data[['center', 'bbox']].astype('object')
    for idx, kp in enumerate(keypoints):
        cx, cy = kp.pt
        # ``kp.size`` is treated as the blob diameter.
        r = kp.size / 2.0
        bbox = (cx - r, cy - r, cx + r, cy + r)
        bubble_data_row = {
            "bubble_number": idx + 1,
            "center": (cx, cy),
            "radius": r,
            "area": np.pi * r**2,
            "bbox": bbox,
        }
        bubble_data.loc[idx] = pd.Series(bubble_data_row)

    if num_blobs:
        median_radius = np.nanmedian(bubble_data["radius"])
    else:
        # ``np.nanmedian`` on an empty column emits a RuntimeWarning;
        # return the documented NaN directly instead.
        median_radius = np.nan
    return num_blobs, median_radius, bubble_data
88+
89+
def _save_debug_overlay(
    img_path: Path,
    bubble_data: pd.DataFrame,
    out_dir: Path,
) -> None:
    """
    Create and save a side-by-side figure containing the original
    image next to the same image with a green rectangle drawn around
    every detected blob.

    Parameters
    ----------
    img_path : Path
        File path to the original image.
    bubble_data : pd.DataFrame
        DataFrame of bubble metadata as returned by _detect_single_image.
        Only the 'bbox' column is read; each entry must unpack to
        ``(x_min, y_min, x_max, y_max)``.
    out_dir : Path
        Directory where the debug PNG will be saved. The file is named
        ``<image stem>_debug.png``. The directory is not created here.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read ``img_path``.
    """
    # Accept plain strings as well as Path objects.
    img_path = Path(img_path)

    # Pass a plain string: not every OpenCV release accepts
    # ``pathlib.Path`` objects as the filename argument.
    image_gray = cv2.imread(str(img_path), cv2.IMREAD_GRAYSCALE)
    if image_gray is None:
        raise FileNotFoundError(f"Could not read image for debug overlay: {img_path}")

    image_rgb = skimage.color.gray2rgb(image_gray)
    overlay = image_rgb.copy()

    for bbox in bubble_data["bbox"]:
        # Box corners may be floats; ``int`` truncates them to pixel indices.
        x_min, y_min, x_max, y_max = map(int, bbox)
        cv2.rectangle(overlay, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

    fig, ax = plt.subplots(1, 2, figsize=(12, 8))
    try:
        ax[0].imshow(image_rgb)
        ax[0].set_title("Original")
        ax[0].axis("off")
        ax[1].imshow(overlay)
        ax[1].set_title("Detected blobs")
        ax[1].axis("off")

        png_name = f"{img_path.stem}_debug.png"
        fig.savefig(out_dir / png_name, dpi=300, bbox_inches="tight")
    finally:
        # Always release the figure, even when saving fails, so repeated
        # calls do not accumulate open matplotlib figures.
        plt.close(fig)
132+
133+
def run_opencv(
    df: pd.DataFrame,
    out_dir: Path,
    debug: bool = False,
) -> pd.DataFrame:
    """
    Detect bubbles in every image referenced by df using the
    OpenCV ``SimpleBlobDetector``.

    For each row the per-blob table is written to
    ``<out_dir>/<image stem>_bubble_data.parquet.gzip`` and, when
    ``debug`` is True, a side-by-side diagnostic PNG is saved as well.
    Detection results are cached on disk with joblib under
    ``<out_dir>/.joblib_cache``.

    Parameters
    ----------
    df : pd.DataFrame
        dataframe containing absolute image filepaths in an
        'image_filepath' column (``Path`` objects or strings).
    out_dir : Path
        Absolute path to save the outputs. Created if missing.
    debug : bool
        If True save side by side diagnostic images.

    Returns
    -------
    df_out : pandas.DataFrame
        Copy of df enriched with:
          num_blobs_opencv     number of blobs detected
          median_radii_opencv  median droplet radius

    Raises
    ------
    ValueError
        If df has no 'image_filepath' column or out_dir is not absolute.
    """
    if 'image_filepath' not in df.columns:
        raise ValueError("DataFrame must contain 'image_filepath' column.")

    # Accept plain strings as well as Path objects.
    out_dir = Path(out_dir)
    if not out_dir.is_absolute():
        raise ValueError(
            f"Absolute file path required, got {out_dir}"
        )

    out_dir.mkdir(parents=True, exist_ok=True)

    memory = Memory(location=out_dir / ".joblib_cache", verbose=0)
    cached_detect = memory.cache(_detect_single_image)

    df_out = df.copy()
    df_out[["num_blobs_opencv", "median_radii_opencv"]] = np.nan

    for idx, row in tqdm(
        df_out.iterrows(),
        total=df_out.shape[0],
        desc="OpenCV SimpleBlobDetector",
    ):
        # The column may hold strings; ``.stem`` below needs a Path.
        img_path = Path(row.image_filepath)

        num_blobs, median_r, bubble_data = cached_detect(img_path)

        # NOTE(review): output names use only the file stem, so two images
        # with the same stem in different folders overwrite each other.
        bubble_data.to_parquet(
            out_dir / f"{img_path.stem}_bubble_data.parquet.gzip",
            compression="gzip")

        if debug:
            _save_debug_overlay(img_path, bubble_data, out_dir)

        df_out.loc[idx, "num_blobs_opencv"] = num_blobs  # type: ignore[index]
        df_out.loc[idx, "median_radii_opencv"] = median_r  # type:ignore[index]

    return df_out

0 commit comments

Comments
 (0)