Skip to content
This repository was archived by the owner on Jul 10, 2025. It is now read-only.

Commit 195edf7

Browse files
apply_to_images for ToGray (#2558)
* apply_to_images for ToGray * revert tests * Update albumentations/augmentations/pixel/functional.py Co-authored-by: sourcery-ai[bot] <58596630+sourcery-ai[bot]@users.noreply.github.com> * correct docstrings * fix docstring desc * fix return statement * fix docstring --------- Co-authored-by: sourcery-ai[bot] <58596630+sourcery-ai[bot]@users.noreply.github.com>
1 parent 1bea491 commit 195edf7

File tree

3 files changed

+151
-29
lines changed

3 files changed

+151
-29
lines changed

albumentations/augmentations/pixel/functional.py

Lines changed: 99 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@
3838
normalize_per_image,
3939
power,
4040
preserve_channel_dim,
41+
reshape_for_channel,
42+
restore_from_channel,
4143
sz_lut,
4244
uint8_io,
4345
)
@uint8_io
@clipped
def to_gray_from_lab(img: np.ndarray) -> np.ndarray:
    """Convert an RGB image or batch of images to grayscale using LAB color space.

    Converts RGB input to the LAB color space and extracts the L (lightness)
    channel, which tracks human perception of brightness better than simple
    RGB averaging. Batches and volumes are flattened to a single tall 2D
    image via albucore's reshape utilities so OpenCV is invoked only once.

    Args:
        img: Input RGB image(s) as a numpy array. Must have 3 channels in the
            last dimension. Supported shapes:
            - Single image: (H, W, 3)
            - Batch of images: (N, H, W, 3)
            - Volume: (D, H, W, 3)
            - Batch of volumes: (N, D, H, W, 3)

            Supported dtypes:
            - np.uint8: Values in range [0, 255]
            - np.float32: Values in range [0, 1]

    Returns:
        Grayscale image(s) with the same spatial dimensions as input but
        without the channel dimension: (H, W), (N, H, W), (D, H, W) or
        (N, D, H, W). The output dtype matches the input dtype. For float
        inputs, the L channel is normalized to [0, 1] by dividing by 100.

    Raises:
        ValueError: If the last dimension is not 3 (RGB channels), or if the
            number of dimensions is not 3, 4 or 5.

    Examples:
        >>> # Single image
        >>> img = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> gray = to_gray_from_lab(img)
        >>> assert gray.shape == (100, 100)

        >>> # Batch of images - efficiently processed without loops
        >>> batch = np.random.randint(0, 256, (10, 100, 100, 3), dtype=np.uint8)
        >>> gray_batch = to_gray_from_lab(batch)
        >>> assert gray_batch.shape == (10, 100, 100)

    """
    original_dtype = img.dtype

    # Enforce the documented contract; previously a wrong channel count only
    # surfaced as an opaque OpenCV error.
    if img.shape[-1] != 3:
        raise ValueError(f"Expected 3 channels in the last dimension, got {img.shape[-1]}")

    # Single image (H, W, 3): OpenCV handles it directly, no reshaping needed.
    if img.ndim == 3:
        return cv2.cvtColor(img, cv2.COLOR_RGB2LAB)[..., 0]

    if img.ndim == 4:
        # Batch of images (N, H, W, 3) -- the same path also covers a single
        # volume (D, H, W, 3), since both flatten identically.
        has_batch_dim = True
        has_depth_dim = False
    elif img.ndim == 5:
        # Batch of volumes (N, D, H, W, 3).
        has_batch_dim = True
        has_depth_dim = True
    else:
        # Previously fell through with has_batch_dim/has_depth_dim unbound,
        # producing UnboundLocalError; raise the documented ValueError instead.
        raise ValueError(f"Unsupported number of dimensions: {img.ndim}")

    # Flatten batch/volume to one tall 2D image so cv2.cvtColor runs once.
    flattened, original_shape = reshape_for_channel(img, has_batch_dim=has_batch_dim, has_depth_dim=has_depth_dim)

    grayscale_flat = cv2.cvtColor(flattened, cv2.COLOR_RGB2LAB)[..., 0]

    grayscale = restore_from_channel(
        grayscale_flat,
        original_shape,
        has_batch_dim=has_batch_dim,
        has_depth_dim=has_depth_dim,
    )

    # Float32 LAB puts L in [0, 100]; scale to the documented [0, 1] range.
    # NOTE(review): @uint8_io may convert float input to uint8 before this
    # body runs, which would make this branch dead -- confirm against albucore.
    return grayscale / 100.0 if original_dtype == np.float32 else grayscale
13541422

13551423

13561424
@clipped
@@ -1453,10 +1521,14 @@ def to_gray_pca(img: np.ndarray) -> np.ndarray:
14531521
in the color data.
14541522
14551523
Args:
1456-
img (np.ndarray): Input image as a numpy array with shape (height, width, channels).
1524+
img (np.ndarray): Input image as a numpy array. Can be:
1525+
- Single multi-channel image: (H, W, C)
1526+
- Batch of multi-channel images: (N, H, W, C)
1527+
- Single multi-channel volume: (D, H, W, C)
1528+
- Batch of multi-channel volumes: (N, D, H, W, C)
14571529
14581530
Returns:
1459-
np.ndarray: Grayscale image as a 2D numpy array with shape (height, width).
1531+
np.ndarray: Grayscale image with the same spatial dimensions as input.
14601532
If input is uint8, output is uint8 in range [0, 255].
14611533
If input is float32, output is float32 in range [0, 1].
14621534
@@ -1474,14 +1546,14 @@ def to_gray_pca(img: np.ndarray) -> np.ndarray:
14741546
"""
14751547
dtype = img.dtype
14761548
# Reshape the image to a 2D array of pixels
1477-
pixels = img.reshape(-1, img.shape[2])
1549+
pixels = img.reshape(-1, img.shape[-1])
14781550

14791551
# Perform PCA
14801552
pca = PCA(n_components=1)
14811553
pca_result = pca.fit_transform(pixels)
14821554

14831555
# Reshape back to image dimensions and scale to 0-255
1484-
grayscale = pca_result.reshape(img.shape[:2])
1556+
grayscale = pca_result.reshape(img.shape[:-1])
14851557
grayscale = normalize_per_image(grayscale, "min_max")
14861558

14871559
return from_float(grayscale, target_dtype=dtype) if dtype == np.uint8 else grayscale
@@ -1557,14 +1629,13 @@ def grayscale_to_multichannel(
15571629
np.ndarray: Multi-channel image with shape (height, width, num_channels)
15581630
15591631
"""
1560-
# If output should be single channel, just squeeze and return
1632+
# If output should be single channel, add channel dimension if needed
15611633
if num_output_channels == 1:
15621634
return grayscale_image
15631635

1564-
# For multi-channel output, squeeze and stack
15651636
squeezed = np.squeeze(grayscale_image)
1566-
1567-
return cv2.merge([squeezed] * num_output_channels)
1637+
# For multi-channel output, stack channels
1638+
return np.stack([squeezed] * num_output_channels, axis=-1)
15681639

15691640

15701641
@preserve_channel_dim

albumentations/augmentations/pixel/transforms.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3458,6 +3458,57 @@ def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
34583458

34593459
return fpixel.to_gray(img, self.num_output_channels, self.method)
34603460

3461+
def apply_to_images(self, images: np.ndarray, **params: Any) -> np.ndarray:
3462+
"""Apply ToGray to a batch of images.
3463+
3464+
Args:
3465+
images (np.ndarray): Batch of images with shape (N, H, W, C) or (N, H, W).
3466+
**params (Any): Additional parameters.
3467+
3468+
Returns:
3469+
np.ndarray: Batch of grayscale images.
3470+
3471+
"""
3472+
if is_grayscale_image(images, has_batch_dim=True):
3473+
warnings.warn("The image is already gray.", stacklevel=2)
3474+
return images
3475+
3476+
return fpixel.to_gray(images, self.num_output_channels, self.method)
3477+
3478+
def apply_to_volume(self, volume: np.ndarray, **params: Any) -> np.ndarray:
3479+
"""Apply ToGray to a single volume.
3480+
3481+
Args:
3482+
volume (np.ndarray): Volume with shape (D, H, W, C) or (D, H, W).
3483+
**params (Any): Additional parameters.
3484+
3485+
Returns:
3486+
np.ndarray: Grayscale volume.
3487+
3488+
"""
3489+
if is_grayscale_image(volume, has_depth_dim=True):
3490+
warnings.warn("The volume is already gray.", stacklevel=2)
3491+
return volume
3492+
3493+
return fpixel.to_gray(volume, self.num_output_channels, self.method)
3494+
3495+
def apply_to_volumes(self, volumes: np.ndarray, **params: Any) -> np.ndarray:
3496+
"""Apply ToGray to a batch of volumes.
3497+
3498+
Args:
3499+
volumes (np.ndarray): Batch of volumes with shape (N, D, H, W, C) or (N, D, H, W).
3500+
**params (Any): Additional parameters.
3501+
3502+
Returns:
3503+
np.ndarray: Batch of grayscale volumes.
3504+
3505+
"""
3506+
if is_grayscale_image(volumes, has_batch_dim=True, has_depth_dim=True):
3507+
warnings.warn("The volumes are already gray.", stacklevel=2)
3508+
return volumes
3509+
3510+
return fpixel.to_gray(volumes, self.num_output_channels, self.method)
3511+
34613512

34623513
class ToRGB(ImageOnlyTransform):
34633514
"""Convert an input image from grayscale to RGB format.

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
"PyYAML",
99
"typing-extensions>=4.9.0; python_version<'3.10'",
1010
"pydantic>=2.9.2",
11-
"albucore==0.0.25",
11+
"albucore==0.0.26",
1212
"eval-type-backport; python_version<'3.10'",
1313
]
1414

0 commit comments

Comments
 (0)