Fix: correct import, translate docstring to English, use a safer channel heuristic, and make channel-last detection for `y` more robust (refs #8366)

林旻佑 · 林旻佑 · commit 02d528c88594 · 2025-08-16T16:13:04.000+08:00
Signed-off-by: 林旻佑 <linminyou@linminyoudeMacBook-Air.local>
diff --git a/monai/inferers/utils.py b/monai/inferers/utils.py
@@ -40,37 +40,47 @@
 
 def ensure_channel_first(x: torch.Tensor, spatial_ndim: Optional[int] = None) -> Tuple[torch.Tensor, int]:
     """
-    將張量標準化為 channel-first（N,C,spatial...）。
-    回傳 (可能已轉換的張量, 原本 channel 維度：1 表示本來就在 dim=1；-1 表示本來在最後一維)。
+    Normalize a tensor to channel-first layout (N, C, spatial...).
 
-    支援常見情況：
-      - [N, C, *spatial] -> 原樣返回
-      - [N, *spatial, C] -> 移動最後一維到 dim=1
-    其他模糊情況則丟出 ValueError，避免悄悄算錯。
-    """
-    if not isinstance(x, torch.Tensor):
-        raise TypeError(f"expect torch.Tensor, got {type(x)}")
-    if x.ndim < 3:
-        raise ValueError(f"expect >=3 dims (N,C,spatial...), got shape={tuple(x.shape)}")
+    Args:
+        x: Tensor with shape (N, C, spatial...) or (N, spatial..., C).
+        spatial_ndim: Number of spatial dimensions. If None, inferred as x.ndim - 2.
+
+    Returns:
+        A tuple (x_cf, orig_channel_dim):
+        - x_cf: the tensor in channel-first layout.
+        - orig_channel_dim: 1 if input was already channel-first; -1 if the channel was last.
+
+    Raises:
+        TypeError: if x is not a torch.Tensor.
+        ValueError: if x.ndim < 3 or the channel dimension cannot be inferred unambiguously.
 
-    # 若未指定，估個常見的 2D/3D 空間維度數，僅用於錯誤訊息與判斷參考
+    Notes:
+        Uses a small-channel heuristic (<=32) typical for segmentation/classification. When ambiguous,
+        prefers preserving the input layout or raises ValueError to avoid silent errors.
+    """
+    
+   
     if spatial_ndim is None:
-        spatial_ndim = max(2, min(3, x.ndim - 2))
+        spatial_ndim = x.ndim - 2
 
-    # 簡單啟發式：C 通常不會太大（<=512）
-    c_first_ok = x.shape[1] <= 512
-    c_last_ok = x.shape[-1] <= 512
+    threshold = 32 
+    s1, sl = int(x.shape[1]), int(x.shape[-1])
 
-    # 優先保留 channel-first
-    if c_first_ok and x.ndim >= 2 + spatial_ndim:
+    if s1 <= threshold and sl > threshold:
         return x, 1
-    if c_last_ok:
+    if sl <= threshold and s1 > threshold:
         return x.movedim(-1, 1), -1
 
+    if s1 <= threshold and sl <= threshold:
+        return x, 1
+
     raise ValueError(
-        f"cannot infer channel dim for shape={tuple(x.shape)}; "
-        f"expected [N,C,spatial...] or [N,spatial...,C] (spatial_ndim≈{spatial_ndim})"
-    )
+        f"cannot infer channel dim for shape={tuple(x.shape)}; expected [N,C,spatial...] or [N,spatial...,C]; "
+        f"both dim1={s1} and dim-1={sl} look like spatial dims"
+    )   
+    
+    
 def sliding_window_inference(
     inputs: torch.Tensor | MetaTensor,
     roi_size: Sequence[int] | int,