17 | 17 | import numpy as np |
18 | 18 | from torchvision.models import WeightsEnum |
19 | 19 |
| 20 | +torch_cnn_backbone_dict = { |
| 21 | + "alexnet": torch_models.alexnet, |
| 22 | + "resnet18": torch_models.resnet18, |
| 23 | + "resnet34": torch_models.resnet34, |
| 24 | + "resnet50": torch_models.resnet50, |
| 25 | + "resnet101": torch_models.resnet101, |
| 26 | + "resnext50_32x4d": torch_models.resnext50_32x4d, |
| 27 | + "resnext101_32x8d": torch_models.resnext101_32x8d, |
| 28 | + "wide_resnet50_2": torch_models.wide_resnet50_2, |
| 29 | + "wide_resnet101_2": torch_models.wide_resnet101_2, |
| 30 | + "densenet121": torch_models.densenet121, |
| 31 | + "densenet161": torch_models.densenet161, |
| 32 | + "densenet169": torch_models.densenet169, |
| 33 | + "densenet201": torch_models.densenet201, |
| 34 | + "inception_v3": torch_models.inception_v3, |
| 35 | + "googlenet": torch_models.googlenet, |
| 36 | + "mobilenet_v2": torch_models.mobilenet_v2, |
| 37 | + "mobilenet_v3_large": torch_models.mobilenet_v3_large, |
| 38 | + "mobilenet_v3_small": torch_models.mobilenet_v3_small, |
| 39 | +} |
| 40 | + |
| 41 | +timm_arch_dict = { |
| 42 | + # UNI tile encoder: https://huggingface.co/MahmoodLab/UNI |
| 43 | + "UNI": { |
| 44 | + "model": "hf-hub:MahmoodLab/UNI", |
| 45 | + "init_values": 1e-5, |
| 46 | + "dynamic_img_size": True, |
| 47 | + }, |
| 48 | + # Prov-GigaPath tile encoder: https://huggingface.co/prov-gigapath/prov-gigapath |
| 49 | + "prov-gigapath": {"model": "hf_hub:prov-gigapath/prov-gigapath"}, |
| 50 | + # H-Optimus-0 tile encoder: https://huggingface.co/bioptimus/H-optimus-0 |
| 51 | + "H-optimus-0": { |
| 52 | + "model": "hf-hub:bioptimus/H-optimus-0", |
| 53 | + "init_values": 1e-5, |
| 54 | + "dynamic_img_size": False, |
| 55 | + }, |
| 56 | + # H-Optimus-1 tile encoder: https://huggingface.co/bioptimus/H-optimus-1 |
| 57 | + "H-optimus-1": { |
| 58 | + "model": "hf-hub:bioptimus/H-optimus-1", |
| 59 | + "init_values": 1e-5, |
| 60 | + "dynamic_img_size": False, |
| 61 | + }, |
| 62 | + # H0-mini tile encoder: https://huggingface.co/bioptimus/H0-mini |
| 63 | + "H0-mini": { |
| 64 | + "model": "hf-hub:bioptimus/H0-mini", |
| 65 | + "init_values": 1e-5, |
| 66 | + "dynamic_img_size": False, |
| 67 | + "mlp_layer": timm.layers.SwiGLUPacked, |
| 68 | + "act_layer": torch.nn.SiLU, |
| 69 | + }, |
| 70 | + # UNI2-h tile encoder: https://huggingface.co/MahmoodLab/UNI2-h |
| 71 | + "UNI2": { |
| 72 | + "model": "hf-hub:MahmoodLab/UNI2-h", |
| 73 | + "img_size": 224, |
| 74 | + "patch_size": 14, |
| 75 | + "depth": 24, |
| 76 | + "num_heads": 24, |
| 77 | + "init_values": 1e-5, |
| 78 | + "embed_dim": 1536, |
| 79 | + "mlp_ratio": 2.66667 * 2, |
| 80 | + "num_classes": 0, |
| 81 | + "no_embed_class": True, |
| 82 | + "mlp_layer": timm.layers.SwiGLUPacked, |
| 83 | + "act_layer": torch.nn.SiLU, |
| 84 | + "reg_tokens": 8, |
| 85 | + "dynamic_img_size": True, |
| 86 | + }, |
| 87 | + # Virchow tile encoder: https://huggingface.co/paige-ai/Virchow |
| 88 | + "Virchow": { |
| 89 | + "model": "hf_hub:paige-ai/Virchow", |
| 90 | + "mlp_layer": timm.layers.SwiGLUPacked, |
| 91 | + "act_layer": torch.nn.SiLU, |
| 92 | + }, |
| 93 | + # Virchow2 tile encoder: https://huggingface.co/paige-ai/Virchow2 |
| 94 | + "Virchow2": { |
| 95 | + "model": "hf_hub:paige-ai/Virchow2", |
| 96 | + "mlp_layer": timm.layers.SwiGLUPacked, |
| 97 | + "act_layer": torch.nn.SiLU, |
| 98 | + }, |
| 99 | + # Kaiko tile encoder: |
| 100 | + # https://huggingface.co/1aurent/vit_large_patch14_reg4_224.kaiko_ai_towards_large_pathology_fms |
| 101 | + "kaiko": { |
| 102 | + "model": ( |
| 103 | + "hf_hub:1aurent/" |
| 104 | + "vit_large_patch14_reg4_224.kaiko_ai_towards_large_pathology_fms" |
| 105 | + ), |
| 106 | + "dynamic_img_size": True, |
| 107 | + }, |
| 108 | +} |
| 109 | + |
20 | 110 |
21 | 111 | def _get_architecture( |
22 | 112 | arch_name: str, |
@@ -52,31 +142,11 @@ def _get_architecture( |
52 | 142 | >>> print(model) |
53 | 143 |
54 | 144 | """ |
55 | | - backbone_dict = { |
56 | | - "alexnet": torch_models.alexnet, |
57 | | - "resnet18": torch_models.resnet18, |
58 | | - "resnet34": torch_models.resnet34, |
59 | | - "resnet50": torch_models.resnet50, |
60 | | - "resnet101": torch_models.resnet101, |
61 | | - "resnext50_32x4d": torch_models.resnext50_32x4d, |
62 | | - "resnext101_32x8d": torch_models.resnext101_32x8d, |
63 | | - "wide_resnet50_2": torch_models.wide_resnet50_2, |
64 | | - "wide_resnet101_2": torch_models.wide_resnet101_2, |
65 | | - "densenet121": torch_models.densenet121, |
66 | | - "densenet161": torch_models.densenet161, |
67 | | - "densenet169": torch_models.densenet169, |
68 | | - "densenet201": torch_models.densenet201, |
69 | | - "inception_v3": torch_models.inception_v3, |
70 | | - "googlenet": torch_models.googlenet, |
71 | | - "mobilenet_v2": torch_models.mobilenet_v2, |
72 | | - "mobilenet_v3_large": torch_models.mobilenet_v3_large, |
73 | | - "mobilenet_v3_small": torch_models.mobilenet_v3_small, |
74 | | - } |
75 | | - if arch_name not in backbone_dict: |
| 145 | + if arch_name not in torch_cnn_backbone_dict: |
76 | 146 | msg = f"Backbone `{arch_name}` is not supported." |
77 | 147 | raise ValueError(msg) |
78 | 148 |
79 | | - creator = backbone_dict[arch_name] |
| 149 | + creator = torch_cnn_backbone_dict[arch_name] |
80 | 150 | if "inception_v3" in arch_name or "googlenet" in arch_name: |
81 | 151 | model = creator(weights=weights, aux_logits=False, num_classes=1000) |
82 | 152 | return nn.Sequential(*list(model.children())[:-3]) |
@@ -123,87 +193,18 @@ def _get_timm_architecture( |
123 | 193 | >>> print(model) |
124 | 194 |
125 | 195 | """ |
126 | | - if arch_name in [f"efficientnet_b{i}" for i in range(8)]: |
127 | | - model = timm.create_model(arch_name, pretrained=pretrained) |
128 | | - return nn.Sequential(*list(model.children())[:-1]) |
129 | | - |
130 | | - arch_map = { |
131 | | - # UNI tile encoder: https://huggingface.co/MahmoodLab/UNI |
132 | | - "UNI": { |
133 | | - "model": "hf-hub:MahmoodLab/UNI", |
134 | | - "init_values": 1e-5, |
135 | | - "dynamic_img_size": True, |
136 | | - }, |
137 | | - # Prov-GigaPath tile encoder: https://huggingface.co/prov-gigapath/prov-gigapath |
138 | | - "prov-gigapath": {"model": "hf_hub:prov-gigapath/prov-gigapath"}, |
139 | | - # H-Optimus-0 tile encoder: https://huggingface.co/bioptimus/H-optimus-0 |
140 | | - "H-optimus-0": { |
141 | | - "model": "hf-hub:bioptimus/H-optimus-0", |
142 | | - "init_values": 1e-5, |
143 | | - "dynamic_img_size": False, |
144 | | - }, |
145 | | - # H-Optimus-1 tile encoder: https://huggingface.co/bioptimus/H-optimus-1 |
146 | | - "H-optimus-1": { |
147 | | - "model": "hf-hub:bioptimus/H-optimus-1", |
148 | | - "init_values": 1e-5, |
149 | | - "dynamic_img_size": False, |
150 | | - }, |
151 | | - # HO-mini tile encoder: https://huggingface.co/bioptimus/H0-mini |
152 | | - "H0-mini": { |
153 | | - "model": "hf-hub:bioptimus/H0-mini", |
154 | | - "init_values": 1e-5, |
155 | | - "dynamic_img_size": False, |
156 | | - "mlp_layer": timm.layers.SwiGLUPacked, |
157 | | - "act_layer": torch.nn.SiLU, |
158 | | - }, |
159 | | - # UNI2-h tile encoder: https://huggingface.co/MahmoodLab/UNI2-h |
160 | | - "UNI2": { |
161 | | - "model": "hf-hub:MahmoodLab/UNI2-h", |
162 | | - "img_size": 224, |
163 | | - "patch_size": 14, |
164 | | - "depth": 24, |
165 | | - "num_heads": 24, |
166 | | - "init_values": 1e-5, |
167 | | - "embed_dim": 1536, |
168 | | - "mlp_ratio": 2.66667 * 2, |
169 | | - "num_classes": 0, |
170 | | - "no_embed_class": True, |
171 | | - "mlp_layer": timm.layers.SwiGLUPacked, |
172 | | - "act_layer": torch.nn.SiLU, |
173 | | - "reg_tokens": 8, |
174 | | - "dynamic_img_size": True, |
175 | | - }, |
176 | | - # Virchow tile encoder: https://huggingface.co/paige-ai/Virchow |
177 | | - "Virchow": { |
178 | | - "model": "hf_hub:paige-ai/Virchow", |
179 | | - "mlp_layer": SwiGLUPacked, |
180 | | - "act_layer": torch.nn.SiLU, |
181 | | - }, |
182 | | - # Virchow2 tile encoder: https://huggingface.co/paige-ai/Virchow2 |
183 | | - "Virchow2": { |
184 | | - "model": "hf_hub:paige-ai/Virchow2", |
185 | | - "mlp_layer": SwiGLUPacked, |
186 | | - "act_layer": torch.nn.SiLU, |
187 | | - }, |
188 | | - # Kaiko tile encoder: |
189 | | - # https://huggingface.co/1aurent/vit_large_patch14_reg4_224.kaiko_ai_towards_large_pathology_fms |
190 | | - "kaiko": { |
191 | | - "model": ( |
192 | | - "hf_hub:1aurent/" |
193 | | - "vit_large_patch14_reg4_224.kaiko_ai_towards_large_pathology_fms" |
194 | | - ), |
195 | | - "dynamic_img_size": True, |
196 | | - }, |
197 | | - } |
198 | | - |
199 | | - if arch_name in arch_map: # pragma: no cover |
| 196 | + if arch_name in timm_arch_dict: # pragma: no cover |
200 | 197 | # Coverage skipped: timm API is tested using efficient U-Net. |
201 | 198 | return timm.create_model( |
202 | | - arch_map[arch_name].pop("model"), |
| 199 | + timm_arch_dict[arch_name]["model"], |
203 | 200 | pretrained=pretrained, |
204 | | - **arch_map[arch_name], |
| 201 | + **{k: v for k, v in timm_arch_dict[arch_name].items() if k != "model"}, |
205 | 202 | ) |
206 | 203 |
| 204 | + if arch_name in timm.list_models(): |
| 205 | + model = timm.create_model(arch_name, pretrained=pretrained) |
| 206 | + return nn.Sequential(*list(model.children())[:-1]) |
| 207 | + |
207 | 208 | msg = f"Backbone `{arch_name}` is not supported." |
208 | 209 | raise ValueError(msg) |
209 | 210 |