Skip to content

Commit d0c1e33

Browse files
committed
Enhance bundle organization and improve model handling in pipeline generator
- Added functionality to ensure the bundle root is included in sys.path for script imports.
- Introduced a new model configuration for pediatric abdominal CT segmentation in the config file.
- Improved the organization of model files by preferring PyTorch models over TensorRT models and handling subdirectory structures.
- Enhanced unit tests to verify the new model organization logic and ensure correct behavior under various scenarios.

Signed-off-by: Victor Chang <[email protected]>
1 parent 502e352 commit d0c1e33

File tree

6 files changed

+272
-5
lines changed

6 files changed

+272
-5
lines changed

monai/deploy/operators/monai_bundle_inference_operator.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,11 @@ def _init_config(self, config_names):
468468
config_names ([str]): Names of the config (files) in the bundle
469469
"""
470470

471+
# Ensure bundle root is on sys.path so 'scripts.*' can be imported
472+
bundle_root = str(self._bundle_path)
473+
if bundle_root not in sys.path:
474+
sys.path.insert(0, bundle_root)
475+
471476
parser = get_bundle_config(str(self._bundle_path), config_names)
472477
self._parser = parser
473478

tools/pipeline-generator/pipeline_generator/config/config.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,12 @@ endpoints:
4343
- model_id: "MONAI/pancreas_ct_dints_segmentation"
4444
input_type: "nifti"
4545
output_type: "nifti"
46+
- model_id: "MONAI/pediatric_abdominal_ct_segmentation"
47+
input_type: "nifti"
48+
output_type: "nifti"
49+
dependencies:
50+
- nibabel>=3.2.0 # Required for NIfTI file I/O support
51+
- itk>=5.3.0 # Required for ITK-based image readers/writers
4652
- model_id: "MONAI/Llama3-VILA-M3-3B"
4753
input_type: "custom"
4854
output_type: "custom"

tools/pipeline-generator/pipeline_generator/generator/app_generator.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -348,11 +348,21 @@ def _detect_data_format(self, inference_config: Dict[str, Any], modality: str) -
348348
# Check preprocessing transforms for hints
349349
if "preprocessing" in inference_config:
350350
transforms = inference_config["preprocessing"].get("transforms", [])
351-
for transform in transforms:
352-
target = transform.get("_target_", "")
353-
if "LoadImaged" in target or "LoadImage" in target:
354-
# This suggests NIfTI format
351+
# Handle case where transforms might be a string expression (e.g., "$@preprocessing_transforms + @deepedit_transforms")
352+
if isinstance(transforms, str):
353+
# If transforms is a string expression, we can't analyze it directly
354+
# Look for LoadImaged in the inference config keys instead
355+
config_str = str(inference_config)
356+
if "LoadImaged" in config_str or "LoadImage" in config_str:
355357
return False
358+
elif isinstance(transforms, list):
359+
for transform in transforms:
360+
# Ensure transform is a dictionary before calling .get()
361+
if isinstance(transform, dict):
362+
target = transform.get("_target_", "")
363+
if "LoadImaged" in target or "LoadImage" in target:
364+
# This suggests NIfTI format
365+
return False
356366

357367
# Default based on modality
358368
return modality in ["CT", "MR", "MRI"]

tools/pipeline-generator/pipeline_generator/generator/bundle_downloader.py

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,9 +192,84 @@ def organize_bundle_structure(self, bundle_path: Path) -> None:
192192
logger.debug(f"Moved {config_file} to configs/")
193193

194194
# Move model files to models/
195-
model_extensions = [".pt", ".ts", ".onnx"]
195+
# Prefer PyTorch (.pt) > ONNX (.onnx) > TorchScript (.ts) for better compatibility
196+
model_extensions = [".pt", ".onnx", ".ts"]
197+
198+
# First move model files from root directory
196199
for ext in model_extensions:
197200
for model_file in bundle_path.glob(f"*{ext}"):
198201
if model_file.is_file() and not (models_dir / model_file.name).exists():
199202
model_file.rename(models_dir / model_file.name)
200203
logger.debug(f"Moved {model_file.name} to models/")
204+
205+
# Check if we already have a suitable model in the main directory
206+
# Prefer .pt files, then .onnx, then .ts
207+
has_suitable_model = False
208+
for ext in model_extensions:
209+
if any(models_dir.glob(f"*{ext}")):
210+
has_suitable_model = True
211+
break
212+
213+
# If no suitable model in main directory, move from subdirectories
214+
if not has_suitable_model:
215+
# Also move model files from subdirectories to the main models/ directory
216+
# This handles cases where models are in subdirectories like models/A100/
217+
# Prefer PyTorch models over TensorRT models for better compatibility
218+
for ext in model_extensions:
219+
model_files = list(models_dir.glob(f"**/*{ext}"))
220+
if not model_files:
221+
continue
222+
223+
# Filter files that are not in the main models directory
224+
subdirectory_files = [f for f in model_files if f.parent != models_dir]
225+
if not subdirectory_files:
226+
continue
227+
228+
target_name = f"model{ext}"
229+
target_path = models_dir / target_name
230+
if target_path.exists():
231+
continue # Target already exists
232+
233+
# Prefer non-TensorRT models for better compatibility
234+
# TensorRT models often have "_trt" in their name
235+
preferred_file = None
236+
for model_file in subdirectory_files:
237+
if "_trt" not in model_file.name.lower():
238+
preferred_file = model_file
239+
break
240+
241+
# If no non-TensorRT model found, use the first available
242+
if preferred_file is None:
243+
preferred_file = subdirectory_files[0]
244+
245+
# Move the preferred model file
246+
preferred_file.rename(target_path)
247+
logger.debug(f"Moved {preferred_file.name} from {preferred_file.parent.name}/ to models/{target_name}")
248+
249+
# Clean up empty subdirectory if it exists
250+
try:
251+
if preferred_file.parent.exists() and not any(preferred_file.parent.iterdir()):
252+
preferred_file.parent.rmdir()
253+
logger.debug(f"Removed empty directory {preferred_file.parent}")
254+
except OSError:
255+
pass # Directory not empty or other issue
256+
break # Only move one model file total
257+
258+
# Ensure we have model.pt or model.ts in the main directory for MONAI Deploy
259+
# Create symlinks with standard names if needed
260+
standard_model_path = models_dir / "model.pt"
261+
if not standard_model_path.exists():
262+
# Look for any .pt file to link to model.pt
263+
pt_files = list(models_dir.glob("*.pt"))
264+
if pt_files:
265+
# Create a copy with the standard name
266+
pt_files[0].rename(standard_model_path)
267+
logger.debug(f"Renamed {pt_files[0].name} to model.pt")
268+
else:
269+
# No .pt file found, look for .ts file and create model.ts instead
270+
standard_ts_path = models_dir / "model.ts"
271+
if not standard_ts_path.exists():
272+
ts_files = list(models_dir.glob("*.ts"))
273+
if ts_files:
274+
ts_files[0].rename(standard_ts_path)
275+
logger.debug(f"Renamed {ts_files[0].name} to model.ts")

tools/pipeline-generator/tests/test_bundle_downloader.py

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,3 +339,134 @@ def test_get_inference_config_logs_error(self, mock_logger, tmp_path):
339339

340340
assert result is None
341341
mock_logger.error.assert_called()
342+
343+
def test_organize_bundle_structure_subdirectory_models(self, tmp_path):
344+
"""Test organizing models from subdirectories to main models/ directory."""
345+
bundle_path = tmp_path / "bundle"
346+
models_dir = bundle_path / "models"
347+
subdir = models_dir / "A100"
348+
subdir.mkdir(parents=True)
349+
350+
# Create model file in subdirectory
351+
subdir_model = subdir / "dynunet_FT_trt_16.ts"
352+
subdir_model.write_text("tensorrt model")
353+
354+
# Organize structure
355+
self.downloader.organize_bundle_structure(bundle_path)
356+
357+
# Model should be moved to main models/ directory with standard name
358+
assert (models_dir / "model.ts").exists()
359+
assert not subdir_model.exists()
360+
assert not subdir.exists() # Empty subdirectory should be removed
361+
362+
def test_organize_bundle_structure_prefers_pytorch_over_tensorrt(self, tmp_path):
363+
"""Test that PyTorch models are preferred over TensorRT models."""
364+
bundle_path = tmp_path / "bundle"
365+
models_dir = bundle_path / "models"
366+
subdir = models_dir / "A100"
367+
subdir.mkdir(parents=True)
368+
369+
# Create both PyTorch and TensorRT models in subdirectory
370+
pytorch_model = subdir / "dynunet_FT.pt"
371+
tensorrt_model = subdir / "dynunet_FT_trt_16.ts"
372+
pytorch_model.write_bytes(b"pytorch model")
373+
tensorrt_model.write_text("tensorrt model")
374+
375+
# Organize structure
376+
self.downloader.organize_bundle_structure(bundle_path)
377+
378+
# PyTorch model should be preferred and moved
379+
assert (models_dir / "model.pt").exists()
380+
assert not (models_dir / "model.ts").exists()
381+
assert not pytorch_model.exists()
382+
# TensorRT model should remain in subdirectory
383+
assert tensorrt_model.exists()
384+
385+
def test_organize_bundle_structure_standard_naming_pytorch(self, tmp_path):
386+
"""Test renaming PyTorch models to standard names."""
387+
bundle_path = tmp_path / "bundle"
388+
models_dir = bundle_path / "models"
389+
models_dir.mkdir(parents=True)
390+
391+
# Create PyTorch model with custom name
392+
custom_model = models_dir / "dynunet_FT.pt"
393+
custom_model.write_bytes(b"pytorch model")
394+
395+
# Organize structure
396+
self.downloader.organize_bundle_structure(bundle_path)
397+
398+
# Model should be renamed to standard name
399+
assert (models_dir / "model.pt").exists()
400+
assert not custom_model.exists()
401+
402+
def test_organize_bundle_structure_standard_naming_torchscript(self, tmp_path):
403+
"""Test renaming TorchScript models to standard names when no PyTorch model exists."""
404+
bundle_path = tmp_path / "bundle"
405+
models_dir = bundle_path / "models"
406+
models_dir.mkdir(parents=True)
407+
408+
# Create only TorchScript model with custom name
409+
custom_model = models_dir / "custom_model.ts"
410+
custom_model.write_text("torchscript model")
411+
412+
# Organize structure
413+
self.downloader.organize_bundle_structure(bundle_path)
414+
415+
# Model should be renamed to standard name
416+
assert (models_dir / "model.ts").exists()
417+
assert not custom_model.exists()
418+
419+
def test_organize_bundle_structure_skips_when_suitable_model_exists(self, tmp_path):
420+
"""Test that subdirectory organization is skipped when suitable model already exists."""
421+
bundle_path = tmp_path / "bundle"
422+
models_dir = bundle_path / "models"
423+
subdir = models_dir / "A100"
424+
subdir.mkdir(parents=True)
425+
426+
# Create model in main directory
427+
main_model = models_dir / "existing_model.pt"
428+
main_model.write_bytes(b"existing pytorch model")
429+
430+
# Create model in subdirectory
431+
subdir_model = subdir / "dynunet_FT_trt_16.ts"
432+
subdir_model.write_text("tensorrt model")
433+
434+
# Organize structure
435+
self.downloader.organize_bundle_structure(bundle_path)
436+
437+
# Main model should be renamed to standard name
438+
assert (models_dir / "model.pt").exists()
439+
assert not main_model.exists()
440+
441+
# Subdirectory model should remain untouched
442+
assert subdir_model.exists()
443+
assert subdir.exists()
444+
445+
def test_organize_bundle_structure_multiple_extensions_preference(self, tmp_path):
446+
"""Test extension preference order: .pt > .onnx > .ts."""
447+
bundle_path = tmp_path / "bundle"
448+
models_dir = bundle_path / "models"
449+
subdir = models_dir / "A100"
450+
subdir.mkdir(parents=True)
451+
452+
# Create models with different extensions in subdirectory
453+
pt_model = subdir / "model.pt"
454+
onnx_model = subdir / "model.onnx"
455+
ts_model = subdir / "model.ts"
456+
457+
pt_model.write_bytes(b"pytorch model")
458+
onnx_model.write_bytes(b"onnx model")
459+
ts_model.write_text("torchscript model")
460+
461+
# Organize structure
462+
self.downloader.organize_bundle_structure(bundle_path)
463+
464+
# Should prefer .pt model
465+
assert (models_dir / "model.pt").exists()
466+
assert not (models_dir / "model.onnx").exists()
467+
assert not (models_dir / "model.ts").exists()
468+
assert not pt_model.exists()
469+
470+
# Other models should remain in subdirectory
471+
assert onnx_model.exists()
472+
assert ts_model.exists()

tools/pipeline-generator/tests/test_generator.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,46 @@ def test_inference_config_with_loadimage_transform(self):
580580
result = generator._detect_data_format(inference_config, "CT")
581581
assert result is False
582582

583+
def test_inference_config_with_string_transforms(self):
584+
"""Test _detect_data_format with string transforms expression."""
585+
generator = AppGenerator()
586+
587+
# Create inference config with string transforms (like spleen_deepedit_annotation)
588+
inference_config = {
589+
"preprocessing": {
590+
"_target_": "Compose",
591+
"transforms": "$@preprocessing_transforms + @deepedit_transforms + @extra_transforms"
592+
},
593+
"preprocessing_transforms": [
594+
{"_target_": "LoadImaged", "keys": "image"},
595+
{"_target_": "EnsureChannelFirstd", "keys": "image"}
596+
]
597+
}
598+
599+
# This should return False (NIfTI format) because LoadImaged is found in config string
600+
result = generator._detect_data_format(inference_config, "CT")
601+
assert result is False
602+
603+
def test_inference_config_with_string_transforms_no_loadimage(self):
604+
"""Test _detect_data_format with string transforms expression without LoadImaged."""
605+
generator = AppGenerator()
606+
607+
# Create inference config with string transforms but no LoadImaged
608+
inference_config = {
609+
"preprocessing": {
610+
"_target_": "Compose",
611+
"transforms": "$@preprocessing_transforms + @other_transforms"
612+
},
613+
"preprocessing_transforms": [
614+
{"_target_": "SomeOtherTransform", "keys": "image"},
615+
{"_target_": "EnsureChannelFirstd", "keys": "image"}
616+
]
617+
}
618+
619+
# This should return True (DICOM format) for CT modality when no LoadImaged found
620+
result = generator._detect_data_format(inference_config, "CT")
621+
assert result is True
622+
583623
def test_detect_model_type_pathology(self):
584624
"""Test _detect_model_type for pathology models."""
585625
generator = AppGenerator()

0 commit comments

Comments (0)