Enhance error handling and add input validation in pipeline generator

mocsharp · mocsharp · commit 60ff4b5bacf1 · 2025-08-13T10:44:20.000-07:00
- Replaced bare `except:` clauses in the NiftiWriter and VLMResultsWriter operators with `except Exception:`, so that KeyboardInterrupt and SystemExit are no longer silently swallowed.
- Introduced a new method in AppGenerator to sanitize strings for valid Python identifiers, improving security against code injection.
- Added validation for model IDs in generate_app(): only alphanumeric characters, `/`, `-`, and `_` are accepted, and empty IDs are rejected with a ValueError, preventing potential code injection into generated files.
- Implemented unit tests to verify the correctness of the new sanitization and validation features.

Signed-off-by: [Your Name] <your.email@example.com>
Signed-off-by: Victor Chang <vicchang@nvidia.com>
diff --git a/monai/deploy/operators/nifti_writer_operator.py b/monai/deploy/operators/nifti_writer_operator.py
@@ -76,7 +76,7 @@ def compute(self, op_input, op_output, context):
         filename = None
         try:
             filename = op_input.receive(self.input_name_filename)
-        except:
+        except Exception:
             pass
 
         if image is None:
diff --git a/monai/deploy/operators/vlm_results_writer_operator.py b/monai/deploy/operators/vlm_results_writer_operator.py
@@ -163,5 +163,5 @@ def compute(self, op_input, op_output, context):
                         f,
                         indent=2,
                     )
-            except:
+            except Exception:
                 pass
diff --git a/tools/pipeline-generator/pipeline_generator/generator/app_generator.py b/tools/pipeline-generator/pipeline_generator/generator/app_generator.py
@@ -26,6 +26,32 @@
 class AppGenerator:
     """Generates MONAI Deploy applications from MONAI Bundles."""
 
+    @staticmethod
+    def _sanitize_for_python_identifier(name: str) -> str:
+        """Sanitize a string to be a valid Python identifier.
+
+        Args:
+            name: String to sanitize
+
+        Returns:
+            Valid Python identifier
+        """
+        # Replace invalid characters with underscores
+        sanitized = "".join(c if c.isalnum() or c == "_" else "_" for c in name)
+
+        # Remove leading/trailing underscores
+        sanitized = sanitized.strip("_")
+
+        # Ensure it doesn't start with a digit
+        if sanitized and sanitized[0].isdigit():
+            sanitized = f"_{sanitized}"
+
+        # Ensure it's not empty (all chars were invalid)
+        if not sanitized:
+            sanitized = "app"
+
+        return sanitized
+
     def __init__(self, settings: Optional[Settings] = None) -> None:
         """Initialize the generator.
 
@@ -41,7 +67,11 @@ def __init__(self, settings: Optional[Settings] = None) -> None:
             loader=FileSystemLoader(str(template_dir)),
             trim_blocks=True,
             lstrip_blocks=True,
-            autoescape=False,
+            # Autoescape is intentionally disabled because we're generating
+            # Python code, YAML, and other non-HTML files. HTML escaping would
+            # break the generated code. Security is handled via input validation
+            # in generate_app() method.
+            autoescape=False,  # nosec B701
         )
 
     def generate_app(
@@ -62,6 +92,10 @@ def generate_app(
         Returns:
             Path to the generated application directory
         """
+        # Validate model_id to prevent code injection
+        if not model_id or not all(c.isalnum() or c in "/-_" for c in model_id):
+            raise ValueError(f"Invalid model_id: {model_id}. Only alphanumeric characters, /, -, and _ are allowed.")
+
         # Create output directory
         output_dir.mkdir(parents=True, exist_ok=True)
 
@@ -158,9 +192,14 @@ def _prepare_context(
 
         # Determine app name
         if not app_name:
-            # Sanitize name to ensure valid Python identifier
-            sanitized_name = "".join(c if c.isalnum() else "" for c in model_short_name.title())
-            app_name = f"{sanitized_name}App" if sanitized_name else "GeneratedApp"
+            # For auto-generated names, apply title case after replacing underscores
+            # This ensures "test_model" becomes "TestModel" not "Test_Model"
+            title_name = model_short_name.replace("_", " ").replace("-", " ").title().replace(" ", "")
+            sanitized_name = self._sanitize_for_python_identifier(title_name)
+            app_name = f"{sanitized_name}App"
+        else:
+            # Ensure user-provided app_name is also a valid Python identifier
+            app_name = self._sanitize_for_python_identifier(app_name)
 
         # Determine task type from metadata
         task = metadata.get("task", "segmentation").lower()
diff --git a/tools/pipeline-generator/pipeline_generator/generator/bundle_downloader.py b/tools/pipeline-generator/pipeline_generator/generator/bundle_downloader.py
@@ -51,7 +51,6 @@ def download_bundle(self, model_id: str, output_dir: Path, cache_dir: Optional[P
                 repo_id=model_id,
                 local_dir=bundle_dir,
                 cache_dir=cache_dir,
-                local_dir_use_symlinks=False,  # Copy files instead of symlinks
             )
 
             logger.info(f"Bundle downloaded to: {local_path}")
diff --git a/tools/pipeline-generator/tests/test_bundle_downloader.py b/tools/pipeline-generator/tests/test_bundle_downloader.py
@@ -41,7 +41,6 @@ def test_download_bundle_success(self, mock_snapshot_download, tmp_path):
             repo_id="MONAI/spleen_ct_segmentation",
             local_dir=output_dir / "model",
             cache_dir=cache_dir,
-            local_dir_use_symlinks=False,
         )
 
     @patch("pipeline_generator.generator.bundle_downloader.snapshot_download")
diff --git a/tools/pipeline-generator/tests/test_security.py b/tools/pipeline-generator/tests/test_security.py
@@ -0,0 +1,106 @@
+"""Test security features of the pipeline generator."""
+
+import pytest
+from pathlib import Path
+
+from pipeline_generator.generator.app_generator import AppGenerator
+
+
+class TestSecurity:
+    """Test security measures in the app generator."""
+
+    def test_model_id_validation(self):
+        """Test that invalid model IDs are rejected."""
+        generator = AppGenerator()
+        output_dir = Path("/tmp/test")
+
+        # Valid model IDs
+        valid_ids = [
+            "MONAI/spleen_ct_segmentation",
+            "test-org/model_name",
+            "user/model-with-dashes",
+            "org/model_with_underscores",
+        ]
+
+        # Invalid model IDs that could cause code injection
+        invalid_ids = [
+            "test; rm -rf /",  # Shell command injection
+            "test' OR '1'='1",  # SQL injection style
+            "test<script>alert('xss')</script>",  # HTML/JS injection
+            "test`echo hacked`",  # Command substitution
+            "test$(rm -rf /)",  # Command substitution
+            "test\" + __import__('os').system('ls') + \"",  # Python injection
+            "",  # Empty
+            None,  # None
+        ]
+
+        # Test valid IDs (should not raise)
+        for model_id in valid_ids:
+            # We're just testing validation, not full generation
+            try:
+                # This will fail at download stage, but validation should pass
+                generator.generate_app(model_id, output_dir)
+            except ValueError as e:
+                if "Invalid model_id" in str(e):
+                    pytest.fail(f"Valid model_id '{model_id}' was rejected: {e}")
+                # Other errors are fine (e.g., download failures)
+
+        # Test invalid IDs (should raise ValueError)
+        for model_id in invalid_ids:
+            if model_id is None:
+                continue  # Skip None test as it would fail at type checking
+            with pytest.raises(ValueError, match="Invalid model_id"):
+                generator.generate_app(model_id, output_dir)
+
+    def test_app_name_sanitization(self):
+        """Test that app names are properly sanitized for Python identifiers."""
+        # Test cases mapping input to expected sanitized output
+        test_cases = [
+            ("test; rm -rf /", "test__rm__rfApp"),  # Multiple special chars become underscores
+            ("test-with-dashes", "test_with_dashesApp"),
+            ("test.with.dots", "test_with_dotsApp"),
+            ("test space", "test_spaceApp"),
+            ("123test", "_123testApp"),  # Starting with digit
+            ("Test", "TestApp"),  # Normal case
+        ]
+
+        for input_name, expected_class_name in test_cases:
+            # The AppGenerator will sanitize the name internally
+            # We test the sanitization function directly
+            sanitized = AppGenerator._sanitize_for_python_identifier(input_name)
+            result_with_app = f"{sanitized}App"
+            assert (
+                result_with_app == expected_class_name
+            ), f"Failed for '{input_name}': got '{result_with_app}', expected '{expected_class_name}'"
+
+    def test_sanitize_for_python_identifier(self):
+        """Test the Python identifier sanitization method."""
+        test_cases = [
+            ("normal_name", "normal_name"),
+            ("name-with-dashes", "name_with_dashes"),
+            ("name.with.dots", "name_with_dots"),
+            ("name with spaces", "name_with_spaces"),
+            ("123name", "_123name"),  # Can't start with digit
+            ("", "app"),  # Empty string
+            ("!@#$%", "app"),  # All invalid chars
+            ("name!@#valid", "name___valid"),
+            ("CamelCase", "CamelCase"),  # Preserve case
+        ]
+
+        for input_str, expected in test_cases:
+            result = AppGenerator._sanitize_for_python_identifier(input_str)
+            assert result == expected, f"Failed for '{input_str}': got '{result}', expected '{expected}'"
+
+    def test_no_autoescape_with_comment(self):
+        """Test that autoescape is disabled with proper documentation."""
+        generator = AppGenerator()
+
+        # Verify autoescape is False
+        assert generator.env.autoescape is False
+
+        # The comment explaining why is in the source code
+        # This test just verifies the runtime behavior
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])

Original file line number	Diff line number	Diff line change
`@@ -163,5 +163,5 @@ def compute(self, op_input, op_output, context):`
`163`	`163`	`f,`
`164`	`164`	`indent=2,`
`165`	`165`	`)`
`166`		`- except:`
	`166`	`+ except Exception:`
`167`	`167`	`pass`
Original file line number	Diff line number	Diff line change
`@@ -51,7 +51,6 @@ def download_bundle(self, model_id: str, output_dir: Path, cache_dir: Optional[P`
`51`	`51`	`repo_id=model_id,`
`52`	`52`	`local_dir=bundle_dir,`
`53`	`53`	`cache_dir=cache_dir,`
`54`		`- local_dir_use_symlinks=False, # Copy files instead of symlinks`
`55`	`54`	`)`
`56`	`55`
`57`	`56`	`logger.info(f"Bundle downloaded to: {local_path}")`
Original file line number	Diff line number	Diff line change
`@@ -41,7 +41,6 @@ def test_download_bundle_success(self, mock_snapshot_download, tmp_path):`
`41`	`41`	`repo_id="MONAI/spleen_ct_segmentation",`
`42`	`42`	`local_dir=output_dir / "model",`
`43`	`43`	`cache_dir=cache_dir,`
`44`		`- local_dir_use_symlinks=False,`
`45`	`44`	`)`
`46`	`45`
`47`	`46`	`@patch("pipeline_generator.generator.bundle_downloader.snapshot_download")`