wip: convert HTML <img> tags to Markdown for doc output; add media_utils tests

edulix · edulix · commit 35438bec426a · 2025-12-06T15:13:20.000+01:00
diff --git a/src/release_tool/media_utils.py b/src/release_tool/media_utils.py
@@ -35,22 +35,57 @@ def process_description(
         self,
         description: str,
         version: str,
-        output_path: str
+        output_path: str,
+        convert_html_to_markdown: bool = False
     ) -> str:
         """
         Process description text to download media and update references.
 
         Args:
-            description: Markdown text with potential media URLs
+            description: Markdown text with potential media URLs (may contain HTML img tags)
             version: Version string for path substitution
             output_path: Path to the output release notes file
+            convert_html_to_markdown: If True, convert HTML img tags to Markdown format
 
         Returns:
             Updated description with local media references
         """
         if not self.download_enabled or not description:
             return description
 
+        # Process HTML img tags if requested (for doc output)
+        if convert_html_to_markdown:
+            # Pattern for HTML img tags: <img ... />
+            # We need to extract src and alt in any order
+            html_img_pattern = r'<img\s+([^>]*?)\s*/>'
+            
+            def replace_html_img(match):
+                """Return Markdown for a remote <img/> match, else the original tag."""
+                original_tag = match.group(0)
+                img_attrs = match.group(1)
+                # The lookbehind rejects look-alike attributes such as data-src,
+                # which the bare 'src="' pattern would otherwise match first.
+                src_match = re.search(r'(?<![\w-])src="([^"]+)"', img_attrs)
+                if not src_match:
+                    return original_tag  # No src, keep original
+                url = src_match.group(1)
+
+                # Extract alt attribute (optional); same guard against data-alt etc.
+                alt_match = re.search(r'(?<![\w-])alt="([^"]*)"', img_attrs)
+                alt_text = alt_match.group(1) if alt_match else "Image"
+
+                # Skip if already a local path
+                if not url.startswith(('http://', 'https://')):
+                    return original_tag
+
+                # Download media and get local path
+                local_path = self._download_media(url, version, output_path)
+                if local_path:
+                    return f'![{alt_text}]({local_path})'
+                return original_tag  # If download fails, keep original
+            
+            description = re.sub(html_img_pattern, replace_html_img, description)
+
         # Find all image and video references in markdown
         # Matches: ![alt](url) and videos with .mp4, .webm, etc.
         media_pattern = r'!\[([^\]]*)\]\(([^)]+)\)'
diff --git a/src/release_tool/policies.py b/src/release_tool/policies.py
@@ -729,7 +729,8 @@ def _prepare_note_for_template(
         note: ReleaseNote,
         version: str,
         output_path: Optional[str],
-        media_downloader
+        media_downloader,
+        convert_html_to_markdown: bool = False
     ) -> Dict[str, Any]:
         """
         Prepare a release note for template rendering.
@@ -743,11 +744,11 @@ def _prepare_note_for_template(
         if media_downloader and output_path:
             if note.description:
                 processed_description = media_downloader.process_description(
-                    note.description, version, output_path
+                    note.description, version, output_path, convert_html_to_markdown
                 )
             if note.migration_notes:
                 processed_migration = media_downloader.process_description(
-                    note.migration_notes, version, output_path
+                    note.migration_notes, version, output_path, convert_html_to_markdown
                 )
 
         # Convert Author objects to dicts for template access
@@ -814,8 +815,16 @@ def format_markdown(
 
         # If doc_output_template is configured, generate Docusaurus version as well
         if self.config.release_notes.doc_output_template:
+            # Create separate media downloader for doc output with correct paths
+            doc_media_downloader = None
+            if self.config.output.download_media and doc_output_path:
+                doc_media_downloader = MediaDownloader(
+                    self.config.output.assets_path,
+                    download_enabled=True
+                )
+            
             doc_notes = self._format_with_doc_template(
-                grouped_notes, version, doc_output_path, media_downloader, release_notes
+                grouped_notes, version, doc_output_path, doc_media_downloader, release_notes
             )
             return (release_notes, doc_notes)
 
@@ -942,7 +951,8 @@ def render_release_notes(preserve_br: bool = True) -> str:
             notes_data = []
             for note in notes:
                 note_dict = self._prepare_note_for_template(
-                    note, version, output_path, media_downloader
+                    note, version, output_path, media_downloader,
+                    convert_html_to_markdown=True  # Convert HTML img tags to Markdown for docs
                 )
                 notes_data.append(note_dict)
                 all_notes_data.append(note_dict)
diff --git a/tests/test_media_utils.py b/tests/test_media_utils.py
@@ -0,0 +1,187 @@
+# SPDX-FileCopyrightText: 2025 Sequent Tech Inc <legal@sequentech.io>
+#
+# SPDX-License-Identifier: MIT
+
+"""Tests for media download and processing utilities."""
+
+import pytest
+from pathlib import Path
+from unittest.mock import Mock, patch, MagicMock
+from release_tool.media_utils import MediaDownloader
+
+
+@pytest.fixture
+def media_downloader(tmp_path):
+    """Provide a MediaDownloader with downloads enabled and an assets path under tmp_path."""
+    assets_location = tmp_path / "assets" / "v{{ major }}.{{ minor }}"
+    return MediaDownloader(str(assets_location), download_enabled=True)
+
+
+def test_process_markdown_images(media_downloader, tmp_path):
+    """A remote Markdown image is downloaded and re-pointed at the local copy."""
+    notes_file = str(tmp_path / "release-notes.md")
+    remote_url = "https://github.com/user-attachments/assets/test-image.png"
+    local_copy = "assets/v1.0/abc123_test-image.png"
+
+    description = """
+Some text here.
+![Example Image](https://github.com/user-attachments/assets/test-image.png)
+More text.
+"""
+
+    with patch.object(
+        media_downloader, '_download_media', return_value=local_copy
+    ) as fake_download:
+        rewritten = media_downloader.process_description(
+            description, "1.0.0", notes_file
+        )
+
+    # The stub was hit once, with the image URL among its positional arguments.
+    fake_download.assert_called_once()
+    assert remote_url in fake_download.call_args[0]
+    assert "![Example Image](assets/v1.0/abc123_test-image.png)" in rewritten
+
+
+def test_process_html_images_conversion(media_downloader, tmp_path):
+    """With conversion on, a remote HTML img tag becomes a local Markdown image."""
+    notes_file = str(tmp_path / "release-notes.md")
+    remote_url = "https://github.com/user-attachments/assets/8184a4b2-25f5-42d9-85c3-296e81ddd4d3"
+
+    description = """
+Some text here.
+<img width="1014" height="835" alt="Screenshot" src="https://github.com/user-attachments/assets/8184a4b2-25f5-42d9-85c3-296e81ddd4d3" />
+More text.
+"""
+
+    with patch.object(
+        media_downloader, '_download_media',
+        return_value="assets/v1.0/abc123_screenshot.png",
+    ) as fake_download:
+        rewritten = media_downloader.process_description(
+            description, "1.0.0", notes_file, convert_html_to_markdown=True
+        )
+
+    # One download happened and the tag's src was among its positional args.
+    fake_download.assert_called_once()
+    assert remote_url in fake_download.call_args[0]
+    # The tag is replaced by Markdown that uses its alt text and the local path.
+    assert "![Screenshot](assets/v1.0/abc123_screenshot.png)" in rewritten
+    assert "<img" not in rewritten
+
+
+def test_process_html_images_without_conversion(media_downloader, tmp_path):
+    """With convert_html_to_markdown=False, HTML img tags are left as they are."""
+    notes_file = str(tmp_path / "release-notes.md")
+    remote_url = "https://github.com/user-attachments/assets/8184a4b2-25f5-42d9-85c3-296e81ddd4d3"
+
+    description = """
+Some text here.
+<img width="1014" height="835" alt="Screenshot" src="https://github.com/user-attachments/assets/8184a4b2-25f5-42d9-85c3-296e81ddd4d3" />
+More text.
+"""
+
+    with patch.object(
+        media_downloader, '_download_media',
+        return_value="assets/v1.0/abc123_screenshot.png",
+    ) as fake_download:
+        untouched = media_downloader.process_description(
+            description, "1.0.0", notes_file, convert_html_to_markdown=False
+        )
+
+    # No download is attempted and the tag, including its remote src, remains.
+    fake_download.assert_not_called()
+    assert "<img" in untouched
+    assert remote_url in untouched
+
+
+def test_process_html_images_with_default_alt(media_downloader, tmp_path):
+    """An HTML img tag that has no alt attribute is converted with alt text "Image"."""
+    notes_file = str(tmp_path / "release-notes.md")
+    description = '<img src="https://example.com/image.png" width="100" />'
+
+    with patch.object(
+        media_downloader, '_download_media',
+        return_value="assets/v1.0/abc123_image.png",
+    ):
+        rewritten = media_downloader.process_description(
+            description, "1.0.0", notes_file, convert_html_to_markdown=True
+        )
+
+    # No alt attribute in the tag, so the fallback alt text "Image" is expected.
+    assert "![Image](assets/v1.0/abc123_image.png)" in rewritten
+
+
+def test_process_mixed_markdown_and_html_images(media_downloader, tmp_path):
+    """A text holding both a Markdown image and an HTML img tag has both localized."""
+    notes_file = str(tmp_path / "release-notes.md")
+    # HTML tags are handled before Markdown images, so the stub's first result
+    # goes to the <img> tag and the second to the Markdown image.
+    local_paths = [
+        "assets/v1.0/md_markdown.png",
+        "assets/v1.0/md2_markdown.png",
+    ]
+
+    description = """
+# Title
+
+Here's a Markdown image:
+![Markdown Image](https://example.com/markdown.png)
+
+And an HTML image:
+<img alt="HTML Image" src="https://example.com/html.png" width="500" />
+
+End of document.
+"""
+
+    with patch.object(
+        media_downloader, '_download_media', side_effect=local_paths
+    ) as fake_download:
+        rewritten = media_downloader.process_description(
+            description, "1.0.0", notes_file, convert_html_to_markdown=True
+        )
+
+    assert fake_download.call_count == 2
+    assert "![Markdown Image](assets/v1.0/md2_markdown.png)" in rewritten
+    assert "![HTML Image](assets/v1.0/md_markdown.png)" in rewritten
+    assert "<img" not in rewritten
+
+
+def test_skip_local_paths(media_downloader, tmp_path):
+    """References that are not http(s) URLs trigger no download and stay in the text."""
+    notes_file = str(tmp_path / "release-notes.md")
+
+    description = """
+![Local Image](./local/path/image.png)
+<img src="assets/another.png" alt="Another Local" />
+"""
+
+    with patch.object(media_downloader, '_download_media') as fake_download:
+        processed = media_downloader.process_description(
+            description, "1.0.0", notes_file, convert_html_to_markdown=True
+        )
+
+    # Neither reference is an http(s) URL, so nothing may be fetched.
+    fake_download.assert_not_called()
+
+    # Both local references survive in the output.
+    for local_ref in ("./local/path/image.png", "assets/another.png"):
+        assert local_ref in processed
+
+
+def test_disabled_media_downloader(tmp_path):
+    """A MediaDownloader created with download_enabled=False returns its input as-is."""
+    disabled = MediaDownloader(str(tmp_path / "assets"), download_enabled=False)
+    notes_file = str(tmp_path / "release-notes.md")
+
+    description = """
+![Image](https://example.com/image.png)
+<img src="https://example.com/html.png" alt="HTML" />
+"""
+
+    # With downloads disabled the text must come back exactly as given,
+    # even though HTML conversion is requested.
+    outcome = disabled.process_description(
+        description, "1.0.0", notes_file, convert_html_to_markdown=True
+    )
+
+    assert outcome == description