Skip to content

Commit 35438be

Browse files
committed
wip
1 parent 8b9bc12 commit 35438be

File tree

3 files changed

+239
-7
lines changed

3 files changed

+239
-7
lines changed

src/release_tool/media_utils.py

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,22 +35,57 @@ def process_description(
3535
self,
3636
description: str,
3737
version: str,
38-
output_path: str
38+
output_path: str,
39+
convert_html_to_markdown: bool = False
3940
) -> str:
4041
"""
4142
Process description text to download media and update references.
4243

4344
Args:
44-
description: Markdown text with potential media URLs
45+
description: Markdown text with potential media URLs (may contain HTML img tags)
4546
version: Version string for path substitution
4647
output_path: Path to the output release notes file
48+
convert_html_to_markdown: If True, convert HTML img tags to Markdown format
4749

4850
Returns:
4951
Updated description with local media references
5052
"""
5153
if not self.download_enabled or not description:
5254
return description
5355

56+
# Process HTML img tags if requested (for doc output)
57+
if convert_html_to_markdown:
58+
# Pattern for HTML img tags: <img ... />
59+
# We need to extract src and alt in any order
60+
html_img_pattern = r'<img\s+([^>]*?)\s*/>'
61+
62+
def replace_html_img(match):
63+
img_attrs = match.group(1)
64+
65+
# Extract src attribute
66+
src_match = re.search(r'src="([^"]+)"', img_attrs)
67+
if not src_match:
68+
return match.group(0) # No src, keep original
69+
url = src_match.group(1)
70+
71+
# Extract alt attribute (optional)
72+
alt_match = re.search(r'alt="([^"]*)"', img_attrs)
73+
alt_text = alt_match.group(1) if alt_match else "Image"
74+
75+
# Skip if already a local path
76+
if not url.startswith(('http://', 'https://')):
77+
return match.group(0)
78+
79+
# Download media and get local path
80+
local_path = self._download_media(url, version, output_path)
81+
if local_path:
82+
return f'![{alt_text}]({local_path})'
83+
84+
# If download fails, keep original
85+
return match.group(0)
86+
87+
description = re.sub(html_img_pattern, replace_html_img, description)
88+
5489
# Find all image and video references in markdown
5590
# Matches: ![alt](url) and videos with .mp4, .webm, etc.
5691
media_pattern = r'!\[([^\]]*)\]\(([^)]+)\)'

src/release_tool/policies.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -729,7 +729,8 @@ def _prepare_note_for_template(
729729
note: ReleaseNote,
730730
version: str,
731731
output_path: Optional[str],
732-
media_downloader
732+
media_downloader,
733+
convert_html_to_markdown: bool = False
733734
) -> Dict[str, Any]:
734735
"""
735736
Prepare a release note for template rendering.
@@ -743,11 +744,11 @@ def _prepare_note_for_template(
743744
if media_downloader and output_path:
744745
if note.description:
745746
processed_description = media_downloader.process_description(
746-
note.description, version, output_path
747+
note.description, version, output_path, convert_html_to_markdown
747748
)
748749
if note.migration_notes:
749750
processed_migration = media_downloader.process_description(
750-
note.migration_notes, version, output_path
751+
note.migration_notes, version, output_path, convert_html_to_markdown
751752
)
752753

753754
# Convert Author objects to dicts for template access
@@ -814,8 +815,16 @@ def format_markdown(
814815

815816
# If doc_output_template is configured, generate Docusaurus version as well
816817
if self.config.release_notes.doc_output_template:
818+
# Create separate media downloader for doc output with correct paths
819+
doc_media_downloader = None
820+
if self.config.output.download_media and doc_output_path:
821+
doc_media_downloader = MediaDownloader(
822+
self.config.output.assets_path,
823+
download_enabled=True
824+
)
825+
817826
doc_notes = self._format_with_doc_template(
818-
grouped_notes, version, doc_output_path, media_downloader, release_notes
827+
grouped_notes, version, doc_output_path, doc_media_downloader, release_notes
819828
)
820829
return (release_notes, doc_notes)
821830

@@ -942,7 +951,8 @@ def render_release_notes(preserve_br: bool = True) -> str:
942951
notes_data = []
943952
for note in notes:
944953
note_dict = self._prepare_note_for_template(
945-
note, version, output_path, media_downloader
954+
note, version, output_path, media_downloader,
955+
convert_html_to_markdown=True # Convert HTML img tags to Markdown for docs
946956
)
947957
notes_data.append(note_dict)
948958
all_notes_data.append(note_dict)

tests/test_media_utils.py

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
# SPDX-FileCopyrightText: 2025 Sequent Tech Inc <legal@sequentech.io>
2+
#
3+
# SPDX-License-Identifier: MIT
4+
5+
"""Tests for media download and processing utilities."""
6+
7+
import pytest
8+
from pathlib import Path
9+
from unittest.mock import Mock, patch, MagicMock
10+
from release_tool.media_utils import MediaDownloader
11+
12+
13+
@pytest.fixture
14+
def media_downloader(tmp_path):
15+
"""Create a MediaDownloader instance for testing."""
16+
assets_path = str(tmp_path / "assets" / "v{{ major }}.{{ minor }}")
17+
return MediaDownloader(assets_path, download_enabled=True)
18+
19+
20+
def test_process_markdown_images(media_downloader, tmp_path):
21+
"""Test processing of Markdown image syntax."""
22+
output_path = tmp_path / "release-notes.md"
23+
24+
description = """
25+
Some text here.
26+
![Example Image](https://github.com/user-attachments/assets/test-image.png)
27+
More text.
28+
"""
29+
30+
with patch.object(media_downloader, '_download_media') as mock_download:
31+
mock_download.return_value = "assets/v1.0/abc123_test-image.png"
32+
33+
result = media_downloader.process_description(
34+
description, "1.0.0", str(output_path)
35+
)
36+
37+
# Check that download was called with correct URL
38+
mock_download.assert_called_once()
39+
assert "https://github.com/user-attachments/assets/test-image.png" in mock_download.call_args[0]
40+
41+
# Check that Markdown image was replaced with local path
42+
assert "![Example Image](assets/v1.0/abc123_test-image.png)" in result
43+
44+
45+
def test_process_html_images_conversion(media_downloader, tmp_path):
46+
"""Test processing of HTML img tags with conversion to Markdown."""
47+
output_path = tmp_path / "release-notes.md"
48+
49+
description = """
50+
Some text here.
51+
<img width="1014" height="835" alt="Screenshot" src="https://github.com/user-attachments/assets/8184a4b2-25f5-42d9-85c3-296e81ddd4d3" />
52+
More text.
53+
"""
54+
55+
with patch.object(media_downloader, '_download_media') as mock_download:
56+
mock_download.return_value = "assets/v1.0/abc123_screenshot.png"
57+
58+
result = media_downloader.process_description(
59+
description, "1.0.0", str(output_path), convert_html_to_markdown=True
60+
)
61+
62+
# Check that download was called
63+
mock_download.assert_called_once()
64+
assert "https://github.com/user-attachments/assets/8184a4b2-25f5-42d9-85c3-296e81ddd4d3" in mock_download.call_args[0]
65+
66+
# Check that HTML img was converted to Markdown with local path
67+
assert "![Screenshot](assets/v1.0/abc123_screenshot.png)" in result
68+
# Original HTML should be gone
69+
assert "<img" not in result
70+
71+
72+
def test_process_html_images_without_conversion(media_downloader, tmp_path):
73+
"""Test that HTML img tags are NOT converted when convert_html_to_markdown=False."""
74+
output_path = tmp_path / "release-notes.md"
75+
76+
description = """
77+
Some text here.
78+
<img width="1014" height="835" alt="Screenshot" src="https://github.com/user-attachments/assets/8184a4b2-25f5-42d9-85c3-296e81ddd4d3" />
79+
More text.
80+
"""
81+
82+
with patch.object(media_downloader, '_download_media') as mock_download:
83+
mock_download.return_value = "assets/v1.0/abc123_screenshot.png"
84+
85+
result = media_downloader.process_description(
86+
description, "1.0.0", str(output_path), convert_html_to_markdown=False
87+
)
88+
89+
# Check that download was NOT called (HTML imgs ignored without conversion)
90+
mock_download.assert_not_called()
91+
92+
# Original HTML should still be there
93+
assert "<img" in result
94+
assert "https://github.com/user-attachments/assets/8184a4b2-25f5-42d9-85c3-296e81ddd4d3" in result
95+
96+
97+
def test_process_html_images_with_default_alt(media_downloader, tmp_path):
98+
"""Test HTML img tag without alt attribute gets default alt text."""
99+
output_path = tmp_path / "release-notes.md"
100+
101+
description = '<img src="https://example.com/image.png" width="100" />'
102+
103+
with patch.object(media_downloader, '_download_media') as mock_download:
104+
mock_download.return_value = "assets/v1.0/abc123_image.png"
105+
106+
result = media_downloader.process_description(
107+
description, "1.0.0", str(output_path), convert_html_to_markdown=True
108+
)
109+
110+
# Check that default alt text "Image" was used
111+
assert "![Image](assets/v1.0/abc123_image.png)" in result
112+
113+
114+
def test_process_mixed_markdown_and_html_images(media_downloader, tmp_path):
115+
"""Test processing document with both Markdown and HTML images."""
116+
output_path = tmp_path / "release-notes.md"
117+
118+
description = """
119+
# Title
120+
121+
Here's a Markdown image:
122+
![Markdown Image](https://example.com/markdown.png)
123+
124+
And an HTML image:
125+
<img alt="HTML Image" src="https://example.com/html.png" width="500" />
126+
127+
End of document.
128+
"""
129+
130+
with patch.object(media_downloader, '_download_media') as mock_download:
131+
mock_download.side_effect = [
132+
"assets/v1.0/md_markdown.png", # First call for HTML img
133+
"assets/v1.0/md2_markdown.png" # Second call for Markdown img
134+
]
135+
136+
result = media_downloader.process_description(
137+
description, "1.0.0", str(output_path), convert_html_to_markdown=True
138+
)
139+
140+
# Both images should be processed
141+
assert mock_download.call_count == 2
142+
143+
# Both should be in Markdown format with local paths
144+
assert "![Markdown Image](assets/v1.0/md2_markdown.png)" in result
145+
assert "![HTML Image](assets/v1.0/md_markdown.png)" in result
146+
assert "<img" not in result
147+
148+
149+
def test_skip_local_paths(media_downloader, tmp_path):
150+
"""Test that local paths are not downloaded."""
151+
output_path = tmp_path / "release-notes.md"
152+
153+
description = """
154+
![Local Image](./local/path/image.png)
155+
<img src="assets/another.png" alt="Another Local" />
156+
"""
157+
158+
with patch.object(media_downloader, '_download_media') as mock_download:
159+
result = media_downloader.process_description(
160+
description, "1.0.0", str(output_path), convert_html_to_markdown=True
161+
)
162+
163+
# No downloads should happen for local paths
164+
mock_download.assert_not_called()
165+
166+
# Local paths should remain unchanged
167+
assert "./local/path/image.png" in result
168+
assert "assets/another.png" in result
169+
170+
171+
def test_disabled_media_downloader(tmp_path):
172+
"""Test that disabled MediaDownloader doesn't process anything."""
173+
assets_path = str(tmp_path / "assets")
174+
downloader = MediaDownloader(assets_path, download_enabled=False)
175+
output_path = tmp_path / "release-notes.md"
176+
177+
description = """
178+
![Image](https://example.com/image.png)
179+
<img src="https://example.com/html.png" alt="HTML" />
180+
"""
181+
182+
result = downloader.process_description(
183+
description, "1.0.0", str(output_path), convert_html_to_markdown=True
184+
)
185+
186+
# Nothing should be changed
187+
assert result == description

0 commit comments

Comments
 (0)