Skip to content

Commit e2a9890

Browse files
committed
Fixed issue with :::image::: handling.
The tag was not properly closed when a long caption was provided.
1 parent 8966ad8 commit e2a9890

File tree

8 files changed

+73
-12
lines changed

8 files changed

+73
-12
lines changed

.claude/settings.local.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"permissions": {
3+
"allow": [
4+
"Bash(git -C /Users/mark/git/ConvertLearnToDoc log --oneline -20)",
5+
"Bash(git -C /Users/mark/git/ConvertLearnToDoc remote -v)"
6+
]
7+
}
8+
}

lib/DocsToMarkdown/Converters/DivSpan.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ internal class DivSpan() : BaseConverter("div", "span")
1818
{ "has-pivot", ZonePivotSection},
1919
{ "xp-tag-hexagon", IgnoreBlock },
2020
{ "embeddedvideo", EmbeddedVideo },
21-
{ "tabGroup", TabGroup }
21+
{ "tabGroup", TabGroup },
22+
{ "visually-hidden", IgnoreBlock}
2223
};
2324

2425
private static string TabGroup(string className, HtmlConverter converter, HtmlNode htmlNode)

lib/DocsToMarkdown/Converters/Image.cs

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,18 @@ public override string Convert(HtmlConverter converter, HtmlNode htmlInput)
4444
"type=\"content\"";
4545
var border = hasBorder ? "border=\"true\" " : "";
4646
var lightbox = lightBoxUrl != null ? $" lightbox=\"{lightBoxUrl}\"" : "";
47-
imageText = $":::image {type} {border}source=\"{source}\" alt-text=\"{altText}\"{lightbox}:::";
47+
48+
var captionText = LookAheadForImageCaptionText(htmlInput);
49+
50+
var sb = new StringBuilder(
51+
$":::image {type} {border}source=\"{source}\" alt-text=\"{altText}\"{lightbox}:::");
52+
if (captionText.Length > 0)
53+
{
54+
sb.AppendLine(captionText);
55+
sb.Append(":::image-end:::");
56+
}
57+
58+
imageText = sb.ToString();
4859
}
4960

5061
else if (lightBoxUrl != null)
@@ -64,4 +75,15 @@ public override string Convert(HtmlConverter converter, HtmlNode htmlInput)
6475

6576
return converter.ParentPrefix != "" ? Environment.NewLine + imageText : imageText;
6677
}
78+
79+
/// <summary>
/// Looks at the node that follows the image's parent and, when that node carries
/// the "visually-hidden" class, returns its text for use as the image's long caption.
/// </summary>
/// <param name="htmlInput">The image node currently being converted.</param>
/// <returns>
/// The inner text of the following "visually-hidden" node with line endings
/// normalized to <see cref="Environment.NewLine"/>, or <see cref="string.Empty"/>
/// when no such node follows (including when the image has no parent).
/// </returns>
private static string LookAheadForImageCaptionText(HtmlNode htmlInput)
{
    // Expected input shape:
    // <p><img src="test.png" alt="test"></p>
    // <div class="visually-hidden"><p>
    // Some Graphic.</p></div>

    // A node without a parent has nothing to look ahead to; avoid a NullReferenceException.
    var candidate = htmlInput.ParentNode?.NextSibling;

    // Whitespace between the closing </p> and the <div> is parsed as a text node.
    // Step over whitespace-only text so the caption element is still found.
    while (candidate is { NodeType: HtmlNodeType.Text }
           && string.IsNullOrWhiteSpace(candidate.InnerText))
    {
        candidate = candidate.NextSibling;
    }

    if (candidate?.HasClass("visually-hidden") != true)
    {
        return string.Empty;
    }

    // Collapse CRLF to LF first so an existing "\r\n" does not become "\r\r\n"
    // when Environment.NewLine is itself "\r\n".
    return candidate.InnerText
        .Replace("\r\n", "\n")
        .Replace("\n", Environment.NewLine);
}
6789
}

lib/DocsToMarkdown/Converters/Text.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ internal class Text() : BaseConverter("#text")
88
/// <summary>
/// Converts an HTML text node to its Markdown text.
/// </summary>
/// <param name="converter">The converter driving the conversion (not used here).</param>
/// <param name="htmlInput">The text node to convert.</param>
/// <returns>
/// The HTML-decoded inner text, with every non-breaking space (U+00A0)
/// written back out as the <c>&amp;nbsp;</c> entity.
/// </returns>
public override string Convert(HtmlConverter converter, HtmlNode htmlInput)
{
    Debug.Assert(CanConvert(htmlInput));

    // HtmlDecode turns "&nbsp;" into the U+00A0 character; restore the entity.
    // The character must be spelled as a string/char literal: 0xa0.ToString()
    // formats the integer 160 and yields "160", which would rewrite every
    // literal "160" in the text and leave real non-breaking spaces untouched.
    return System.Net.WebUtility.HtmlDecode(htmlInput.InnerText)
        .Replace("\u00A0", "&nbsp;");
}
1314
}

lib/Markdig.Renderer.Docx/DocxRenderer.cs

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
using System.Diagnostics;
22
using System.Drawing;
33
using System.Reflection;
4+
using System.Text;
45
using Markdig.Extensions.Yaml;
56
using Markdig.Renderer.Docx.Blocks;
67
using Markdig.Renderer.Docx.Inlines;
@@ -245,8 +246,9 @@ public Drawing InsertImage(Paragraph currentParagraph, MarkdownObject owner, str
245246
picture.Description = altText;
246247
currentParagraph.Add(picture);
247248

248-
if (!string.IsNullOrEmpty(title))
249-
picture.Drawing.AddCaption(": " + title);
249+
var captionText = SanitizeCaptionText(title);
250+
if (!string.IsNullOrEmpty(captionText))
251+
picture.Drawing.AddCaption(": " + captionText);
250252

251253
return picture.Drawing;
252254
}
@@ -292,6 +294,21 @@ private string DetermineContentTypeFromUrl(string imageUrl)
292294

293295
/// <summary>
/// Determines whether a path begins with "http", ignoring case.
/// </summary>
/// <param name="path">The path or URL to test; may be null.</param>
/// <returns>
/// <c>true</c> when <paramref name="path"/> starts with "http" in any casing;
/// <c>false</c> otherwise, including when <paramref name="path"/> is null.
/// </returns>
private static bool IsInternetUrl(string path) =>
    // Ordinal, case-insensitive comparison: no lower-cased copy is allocated and
    // the result does not depend on the current culture.
    path?.StartsWith("http", StringComparison.OrdinalIgnoreCase) == true;
294296

297+
/// <summary>
/// Flattens caption text onto a single line: every control character is treated
/// as a space, and runs of spaces are collapsed to one with the ends trimmed.
/// </summary>
/// <param name="text">The raw caption text; may be null.</param>
/// <returns>
/// <c>null</c> when <paramref name="text"/> is null, empty, or whitespace only;
/// otherwise the flattened text. Input made up solely of non-whitespace control
/// characters produces an empty string.
/// </returns>
private static string SanitizeCaptionText(string text)
{
    if (string.IsNullOrWhiteSpace(text))
    {
        return null;
    }

    // Blank out control characters (tabs, line breaks, ...) in a working copy.
    var chars = text.ToCharArray();
    for (var i = 0; i < chars.Length; i++)
    {
        if (char.IsControl(chars[i]))
        {
            chars[i] = ' ';
        }
    }

    // Splitting on spaces and dropping the empty pieces collapses repeated
    // spaces and removes leading/trailing ones in a single step.
    var words = new string(chars).Split(' ', StringSplitOptions.RemoveEmptyEntries);
    return string.Join(" ", words);
}
311+
295312
/// <summary>
296313
/// Returns a specific embedded resource by name.
297314
/// </summary>

lib/Markdig.Renderer.Docx/TripleColonExtensions/TripleColonProcessor.cs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
using System.Drawing;
1+
using System.Drawing;
22
using IContainer = DXPlus.IContainer;
33

44
namespace Markdig.Renderer.Docx.TripleColonExtensions;
@@ -79,8 +79,10 @@ private static void HandleImage(IDocxRenderer owner, IContainer document, Paragr
7979
if (extension.Container?.Count > 0 && type == "complex")
8080
{
8181
// Should be strictly text as this is for screen readers.
82-
description = string.Join("\r\n", extension.Container.Select(b => (b as ParagraphBlock)?.Inline)
83-
.SelectMany(ic => ic.Select(il => il.ToString())));
82+
description = string.Join("\r\n", extension.Container
83+
.Select(b => (b as ParagraphBlock)?.Inline)
84+
.Where(inline => inline != null)
85+
.SelectMany(inline => inline.Select(il => il.ToString())));
8486
}
8587

8688
var drawing = owner.InsertImage(currentParagraph, extension.Container ?? (MarkdownObject)extension.Inlines, source, title, description, hasBorder?.ToLower()=="true");

src/ConvertDocx/Program.cs

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
using CommandLine;
1+
using CommandLine;
22
using Julmar.DocsToMarkdown;
33
using LearnDocUtils;
44
using MSLearnRepos;
@@ -146,7 +146,12 @@ private static async Task DownloadMarkdown(CommandLineOptions options)
146146
var downloader = new DocsConverter(tempFolder, new Uri(options.InputFile));
147147
var createdFiles = await downloader.ConvertAsync(!options.PreferPlainMarkdown,
148148
#if DEBUG
149-
tag => Console.Error.WriteLine($"Skipped: {tag.TrimStart().Substring(0, 20)}"));
149+
tag =>
150+
{
151+
var trimmed = tag.TrimStart();
152+
var prefix = trimmed.Length > 20 ? trimmed[..20] : trimmed;
153+
Console.Error.WriteLine($"Skipped: {prefix}");
154+
});
150155
#else
151156
null);
152157
#endif
@@ -245,7 +250,12 @@ private static async Task<List<string>> DownloadAndConvertAsync(CommandLineOptio
245250
var downloader = new DocsConverter(tempFolder, new Uri(options.InputFile));
246251
var createdFiles = await downloader.ConvertAsync(!options.PreferPlainMarkdown,
247252
#if DEBUG
248-
tag => Console.Error.WriteLine($"Skipped: {tag.TrimStart().Substring(0, 20)}"));
253+
tag =>
254+
{
255+
var trimmed = tag.TrimStart();
256+
var prefix = trimmed.Length > 20 ? trimmed[..20] : trimmed;
257+
Console.Error.WriteLine($"Skipped: {prefix}");
258+
});
249259
#else
250260
null);
251261
#endif

src/ConvertDocx/Properties/launchSettings.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"profiles": {
33
"ConvertDocToDocx": {
44
"commandName": "Project",
5-
"commandLineArgs": "-d -f Docx \"https://learn.microsoft.com/en-us/azure/dev-box/concept-what-are-dev-box-customizations?tabs=team-customizations/\" \"/users/mark/Desktop/test.docx\""
5+
"commandLineArgs": "-f Docx \"https://learn.microsoft.com/training/modules/accessibility-build-foundation-inclusive-learning/\" \"/users/mark/Desktop/test.docx\""
66
}
77
}
88
}

0 commit comments

Comments
 (0)