Skip to content

Commit 02a7923

Browse files
committed
- Support new docs page structure with multiple content elements.
- Add support for Tab Groups to the HTML > Markdown conversion.
1 parent 78a0002 commit 02a7923

File tree

5 files changed

+67
-15
lines changed

5 files changed

+67
-15
lines changed

lib/DocsToMarkdown/Converters/Anchor.cs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
using System.Diagnostics;
2-
using System.Text;
32
using HtmlAgilityPack;
43

54
namespace Julmar.DocsToMarkdown.Converters;
@@ -9,12 +8,18 @@ internal class Anchor() : BaseConverter("a")
98
public override string Convert(HtmlConverter converter, HtmlNode htmlInput)
109
{
1110
Debug.Assert(CanConvert(htmlInput));
12-
// TODO -- look to data tags
13-
11+
1412
var url = htmlInput.GetAttributeValue("href", "");
1513
if (string.IsNullOrEmpty(url)) // emit the tag directly if no URL.
1614
return htmlInput.OuterHtml;
1715

16+
var role = htmlInput.GetAttributeValue("role", "");
17+
if (role == "tab")
18+
{
19+
Debug.Assert(false, "Tab role should be handled in TabConverter");
20+
return string.Empty;
21+
}
22+
1823
// Handle lightbox images.
1924
if (url.EndsWith("#lightbox", StringComparison.InvariantCultureIgnoreCase)
2025
&& htmlInput.ChildNodes.Any(cn => cn.Name.Equals("img", StringComparison.CurrentCultureIgnoreCase)))

lib/DocsToMarkdown/Converters/DivSpan.cs

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,55 @@ internal class DivSpan() : BaseConverter("div", "span")
1818
{ "has-pivot", ZonePivotSection},
1919
{ "xp-tag-hexagon", IgnoreBlock },
2020
{ "embeddedvideo", EmbeddedVideo },
21+
{ "tabGroup", TabGroup }
2122
};
2223

24+
/// <summary>
/// Converts a "tabGroup" block into Markdown: one tab section per list item,
/// followed by a closing "---" line.
/// </summary>
/// <param name="className">CSS class that selected this handler; forwarded to <c>IgnoreBlock</c> on fallback.</param>
/// <param name="converter">Converter used to render the content of each tab panel.</param>
/// <param name="htmlNode">The tab group element containing the tab list and the panels.</param>
/// <returns>
/// The concatenated tab sections terminated by "---", or the result of
/// <c>IgnoreBlock</c> when the group has no direct ul child or a list item
/// does not start with an anchor whose role is "tab".
/// </returns>
private static string TabGroup(string className, HtmlConverter converter, HtmlNode htmlNode)
{
    // The tab headers are the items of the first <ul> element directly under the group.
    var tabList = htmlNode.ChildNodes.FirstOrDefault(
        child => child.NodeType == HtmlNodeType.Element && child.Name == "ul");
    if (tabList is null)
        return IgnoreBlock(className, converter, htmlNode);

    var markdown = new StringBuilder();
    foreach (var item in tabList.ChildNodes.Where(child => child.Name == "li"))
    {
        // Each item is expected to hold an <a role="tab"> element; anything else
        // means this is not a structure we understand, so hand off the whole block.
        var tabLink = item.ChildNodes.FirstOrDefault(
            child => child.NodeType == HtmlNodeType.Element && child.Name == "a");
        var isTab = tabLink is not null && tabLink.GetAttributeValue("role", "") == "tab";
        if (!isTab)
            return IgnoreBlock(className, converter, htmlNode);

        markdown.Append(TabPanel(converter, htmlNode, tabLink));
    }

    // Terminator line written after the last tab section.
    markdown.AppendLine("---");
    return markdown.ToString();
}
42+
43+
/// <summary>
/// Emits the Markdown for a single tab: a "# [label](#tab/id)" header line,
/// a blank line, and the converted children of the matching panel.
/// </summary>
/// <param name="converter">Converter used to render the children of the panel section.</param>
/// <param name="parentNode">Node whose direct children are searched for the panel section.</param>
/// <param name="anchor">The tab header anchor; its data-tab attribute identifies the panel.</param>
/// <returns>
/// An empty string when the anchor has no data-tab value; the anchor's outer HTML
/// when no section with role "tabpanel" and the same data-tab value is found;
/// otherwise the header followed by the converted panel content.
/// </returns>
private static string TabPanel(HtmlConverter converter, HtmlNode parentNode, HtmlNode anchor)
{
    var data = anchor.GetAttributeValue("data-tab", "");
    if (string.IsNullOrEmpty(data))
        return string.Empty;

    // The panel is a direct child <section role="tabpanel"> carrying the same data-tab value.
    var section = parentNode.ChildNodes.FirstOrDefault(n =>
        n.Name == "section" &&
        n.GetAttributeValue("role", "").Equals("tabpanel", StringComparison.OrdinalIgnoreCase) &&
        n.GetAttributeValue("data-tab", "") == data);
    if (section == null)
        return anchor.OuterHtml;

    var id = "#tab/" + data;

    // InnerText keeps whatever whitespace or line breaks surrounded the label in
    // the HTML; trim it so the header stays on one line. Fall back to the tab id
    // when no visible label remains.
    var anchorText = System.Net.WebUtility.HtmlDecode(anchor.InnerText)?.Trim();
    if (string.IsNullOrEmpty(anchorText))
        anchorText = id;

    var sb = new StringBuilder();
    sb.AppendLine($"# [{anchorText}]({id})")
        .AppendLine()
        .Append(converter.ConvertChildren(section));

    return sb.ToString();
}
69+
2370
private static string EmbeddedVideo(string className, HtmlConverter converter, HtmlNode htmlNode)
2471
{
2572
var frame = htmlNode.ChildNodes.FirstOrDefault(cn => cn.Name == "iframe");

lib/DocsToMarkdown/HtmlConverter.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,12 @@ public IEnumerable<string> GetMarkdown()
5959
if (Metadata.PageKind == "module")
6060
yield break;
6161

62-
var root = htmlDocument.DocumentNode.SelectSingleNode("//div[@class='content']")
63-
?? htmlDocument.DocumentNode.SelectSingleNode("//div[@class='content ']");
64-
if (root == null)
62+
var roots = htmlDocument.DocumentNode.SelectNodes("//div[@class='content']")
63+
?? htmlDocument.DocumentNode.SelectNodes("//div[@class='content ']");
64+
if (roots == null)
6565
throw new ArgumentException("Cannot find content div to parse.");
6666

67-
foreach (var node in root.ChildNodes)
67+
foreach (var node in roots.SelectMany(n => n.ChildNodes))
6868
{
6969
var result = Convert(node);
7070
if (!string.IsNullOrWhiteSpace(result))

lib/Markdig.Renderer.Docx/DocxRenderer.cs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -272,13 +272,13 @@ private string DetermineContentTypeFromUrl(string imageUrl)
272272
".bmp" => "image/bmp",
273273
".ico" => "image/x-icon",
274274
".cur" => "image/x-icon",
275-
".png" => "image/png",
276-
".jpg" => "image/jpeg",
277-
".jpeg" => "image/jpeg",
278-
".gif" => "image/gif",
279-
".svg" => "image/svg+xml",
280-
".tif" => "image/tiff",
281-
".tiff" => "image/tiff",
275+
".png" => ImageContentType.Png,
276+
".jpg" => ImageContentType.Jpg,
277+
".jpeg" => ImageContentType.Jpeg,
278+
".gif" => ImageContentType.Gif,
279+
".svg" => ImageContentType.Svg,
280+
".tif" => ImageContentType.Tiff,
281+
".tiff" => ImageContentType.Tiff,
282282
_ => null
283283
};
284284

src/ConvertDocx/Properties/launchSettings.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"profiles": {
33
"ConvertDocToDocx": {
44
"commandName": "Project",
5-
"commandLineArgs": "-d -f Docx \"https://learn.microsoft.com/en-us/training/modules/empower-educators-it-leaders-work-together-reference-groups/\" \"/users/mark/Desktop/test.docx\""
5+
"commandLineArgs": "-d -f Docx \"https://learn.microsoft.com/en-us/azure/dev-box/concept-what-are-dev-box-customizations?tabs=team-customizations/\" \"/users/mark/Desktop/test.docx\""
66
}
77
}
88
}

0 commit comments

Comments
 (0)