-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathNewsletterFormatter.cs
More file actions
126 lines (118 loc) · 4.4 KB
/
NewsletterFormatter.cs
File metadata and controls
126 lines (118 loc) · 4.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
using AngleSharp.Dom;
using AngleSharp;
using System.Text;
namespace NewsletterBuilder;
/// <summary>
/// Converts one newsletter HTML source into the three published formats:
/// a standalone web page, an HTML e-mail body and a plain-text e-mail body.
/// </summary>
public static class NewsletterFormatter
{
    /// <summary>Upper bound on the length of the rule drawn above and below a plain-text heading.</summary>
    private const int MaxHeadingRuleLength = 80;

    /// <summary>
    /// Inlines the CSS of <paramref name="originalHtml"/> and derives the web, e-mail HTML
    /// and plain-text variants from it.
    /// </summary>
    /// <param name="originalHtml">
    /// Newsletter markup. It must contain an <c>h1</c> element (used as the title) and
    /// elements with the ids <c>intro</c> and <c>preheader</c>.
    /// </param>
    /// <param name="twitterHandle">Value written to the <c>twitter:site</c> meta tag of the web page.</param>
    /// <returns>The three rendered formats.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="originalHtml"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidOperationException">
    /// The markup lacks the <c>h1</c>, <c>#intro</c> or <c>#preheader</c> element.
    /// </exception>
    public static async Task<NewsletterFormats> FormatAsync(string originalHtml, string twitterHandle)
    {
        ArgumentNullException.ThrowIfNull(originalHtml);

        var inlined = PreMailer.Net.PreMailer.MoveCssInline(
            originalHtml,
            removeStyleElements: true,
            ignoreElements: "#media,#webstyles");

        var browser = BrowsingContext.New(Configuration.Default);
        var document = await browser.OpenAsync(req => req.Content(inlined.Html));

        var heading = document.QuerySelector("h1")
            ?? throw new InvalidOperationException("The newsletter HTML has no <h1> element to use as the title.");
        var title = heading.TextContent.Trim();
        var intro = GetRequiredElementById(document, "intro").TextContent.Trim();

        // The preheader mirrors the intro text in both the web and the e-mail variant,
        // so it is filled in before the document is cloned for the web page.
        GetRequiredElementById(document, "preheader").TextContent = intro;

        var webHtml = await BuildWebHtmlAsync(browser, document, title, intro, twitterHandle);

        // Strip the web-only parts from the original document to obtain the e-mail variant.
        // A part that is already absent simply has nothing to remove.
        document.GetElementById("webstyles")?.Remove();
        document.GetElementById("webheader")?.Remove();
        document.GetElementById("webscript")?.Remove();
        document.QuerySelector("meta[property=\"og:image\"]")?.Remove();
        var emailHtml = RemoveBlankLines(document.ToHtml());

        return new()
        {
            WebHtml = webHtml,
            EmailHtml = emailHtml,
            EmailPlainText = BuildPlainText(document)
        };
    }

    /// <summary>Returns the element with the given id or throws a descriptive exception when it is missing.</summary>
    private static IElement GetRequiredElementById(IDocument document, string id)
    {
        return document.GetElementById(id)
            ?? throw new InvalidOperationException($"The newsletter HTML has no element with id \"{id}\".");
    }

    /// <summary>
    /// Builds the standalone web page from a re-parsed copy of <paramref name="document"/>:
    /// drops the footer and the last <c>hr</c>, appends title and social meta tags to the head,
    /// and removes HTML comments and blank lines.
    /// </summary>
    private static async Task<string> BuildWebHtmlAsync(
        IBrowsingContext browser,
        IDocument document,
        string title,
        string intro,
        string twitterHandle)
    {
        // Re-parsing the serialized markup yields an independent copy, so edits made here
        // do not leak into the e-mail variant.
        var webDocument = await browser.OpenAsync(req => req.Content(document.ToHtml()));

        webDocument.GetElementById("footer")?.Remove();
        webDocument.GetElementsByTagName("hr").LastOrDefault()?.Remove();

        // Every interpolated value is HTML-encoded so quotes, ampersands or angle brackets
        // in the title, intro or handle cannot break out of the generated markup.
        var encodedTitle = System.Net.WebUtility.HtmlEncode(title);
        var encodedIntro = System.Net.WebUtility.HtmlEncode(intro);
        var encodedHandle = System.Net.WebUtility.HtmlEncode(twitterHandle);

        webDocument.Head.InnerHtml +=
            $"\n <title>{encodedTitle}</title>\n" +
            $" <meta name=\"description\" content=\"{encodedIntro}\" />\n" +
            " <meta name=\"twitter:card\" content=\"summary_large_image\" />\n" +
            $" <meta name=\"twitter:site\" content=\"{encodedHandle}\" />\n" +
            $" <meta property=\"og:title\" content=\"{encodedTitle}\" />\n" +
            " <meta property=\"og:type\" content=\"website\" />\n";

        // Snapshot the comments first: removing nodes while the descendant sequence is
        // still being enumerated would mutate the tree under the iterator.
        foreach (var comment in webDocument.Descendants<IComment>().ToList())
        {
            comment.Remove();
        }

        return RemoveBlankLines(webDocument.ToHtml());
    }

    /// <summary>Drops every line of <paramref name="html"/> that is empty or whitespace-only.</summary>
    private static string RemoveBlankLines(string html)
    {
        return string.Join('\n', html.Split('\n').Where(line => !string.IsNullOrWhiteSpace(line)));
    }

    /// <summary>
    /// Renders the <c>h1</c>, <c>h2</c>, <c>p</c> and <c>li</c> elements of
    /// <paramref name="document"/>, in document order, as plain text.
    /// </summary>
    private static string BuildPlainText(IDocument document)
    {
        var sb = new StringBuilder();

        foreach (var element in document.QuerySelectorAll("h1,h2,p,li"))
        {
            var tag = element.TagName.ToLowerInvariant();
            switch (tag)
            {
                case "h1":
                case "h2":
                {
                    // Headings are framed by a rule as long as the text (capped): '*' for h1, '-' for h2.
                    var headingText = element.TextContent.Trim();
                    var rule = new string(
                        tag == "h1" ? '*' : '-',
                        Math.Min(headingText.Length, MaxHeadingRuleLength));
                    sb.AppendLine();
                    sb.AppendLine(rule);
                    sb.AppendLine(headingText);
                    sb.AppendLine(rule);
                    sb.AppendLine();
                    break;
                }

                case "p":
                    sb.AppendLine();
                    sb.AppendLine(FormatToPlainText(element));
                    sb.AppendLine();
                    break;

                case "li":
                    sb.AppendLine("* " + FormatToPlainText(element));
                    break;

                default:
                    break;
            }
        }

        // Collapse runs of blank lines down to a single blank line. One Replace pass only
        // shortens each run by one line break, so repeat until nothing changes.
        var tripleBreak = Environment.NewLine + Environment.NewLine + Environment.NewLine;
        var doubleBreak = Environment.NewLine + Environment.NewLine;
        int lengthBefore;
        do
        {
            lengthBefore = sb.Length;
            sb.Replace(tripleBreak, doubleBreak);
        }
        while (sb.Length != lengthBefore);

        return sb.ToString().Trim();
    }

    /// <summary>
    /// Flattens the content of <paramref name="element"/> to plain text: links become
    /// <c>text (target)</c>, bold text is wrapped in asterisks and <c>br</c> becomes a line break.
    /// </summary>
    /// <returns>The trimmed plain-text rendering; empty when the element has no content.</returns>
    private static string FormatToPlainText(IElement element)
    {
        var result = new StringBuilder();

        foreach (var child in element.ChildNodes)
        {
            if (child is IText textNode)
            {
                result.Append(textNode.TextContent);
                continue;
            }

            if (child is not IElement childElement)
            {
                // Comments and other non-element nodes contribute nothing.
                continue;
            }

            switch (childElement.TagName.ToLowerInvariant())
            {
                case "a":
                    result.Append(FormatLink(childElement));
                    break;

                case "b":
                case "strong":
                    result.Append('*').Append(FormatToPlainText(childElement)).Append('*');
                    break;

                case "br":
                    result.Append(Environment.NewLine);
                    break;

                default:
                    result.Append(FormatToPlainText(childElement));
                    break;
            }
        }

        return result.ToString().Trim();
    }

    /// <summary>
    /// Renders an anchor as its text, followed by the link target in parentheses when the
    /// target adds information (it exists, is not blank and differs from the text).
    /// </summary>
    private static string FormatLink(IElement anchor)
    {
        var text = anchor.TextContent.Trim();

        // GetAttribute yields null for an anchor without href (e.g. a named anchor),
        // where indexing Attributes["href"].Value would throw.
        var href = anchor.GetAttribute("href");
        if (href is null)
        {
            return text;
        }

        var target = href.Replace("mailto:", string.Empty, StringComparison.OrdinalIgnoreCase);
        if (string.IsNullOrWhiteSpace(target) || target == text)
        {
            return text;
        }

        return $"{text} ({target})";
    }
}
/// <summary>
/// Result bundle holding the three renderings of one newsletter.
/// All properties are init-only: they are set once in an object initializer
/// and cannot be reassigned afterwards.
/// </summary>
public class NewsletterFormats
{
    /// <summary>HTML of the newsletter as a web page.</summary>
    public string WebHtml { get; init; }

    /// <summary>HTML of the newsletter as an e-mail body.</summary>
    public string EmailHtml { get; init; }

    /// <summary>Plain-text rendering of the newsletter for e-mail.</summary>
    public string EmailPlainText { get; init; }
}