Skip to content

Commit 8393c50

Browse files
Fix #347: Preserve HTML entities like © during processing
Co-Authored-By: m@martinnormark.com <m@martinnormark.com>
1 parent a34e7a7 commit 8393c50

File tree

4 files changed

+184
-76
lines changed

4 files changed

+184
-76
lines changed
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
using AngleSharp.Dom;
2+
using AngleSharp.Html.Parser;
3+
using PreMailer.Net.Html;
4+
using System;
5+
using System.IO;
6+
using Xunit;
7+
8+
namespace PreMailer.Net.Tests.Html
{
    /// <summary>
    /// Tests that serialize a parsed document through
    /// <see cref="PreserveEntitiesHtmlMarkupFormatter"/> and inspect the resulting markup.
    /// </summary>
    public class PreserveEntitiesHtmlMarkupFormatterTests
    {
        /// <summary>
        /// Markup that contains the copyright entity must come back out with the named
        /// entity and without the literal copyright character.
        /// </summary>
        [Fact]
        public void Text_GivenHtmlWithCopyEntity_PreservesEntity()
        {
            // Arrange
            const string markup = "<html><body><p>&copy; 2025</p></body></html>";

            // Act
            var serialized = Serialize(markup);

            // Assert
            Assert.Contains("&copy;", serialized);
            Assert.DoesNotContain("©", serialized);
        }

        /// <summary>
        /// Parses <paramref name="markup"/> and writes the document back to a string using
        /// the shared <see cref="PreserveEntitiesHtmlMarkupFormatter.Instance"/>.
        /// </summary>
        private static string Serialize(string markup)
        {
            var parsed = new HtmlParser().ParseDocument(markup);

            using (var writer = new StringWriter())
            {
                parsed.ToHtml(writer, PreserveEntitiesHtmlMarkupFormatter.Instance);
                return writer.ToString();
            }
        }
    }
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
using System;
2+
using Xunit;
3+
4+
namespace PreMailer.Net.Tests
{
    /// <summary>
    /// End-to-end tests for the <c>preserveEntities</c> option of <c>PreMailer.MoveCssInline</c>.
    /// </summary>
    public class PreserveEntitiesTests
    {
        /// <summary>
        /// With <c>preserveEntities</c> switched on, the copyright entity in the input must
        /// appear in the output as the named entity, never as the literal character.
        /// </summary>
        [Fact]
        public void MoveCssInline_GivenCopyEntity_PreservesEntity()
        {
            // Arrange
            const string entity = "&copy;";
            var markup = "<html><head></head><body><div>" + entity + "</div></body></html>";

            // Act
            var inlined = PreMailer.MoveCssInline(markup, preserveEntities: true);

            // Assert
            Assert.Contains(entity, inlined.Html);
            Assert.DoesNotContain("©", inlined.Html);
        }
    }
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
using AngleSharp.Html;
2+
using AngleSharp.Dom;
3+
using System;
4+
using System.Text.RegularExpressions;
5+
using System.Collections.Generic;
6+
7+
namespace PreMailer.Net.Html
{
    /// <summary>
    /// An <see cref="HtmlMarkupFormatter"/> that writes a fixed set of symbols
    /// (©, ®, ™, £, €, ¥, §, ±, ¼, ½, ¾) as named HTML entities when serializing text nodes.
    /// </summary>
    /// <remarks>
    /// The substitution works on characters in the serialized text, not on the original
    /// markup: a literal <c>©</c> in the input is emitted as <c>&amp;copy;</c> exactly like
    /// an input that already used the entity. Only <see cref="Text"/> is overridden, so
    /// attribute values are written by the base formatter without this substitution.
    /// </remarks>
    public class PreserveEntitiesHtmlMarkupFormatter : HtmlMarkupFormatter
    {
        /// <summary>Symbol-to-entity table applied to serialized text.</summary>
        private static readonly Dictionary<char, string> EntityReplacements = new Dictionary<char, string>
        {
            { '©', "&copy;" },
            { '®', "&reg;" },
            { '™', "&trade;" },
            { '£', "&pound;" },
            { '€', "&euro;" },
            { '¥', "&yen;" },
            { '§', "&sect;" },
            { '±', "&plusmn;" },
            { '¼', "&frac14;" },
            { '½', "&frac12;" },
            { '¾', "&frac34;" }
        };

        /// <summary>
        /// Elements whose text content is raw text in HTML: character references are not
        /// decoded inside them, so writing <c>&amp;copy;</c> there would change the
        /// stylesheet or script instead of preserving it.
        /// </summary>
        private static readonly HashSet<string> RawTextElements = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
        {
            "style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext"
        };

        /// <summary>Shared instance; hides the base class's <c>Instance</c> member.</summary>
        public static new readonly PreserveEntitiesHtmlMarkupFormatter Instance = new PreserveEntitiesHtmlMarkupFormatter();

        /// <summary>
        /// Serializes a text node with the base formatter, then replaces every symbol listed
        /// in the entity table with its named entity.
        /// </summary>
        /// <param name="text">The character data node being serialized.</param>
        /// <returns>
        /// The base formatter's output with known symbols written as named entities. Text
        /// whose parent is a raw-text element (for example <c>style</c> or <c>script</c>) is
        /// returned exactly as the base formatter produced it.
        /// </returns>
        public override string Text(ICharacterData text)
        {
            var result = base.Text(text);

            // Nothing to substitute, or substitution would corrupt CSS/JS content.
            if (string.IsNullOrEmpty(result) || IsInsideRawTextElement(text))
            {
                return result;
            }

            return EncodeKnownSymbols(result);
        }

        /// <summary>Returns true when the node's parent element is one of the raw-text elements.</summary>
        private static bool IsInsideRawTextElement(ICharacterData text)
        {
            var parent = text.ParentElement;
            return parent != null && RawTextElements.Contains(parent.LocalName);
        }

        /// <summary>
        /// Replaces known symbols in a single pass. The input string is returned unchanged
        /// (no allocation) when it contains none of them.
        /// </summary>
        private static string EncodeKnownSymbols(string value)
        {
            System.Text.StringBuilder builder = null;

            for (var index = 0; index < value.Length; index++)
            {
                string entity;
                if (EntityReplacements.TryGetValue(value[index], out entity))
                {
                    if (builder == null)
                    {
                        // First hit: start the buffer with everything scanned so far.
                        builder = new System.Text.StringBuilder(value.Length + 16);
                        builder.Append(value, 0, index);
                    }

                    builder.Append(entity);
                }
                else if (builder != null)
                {
                    builder.Append(value[index]);
                }
            }

            return builder == null ? value : builder.ToString();
        }
    }
}

PreMailer.Net/PreMailer.Net/PreMailer.cs

Lines changed: 94 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
using AngleSharp.Html.Parser;
77
using AngleSharp.Xhtml;
88
using PreMailer.Net.Extensions;
9+
using PreMailer.Net.Html;
910
using PreMailer.Net.Sources;
1011
using System;
1112
using System.Collections.Generic;
@@ -67,85 +68,95 @@ public PreMailer(Stream stream, Uri baseUri = null)
6768
_cssSelectorParser = new CssSelectorParser();
6869
}
6970

70-
/// <summary>
71-
/// In-lines the CSS within the HTML given.
72-
/// </summary>
73-
/// <param name="html">The HTML input.</param>
74-
/// <param name="removeStyleElements">If set to <c>true</c> the style elements are removed.</param>
75-
/// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.)</param>
76-
/// <param name="css">A string containing a style-sheet for inlining.</param>
77-
/// <param name="stripIdAndClassAttributes">True to strip ID and class attributes</param>
78-
/// <param name="removeComments">True to remove comments, false to leave them intact</param>
79-
/// <param name="preserveMediaQueries">If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node</param>
80-
/// <returns>Returns the html input, with styles moved to inline attributes.</returns>
81-
public static InlineResult MoveCssInline(string html, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false)
82-
{
83-
return new PreMailer(html).MoveCssInline(removeStyleElements, ignoreElements, css, stripIdAndClassAttributes, removeComments, customFormatter, preserveMediaQueries);
84-
}
71+
/// <summary>
72+
/// In-lines the CSS within the given HTML string by creating a <see cref="PreMailer"/> for it and forwarding all options to the instance method.
73+
/// </summary>
74+
/// <param name="html">The HTML input.</param>
75+
/// <param name="removeStyleElements">If set to <c>true</c> the style elements are removed.</param>
76+
/// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.)</param>
77+
/// <param name="css">A string containing a style-sheet for inlining.</param>
78+
/// <param name="stripIdAndClassAttributes">True to strip ID and class attributes</param>
79+
/// <param name="removeComments">True to remove comments, false to leave them intact</param>
80+
/// <param name="customFormatter">Markup formatter used to write the result. When <c>null</c>, a formatter is selected automatically (see <paramref name="preserveEntities"/>).</param>
81+
/// <param name="preserveMediaQueries">If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node</param>
82+
/// <param name="preserveEntities">If set to true, HTML entities like &amp;copy; will be preserved instead of being converted to characters. Has no effect when <paramref name="customFormatter"/> is supplied.</param>
83+
/// <returns>Returns the html input, with styles moved to inline attributes.</returns>
84+
public static InlineResult MoveCssInline(string html, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false, bool preserveEntities = false)
85+
{
86+
return new PreMailer(html).MoveCssInline(removeStyleElements, ignoreElements, css, stripIdAndClassAttributes, removeComments, customFormatter, preserveMediaQueries, preserveEntities);
87+
}
8588

86-
/// <summary>
87-
/// In-lines the CSS within the HTML given.
88-
/// </summary>
89-
/// <param name="stream">The Stream input.</param>
90-
/// <param name="removeStyleElements">If set to <c>true</c> the style elements are removed.</param>
91-
/// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.)</param>
92-
/// <param name="css">A string containing a style-sheet for inlining.</param>
93-
/// <param name="stripIdAndClassAttributes">True to strip ID and class attributes</param>
94-
/// <param name="removeComments">True to remove comments, false to leave them intact</param>
95-
/// <param name="preserveMediaQueries">If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node</param>
96-
/// <returns>Returns the html input, with styles moved to inline attributes.</returns>
97-
public static InlineResult MoveCssInline(Stream stream, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false)
98-
{
99-
return new PreMailer(stream).MoveCssInline(removeStyleElements, ignoreElements, css, stripIdAndClassAttributes, removeComments, customFormatter, preserveMediaQueries);
100-
}
89+
/// <summary>
90+
/// In-lines the CSS within the HTML read from the given stream by creating a <see cref="PreMailer"/> for it and forwarding all options to the instance method.
91+
/// </summary>
92+
/// <param name="stream">The Stream input.</param>
93+
/// <param name="removeStyleElements">If set to <c>true</c> the style elements are removed.</param>
94+
/// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.)</param>
95+
/// <param name="css">A string containing a style-sheet for inlining.</param>
96+
/// <param name="stripIdAndClassAttributes">True to strip ID and class attributes</param>
97+
/// <param name="removeComments">True to remove comments, false to leave them intact</param>
98+
/// <param name="customFormatter">Markup formatter used to write the result. When <c>null</c>, a formatter is selected automatically (see <paramref name="preserveEntities"/>).</param>
99+
/// <param name="preserveMediaQueries">If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node</param>
100+
/// <param name="preserveEntities">If set to true, HTML entities like &amp;copy; will be preserved instead of being converted to characters. Has no effect when <paramref name="customFormatter"/> is supplied.</param>
101+
/// <returns>Returns the html input, with styles moved to inline attributes.</returns>
102+
public static InlineResult MoveCssInline(Stream stream, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false, bool preserveEntities = false)
103+
{
104+
return new PreMailer(stream).MoveCssInline(removeStyleElements, ignoreElements, css, stripIdAndClassAttributes, removeComments, customFormatter, preserveMediaQueries, preserveEntities);
105+
}
101106

102-
/// <summary>
103-
/// In-lines the CSS within the HTML given.
104-
/// </summary>
105-
/// /// <param name="baseUri">The base url that will be used to resolve any relative urls</param>
106-
/// <param name="baseUri">The Url that all relative urls will be off of.</param>
107-
/// <param name="html">The HTML input.</param>
108-
/// <param name="removeStyleElements">If set to <c>true</c> the style elements are removed.</param>
109-
/// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.)</param>
110-
/// <param name="css">A string containing a style-sheet for inlining.</param>
111-
/// <param name="stripIdAndClassAttributes">True to strip ID and class attributes</param>
112-
/// <param name="removeComments">True to remove comments, false to leave them intact</param>
113-
/// <param name="preserveMediaQueries">If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node</param>
114-
/// <returns>Returns the html input, with styles moved to inline attributes.</returns>
115-
public static InlineResult MoveCssInline(Uri baseUri, string html, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false)
116-
{
117-
return new PreMailer(html, baseUri).MoveCssInline(removeStyleElements, ignoreElements, css, stripIdAndClassAttributes, removeComments, customFormatter, preserveMediaQueries);
118-
}
107+
/// <summary>
108+
/// In-lines the CSS within the given HTML string by creating a <see cref="PreMailer"/> with the supplied base URI and forwarding all options to the instance method.
109+
/// </summary>
110+
/// <param name="baseUri">The base URL that will be used to resolve any relative URLs;
111+
/// all relative URLs are taken to be relative to it.</param>
112+
/// <param name="html">The HTML input.</param>
113+
/// <param name="removeStyleElements">If set to <c>true</c> the style elements are removed.</param>
114+
/// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.)</param>
115+
/// <param name="css">A string containing a style-sheet for inlining.</param>
116+
/// <param name="stripIdAndClassAttributes">True to strip ID and class attributes</param>
117+
/// <param name="removeComments">True to remove comments, false to leave them intact</param>
118+
/// <param name="customFormatter">Markup formatter used to write the result. When <c>null</c>, a formatter is selected automatically (see <paramref name="preserveEntities"/>).</param>
119+
/// <param name="preserveMediaQueries">If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node</param>
120+
/// <param name="preserveEntities">If set to true, HTML entities like &amp;copy; will be preserved instead of being converted to characters. Has no effect when <paramref name="customFormatter"/> is supplied.</param>
121+
/// <returns>Returns the html input, with styles moved to inline attributes.</returns>
122+
public static InlineResult MoveCssInline(Uri baseUri, string html, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false, bool preserveEntities = false)
123+
{
124+
return new PreMailer(html, baseUri).MoveCssInline(removeStyleElements, ignoreElements, css, stripIdAndClassAttributes, removeComments, customFormatter, preserveMediaQueries, preserveEntities);
125+
}
119126

120-
/// <summary>
121-
/// In-lines the CSS within the HTML given.
122-
/// </summary>
123-
/// /// <param name="baseUri">The base url that will be used to resolve any relative urls</param>
124-
/// <param name="baseUri">The Url that all relative urls will be off of.</param>
125-
/// <param name="stream">The HTML input.</param>
126-
/// <param name="removeStyleElements">If set to <c>true</c> the style elements are removed.</param>
127-
/// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.)</param>
128-
/// <param name="css">A string containing a style-sheet for inlining.</param>
129-
/// <param name="stripIdAndClassAttributes">True to strip ID and class attributes</param>
130-
/// <param name="removeComments">True to remove comments, false to leave them intact</param>
131-
/// <param name="preserveMediaQueries">If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node</param>
132-
/// <returns>Returns the html input, with styles moved to inline attributes.</returns>
133-
public static InlineResult MoveCssInline(Uri baseUri, Stream stream, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false)
134-
{
135-
return new PreMailer(stream, baseUri).MoveCssInline(removeStyleElements, ignoreElements, css, stripIdAndClassAttributes, removeComments, customFormatter, preserveMediaQueries);
136-
}
127+
/// <summary>
128+
/// In-lines the CSS within the HTML read from the given stream by creating a <see cref="PreMailer"/> with the supplied base URI and forwarding all options to the instance method.
129+
/// </summary>
130+
/// <param name="baseUri">The base URL that will be used to resolve any relative URLs;
131+
/// all relative URLs are taken to be relative to it.</param>
132+
/// <param name="stream">The HTML input.</param>
133+
/// <param name="removeStyleElements">If set to <c>true</c> the style elements are removed.</param>
134+
/// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.)</param>
135+
/// <param name="css">A string containing a style-sheet for inlining.</param>
136+
/// <param name="stripIdAndClassAttributes">True to strip ID and class attributes</param>
137+
/// <param name="removeComments">True to remove comments, false to leave them intact</param>
138+
/// <param name="customFormatter">Markup formatter used to write the result. When <c>null</c>, a formatter is selected automatically (see <paramref name="preserveEntities"/>).</param>
139+
/// <param name="preserveMediaQueries">If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node</param>
140+
/// <param name="preserveEntities">If set to true, HTML entities like &amp;copy; will be preserved instead of being converted to characters. Has no effect when <paramref name="customFormatter"/> is supplied.</param>
141+
/// <returns>Returns the html input, with styles moved to inline attributes.</returns>
142+
public static InlineResult MoveCssInline(Uri baseUri, Stream stream, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false, bool preserveEntities = false)
143+
{
144+
return new PreMailer(stream, baseUri).MoveCssInline(removeStyleElements, ignoreElements, css, stripIdAndClassAttributes, removeComments, customFormatter, preserveMediaQueries, preserveEntities);
145+
}
137146

138-
/// <summary>
139-
/// In-lines the CSS for the current HTML
140-
/// </summary>
141-
/// <param name="removeStyleElements">If set to <c>true</c> the style elements are removed.</param>
142-
/// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.)</param>
143-
/// <param name="css">A string containing a style-sheet for inlining.</param>
144-
/// <param name="stripIdAndClassAttributes">True to strip ID and class attributes</param>
145-
/// <param name="removeComments">True to remove comments, false to leave them intact</param>
146-
/// <param name="preserveMediaQueries">If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node</param>
147-
/// <returns>Returns the html input, with styles moved to inline attributes.</returns>
148-
public InlineResult MoveCssInline(bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false)
147+
/// <summary>
148+
/// In-lines the CSS for the current HTML
149+
/// </summary>
150+
/// <param name="removeStyleElements">If set to <c>true</c> the style elements are removed.</param>
151+
/// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.)</param>
152+
/// <param name="css">A string containing a style-sheet for inlining.</param>
153+
/// <param name="stripIdAndClassAttributes">True to strip ID and class attributes</param>
154+
/// <param name="removeComments">True to remove comments, false to leave them intact</param>
155+
/// <param name="customFormatter">Custom formatter to use</param>
156+
/// <param name="preserveMediaQueries">If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node</param>
157+
/// <param name="preserveEntities">If set to true, HTML entities like &copy; will be preserved instead of being converted to characters</param>
158+
/// <returns>Returns the html input, with styles moved to inline attributes.</returns>
159+
public InlineResult MoveCssInline(bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false, bool preserveEntities = false)
149160
{
150161
// Store the variables used for inlining the CSS
151162
_removeStyleElements = removeStyleElements;
@@ -184,7 +195,14 @@ public InlineResult MoveCssInline(bool removeStyleElements = false, string ignor
184195
RemoveCssComments(cssSourceNodes);
185196
}
186197

187-
IMarkupFormatter markupFormatter = customFormatter ?? GetMarkupFormatterForDocType();
198+
IMarkupFormatter markupFormatter = customFormatter;
199+
200+
if (markupFormatter == null)
201+
{
202+
markupFormatter = preserveEntities ?
203+
PreserveEntitiesHtmlMarkupFormatter.Instance :
204+
GetMarkupFormatterForDocType();
205+
}
188206

189207
using (var sw = new StringWriter())
190208
{

0 commit comments

Comments
 (0)