Skip to content

Commit 35e8f2e

Browse files
authored
V15: Rich Text Editor links do not work with query strings and anchors (#17288)
* fix: anchors and query strings do not work

  Since the change from UDIs to localLinks in href, the pattern matched a little too much in the href section, completely ignoring any "extras" such as query strings and anchors after the localLink, which meant that the localLink did not get replaced at all if they were present. This is fixed by limiting the regexp a bit.

* fix: legacy links do not follow the same regexp as new links

  Because we are no longer matching the whole `href` attribute but only some of its contents, we need to fix up the old pattern. It has been extended with matching groups that follow the same pattern as the new links.

* feat: allow a-tags to be multiline

  example:

  ```html
  <a type="document" href="/{localLink:<GUID>}">
    Test
  </a>
  ```

* fix: split regex into two parts: first a tokenizer for a-tags and then a type-finder
* fix: ensure only "document" and "media" are matching to speed up the pattern
* feat: allow a-tags to be multiline
1 parent 1945ac2 commit 35e8f2e

File tree

2 files changed

+69
-43
lines changed

2 files changed

+69
-43
lines changed

src/Umbraco.Core/Templates/HtmlLocalLinkParser.cs

Lines changed: 34 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,15 @@ public sealed class HtmlLocalLinkParser
1414
// <a type="media" href="/{localLink:7e21a725-b905-4c5f-86dc-8c41ec116e39}" title="media">media</a>
1515
// <a type="document" href="/{localLink:eed5fc6b-96fd-45a5-a0f1-b1adfb483c2f}" title="other page">other page</a>
1616
internal static readonly Regex LocalLinkTagPattern = new(
17-
@"<a\s+(?:(?:(?:type=['""](?<type>document|media)['""].*?(?<locallink>href=[""']/{localLink:(?<guid>[a-fA-F0-9-]+)})[""'])|((?<locallink>href=[""']/{localLink:(?<guid>[a-fA-F0-9-]+)})[""'].*?type=(['""])(?<type>document|media)(?:['""])))|(?:(?:type=['""](?<type>document|media)['""])|(?:(?<locallink>href=[""']/{localLink:[a-fA-F0-9-]+})[""'])))[^>]*>",
17+
@"<a.+?href=['""](?<locallink>\/?{localLink:(?<guid>[a-fA-F0-9-]+)})[^>]*?>",
18+
RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline);
19+
20+
internal static readonly Regex TypePattern = new(
21+
"""type=['"](?<type>(?:media|document))['"]""",
1822
RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace);
1923

2024
internal static readonly Regex LocalLinkPattern = new(
21-
@"href=""[/]?(?:\{|\%7B)localLink:([a-zA-Z0-9-://]+)(?:\}|\%7D)",
25+
@"href=['""](?<locallink>\/?(?:\{|\%7B)localLink:(?<guid>[a-zA-Z0-9-://]+)(?:\}|\%7D))",
2226
RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace);
2327

2428
private readonly IPublishedUrlProvider _publishedUrlProvider;
@@ -58,32 +62,28 @@ public string EnsureInternalLinks(string text)
5862
{
5963
if (tagData.Udi is not null)
6064
{
61-
var newLink = "#";
62-
if (tagData.Udi?.EntityType == Constants.UdiEntityType.Document)
63-
{
64-
newLink = _publishedUrlProvider.GetUrl(tagData.Udi.Guid);
65-
}
66-
else if (tagData.Udi?.EntityType == Constants.UdiEntityType.Media)
65+
var newLink = tagData.Udi?.EntityType switch
6766
{
68-
newLink = _publishedUrlProvider.GetMediaUrl(tagData.Udi.Guid);
69-
}
70-
67+
Constants.UdiEntityType.Document => _publishedUrlProvider.GetUrl(tagData.Udi.Guid),
68+
Constants.UdiEntityType.Media => _publishedUrlProvider.GetMediaUrl(tagData.Udi.Guid),
69+
_ => ""
70+
};
7171

7272
text = StripTypeAttributeFromTag(text, tagData.Udi!.EntityType);
73-
text = text.Replace(tagData.TagHref, "href=\"" + newLink);
73+
text = text.Replace(tagData.TagHref, newLink);
7474
}
7575
else if (tagData.IntId.HasValue)
7676
{
7777
var newLink = _publishedUrlProvider.GetUrl(tagData.IntId.Value);
78-
text = text.Replace(tagData.TagHref, "href=\"" + newLink);
78+
text = text.Replace(tagData.TagHref, newLink);
7979
}
8080
}
8181

8282
return text;
8383
}
8484

8585
// under normal circumstances, the type attribute is preceded by a space
86-
// to cover the rare occasion where it isn't, we first replace with a a space and then without.
86+
// to cover the rare occasion where it isn't, we first replace with a space and then without.
8787
private string StripTypeAttributeFromTag(string tag, string type) =>
8888
tag.Replace($" type=\"{type}\"", string.Empty)
8989
.Replace($"type=\"{type}\"", string.Empty);
@@ -93,21 +93,22 @@ private IEnumerable<LocalLinkTag> FindLocalLinkIds(string text)
9393
MatchCollection localLinkTagMatches = LocalLinkTagPattern.Matches(text);
9494
foreach (Match linkTag in localLinkTagMatches)
9595
{
96-
if (linkTag.Groups.Count < 1)
96+
if (Guid.TryParse(linkTag.Groups["guid"].Value, out Guid guid) is false)
9797
{
9898
continue;
9999
}
100100

101-
if (Guid.TryParse(linkTag.Groups["guid"].Value, out Guid guid) is false)
101+
// Find the type attribute
102+
Match typeMatch = TypePattern.Match(linkTag.Value);
103+
if (typeMatch.Success is false)
102104
{
103105
continue;
104106
}
105107

106108
yield return new LocalLinkTag(
107109
null,
108-
new GuidUdi(linkTag.Groups["type"].Value, guid),
109-
linkTag.Groups["locallink"].Value,
110-
linkTag.Value);
110+
new GuidUdi(typeMatch.Groups["type"].Value, guid),
111+
linkTag.Groups["locallink"].Value);
111112
}
112113

113114
// also return legacy results for values that have not been migrated
@@ -124,25 +125,26 @@ private IEnumerable<LocalLinkTag> FindLegacyLocalLinkIds(string text)
124125
MatchCollection tags = LocalLinkPattern.Matches(text);
125126
foreach (Match tag in tags)
126127
{
127-
if (tag.Groups.Count > 0)
128+
if (tag.Groups.Count <= 0)
128129
{
129-
var id = tag.Groups[1].Value; // .Remove(tag.Groups[1].Value.Length - 1, 1);
130+
continue;
131+
}
130132

131-
// The id could be an int or a UDI
132-
if (UdiParser.TryParse(id, out Udi? udi))
133-
{
134-
var guidUdi = udi as GuidUdi;
135-
if (guidUdi is not null)
136-
{
137-
yield return new LocalLinkTag(null, guidUdi, tag.Value, null);
138-
}
139-
}
133+
var id = tag.Groups["guid"].Value;
140134

141-
if (int.TryParse(id, NumberStyles.Integer, CultureInfo.InvariantCulture, out var intId))
135+
// The id could be an int or a UDI
136+
if (UdiParser.TryParse(id, out Udi? udi))
137+
{
138+
if (udi is GuidUdi guidUdi)
142139
{
143-
yield return new LocalLinkTag (intId, null, tag.Value, null);
140+
yield return new LocalLinkTag(null, guidUdi, tag.Groups["locallink"].Value);
144141
}
145142
}
143+
144+
if (int.TryParse(id, NumberStyles.Integer, CultureInfo.InvariantCulture, out var intId))
145+
{
146+
yield return new LocalLinkTag (intId, null, tag.Groups["locallink"].Value);
147+
}
146148
}
147149
}
148150

@@ -155,20 +157,10 @@ public LocalLinkTag(int? intId, GuidUdi? udi, string tagHref)
155157
TagHref = tagHref;
156158
}
157159

158-
public LocalLinkTag(int? intId, GuidUdi? udi, string tagHref, string? fullTag)
159-
{
160-
IntId = intId;
161-
Udi = udi;
162-
TagHref = tagHref;
163-
FullTag = fullTag;
164-
}
165-
166160
public int? IntId { get; }
167161

168162
public GuidUdi? Udi { get; }
169163

170164
public string TagHref { get; }
171-
172-
public string? FullTag { get; }
173165
}
174166
}

tests/Umbraco.Tests.UnitTests/Umbraco.Core/Templates/HtmlLocalLinkParserTests.cs

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,34 @@ public void Returns_Udis_From_Legacy_And_Current_LocalLinks()
117117
[TestCase(
118118
"<a href=\"/{localLink:9931BDE0-AAC3-4BAB-B838-909A7B47570E}\" title=\"world\"type=\"media\">world</a>",
119119
"<a href=\"/media/1001/my-image.jpg\" title=\"world\">world</a>")]
120+
[TestCase(
121+
"<p><a type=\"document\" href=\"/{localLink:9931BDE0-AAC3-4BAB-B838-909A7B47570E}\" title=\"world\">world</a></p><p><a href=\"/{localLink:7e21a725-b905-4c5f-86dc-8c41ec116e39}\" title=\"world\" type=\"media\">world</a></p>",
122+
"<p><a href=\"/my-test-url\" title=\"world\">world</a></p><p><a href=\"/media/1001/my-image.jpg\" title=\"world\">world</a></p>")]
123+
124+
// attributes order should not matter
125+
[TestCase(
126+
"<a rel=\"noopener\" title=\"world\" type=\"document\" href=\"/{localLink:9931BDE0-AAC3-4BAB-B838-909A7B47570E}\">world</a>",
127+
"<a rel=\"noopener\" title=\"world\" href=\"/my-test-url\">world</a>")]
128+
[TestCase(
129+
"<a rel=\"noopener\" title=\"world\" href=\"/{localLink:9931BDE0-AAC3-4BAB-B838-909A7B47570E}\" type=\"document\">world</a>",
130+
"<a rel=\"noopener\" title=\"world\" href=\"/my-test-url\">world</a>")]
131+
[TestCase(
132+
"<a rel=\"noopener\" title=\"world\" href=\"/{localLink:9931BDE0-AAC3-4BAB-B838-909A7B47570E}#anchor\" type=\"document\">world</a>",
133+
"<a rel=\"noopener\" title=\"world\" href=\"/my-test-url#anchor\">world</a>")]
134+
135+
// anchors and query strings
136+
[TestCase(
137+
"<a type=\"document\" href=\"/{localLink:9931BDE0-AAC3-4BAB-B838-909A7B47570E}#anchor\" title=\"world\">world</a>",
138+
"<a href=\"/my-test-url#anchor\" title=\"world\">world</a>")]
139+
[TestCase(
140+
"<a type=\"document\" href=\"/{localLink:9931BDE0-AAC3-4BAB-B838-909A7B47570E}?v=1\" title=\"world\">world</a>",
141+
"<a href=\"/my-test-url?v=1\" title=\"world\">world</a>")]
142+
143+
// custom type ignored
144+
[TestCase(
145+
"<a type=\"custom\" href=\"/{localLink:9931BDE0-AAC3-4BAB-B838-909A7B47570E}\" title=\"world\">world</a>",
146+
"<a type=\"custom\" href=\"/{localLink:9931BDE0-AAC3-4BAB-B838-909A7B47570E}\" title=\"world\">world</a>")]
147+
120148
// legacy
121149
[TestCase(
122150
"hello href=\"{localLink:1234}\" world ",
@@ -127,17 +155,23 @@ public void Returns_Udis_From_Legacy_And_Current_LocalLinks()
127155
[TestCase(
128156
"hello href=\"{localLink:umb://document/9931BDE0AAC34BABB838909A7B47570E}\" world ",
129157
"hello href=\"/my-test-url\" world ")]
158+
[TestCase(
159+
"hello href=\"{localLink:umb://document/9931BDE0AAC34BABB838909A7B47570E}#anchor\" world ",
160+
"hello href=\"/my-test-url#anchor\" world ")]
130161
[TestCase(
131162
"hello href=\"{localLink:umb://media/9931BDE0AAC34BABB838909A7B47570E}\" world ",
132163
"hello href=\"/media/1001/my-image.jpg\" world ")]
164+
[TestCase(
165+
"hello href='{localLink:umb://media/9931BDE0AAC34BABB838909A7B47570E}' world ",
166+
"hello href='/media/1001/my-image.jpg' world ")]
133167

134168
// This one has an invalid char so won't match.
135169
[TestCase(
136170
"hello href=\"{localLink:umb^://document/9931BDE0-AAC3-4BAB-B838-909A7B47570E}\" world ",
137171
"hello href=\"{localLink:umb^://document/9931BDE0-AAC3-4BAB-B838-909A7B47570E}\" world ")]
138172
[TestCase(
139173
"hello href=\"{localLink:umb://document-type/9931BDE0-AAC3-4BAB-B838-909A7B47570E}\" world ",
140-
"hello href=\"#\" world ")]
174+
"hello href=\"\" world ")]
141175
public void ParseLocalLinks(string input, string result)
142176
{
143177
// setup a mock URL provider which we'll use for testing

0 commit comments

Comments
 (0)