Skip to content

Commit 7764e8c

Browse files
committed
Fixed removed whitespace in inner text #32
1 parent 6301212 commit 7764e8c

File tree

2 files changed

+23
-8
lines changed

2 files changed

+23
-8
lines changed

src/AngleSharp.Css.Tests/Extensions/InnerText.cs

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -80,5 +80,15 @@ public void GetInnerText(String fixture, String expected)
8080

8181
Assert.AreEqual(expected, doc.Body.GetInnerText());
8282
}
83+
84+
[Test]
85+
public void SpanShouldNotHaveRemovedSpaces_Issue32()
86+
{
87+
var config = Configuration.Default.WithCss();
88+
var document = ("<div><div>Div with <span>a span</span> in it.</div></div>").ToHtmlDocument(config);
89+
var element = document.QuerySelector("div");
90+
91+
Assert.AreEqual("Div with a span in it.", element.GetInnerText());
92+
}
8393
}
8494
}

src/AngleSharp.Css/Extensions/ElementExtensions.cs

Lines changed: 13 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -64,14 +64,14 @@ public static String GetInnerText(this IElement element)
6464

6565
if (!hidden.Value)
6666
{
67+
var offset = 0;
6768
var sb = StringBuilderPool.Obtain();
6869
var requiredLineBreakCounts = new Dictionary<Int32, Int32>();
6970
InnerTextCollection(element, sb, requiredLineBreakCounts, element.ParentElement?.ComputeCurrentStyle());
7071

7172
// Remove any runs of consecutive required line break count items at the start or end of results.
7273
requiredLineBreakCounts.Remove(0);
7374
requiredLineBreakCounts.Remove(sb.Length);
74-
var offset = 0;
7575

7676
// SortedDictionary would be nicer
7777
foreach (var keyval in requiredLineBreakCounts.OrderBy(kv => kv.Key))
@@ -193,7 +193,10 @@ private static void ItcInCssBox(ICssStyleDeclaration elementStyle, ICssStyleDecl
193193

194194
if (node is IText textElement)
195195
{
196-
ProcessText(textElement.Data, sb, parentStyle);
196+
var lastLine = node.NextSibling is null ||
197+
String.IsNullOrEmpty(node.NextSibling.TextContent) ||
198+
node.NextSibling is IHtmlBreakRowElement;
199+
ProcessText(textElement.Data, sb, parentStyle, lastLine);
197200
}
198201
else if (node is IHtmlBreakRowElement)
199202
{
@@ -225,14 +228,14 @@ private static void ItcInCssBox(ICssStyleDeclaration elementStyle, ICssStyleDecl
225228
}
226229
else if (node is IHtmlParagraphElement)
227230
{
228-
requiredLineBreakCounts.TryGetValue(startIndex, out int startIndexCount);
231+
requiredLineBreakCounts.TryGetValue(startIndex, out var startIndexCount);
229232

230233
if (startIndexCount < 2)
231234
{
232235
requiredLineBreakCounts[startIndex] = 2;
233236
}
234237

235-
requiredLineBreakCounts.TryGetValue(sb.Length, out int endIndexCount);
238+
requiredLineBreakCounts.TryGetValue(sb.Length, out var endIndexCount);
236239

237240
if (endIndexCount < 2)
238241
{
@@ -255,14 +258,14 @@ private static void ItcInCssBox(ICssStyleDeclaration elementStyle, ICssStyleDecl
255258

256259
if (isBlockLevel.Value)
257260
{
258-
requiredLineBreakCounts.TryGetValue(startIndex, out int startIndexCount);
261+
requiredLineBreakCounts.TryGetValue(startIndex, out var startIndexCount);
259262

260263
if (startIndexCount < 1)
261264
{
262265
requiredLineBreakCounts[startIndex] = 1;
263266
}
264267

265-
requiredLineBreakCounts.TryGetValue(sb.Length, out int endIndexCount);
268+
requiredLineBreakCounts.TryGetValue(sb.Length, out var endIndexCount);
266269

267270
if (endIndexCount < 1)
268271
{
@@ -387,7 +390,7 @@ private static Boolean IsBlockLevel(INode node)
387390
}
388391
}
389392

390-
private static void ProcessText(String text, StringBuilder sb, ICssStyleDeclaration style)
393+
private static void ProcessText(String text, StringBuilder sb, ICssStyleDeclaration style, Boolean lastLine)
391394
{
392395
var startIndex = sb.Length;
393396
var whiteSpace = style?.GetWhiteSpace();
@@ -459,11 +462,13 @@ private static void ProcessText(String text, StringBuilder sb, ICssStyleDeclarat
459462
sb.Append(c);
460463
}
461464

462-
if (isWhiteSpace) // ended with whitespace
465+
// ended with whitespace
466+
if (isWhiteSpace && lastLine)
463467
{
464468
for (var offset = sb.Length - 1; offset >= startIndex; offset--)
465469
{
466470
var c = sb[offset];
471+
467472
if (!Char.IsWhiteSpace(c) || c == Symbols.NoBreakSpace)
468473
{
469474
sb.Remove(offset + 1, sb.Length - 1 - offset);

0 commit comments

Comments
 (0)