Skip to content

Commit 63c6fad

Browse files
authored
fix: BibleTextNode whitespace parsing
fix: BibleTextNode whitespace parsing. This makes the WMB render correctly, even though its markup contains many unintended (but theoretically valid) <span> nodes.
1 parent 5b89856 commit 63c6fad

File tree

2 files changed

+68
-17
lines changed

2 files changed

+68
-17
lines changed

Sources/YouVersionPlatformCore/Bible/BibleTextNode.swift

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -114,29 +114,32 @@ public class BibleTextNode {
114114
}
115115

116116
func parser(_ parser: XMLParser, foundCharacters string: String) {
117-
let collapsed = string.replacingOccurrences(of: "\\s+", with: " ", options: .regularExpression)
118-
let core = collapsed.trimmingCharacters(in: .whitespacesAndNewlines)
119-
guard !core.isEmpty else {
117+
guard let current = stack.last else {
120118
return
121119
}
122-
let leadingSpace = string.first?.isWhitespace == true
123-
let trailingSpace = string.last?.isWhitespace == true
124-
var segment = core
125-
if leadingSpace {
126-
segment = " " + segment
127-
}
128-
if trailingSpace {
129-
segment += " "
130-
}
131-
guard let current = stack.last else {
120+
121+
let segment = string.replacingOccurrences(of: "\\s+", with: " ", options: .regularExpression)
122+
guard !segment.isEmpty else {
132123
return
133124
}
134125

126+
if segment == " " {
127+
guard let previousChild = current.children.last else {
128+
return
129+
}
130+
guard previousChild.type == .span || previousChild.type == .text else {
131+
return
132+
}
133+
}
134+
135135
if let lastChild = current.children.last, lastChild.type == .text {
136-
lastChild.textSegments.append(segment)
137-
let joined = lastChild.textSegments.joined()
136+
let joined = (lastChild.text + segment)
137+
.replacingOccurrences(of: " {2,}", with: " ", options: .regularExpression)
138+
guard !joined.isEmpty else {
139+
return
140+
}
141+
lastChild.textSegments = [joined]
138142
lastChild.text = joined
139-
lastChild.textSegments = joined.isEmpty ? [] : [joined]
140143
} else {
141144
let textNode = BibleTextNode(name: "text")
142145
textNode.textSegments = [segment]
@@ -148,7 +151,6 @@ public class BibleTextNode {
148151
/// End-of-element callback (same signature as `XMLParserDelegate`'s `didEndElement` method).
///
/// Drops the most recently pushed entry from `stack`; does nothing when `stack` is already empty.
/// None of the arguments are inspected.
func parser(_ parser: XMLParser, didEndElement elementName: String, namespaceURI: String?, qualifiedName qName: String?) {
    guard !stack.isEmpty else {
        return
    }
    stack.removeLast()
}
151-
152154
}
153155
}
154156

Tests/YouVersionPlatformCoreTests/BibleTextNodeTests.swift

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,52 @@ func testParse_GenesisIntroContainsText() throws {
8181
let texts = collectTexts(root)
8282
#expect(texts.contains("In the beginning, God created the heavens and the earth."))
8383
}
84+
85+
/// A single space between sibling `<span>` elements must survive parsing,
/// so the rendered text keeps its word boundaries.
@Test
func testParse_SpacesBetweenInlineSpansArePreserved() throws {
    // Arrange
    let markup = "<div><span>One</span> <span>Two</span> <span>Three three three</span>.</div>"
    let expected = "One Two Three three three."

    // Act: parse, then take the first child of the root as the block under test.
    let document = try #require(try BibleTextNode.parse(markup))
    let firstBlock = try #require(document.children.first)
    let actual = collectRenderedText(from: firstBlock)

    // Assert
    #expect(actual == expected)
}
95+
96+
/// Runs of mixed whitespace (spaces, newline, tab) around inline nodes must
/// render as single spaces.
///
/// NOTE(review): `collectRenderedText` trims its result, so the whitespace at
/// the very start and end of the `<div>` is not actually pinned by this test.
@Test
func testParse_MixedWhitespaceAndInlineNodesCollapseToSingleSpaces() throws {
    // Arrange
    let markup = "<div> Start <span>middle</span>\n\t <span>end</span> done </div>"
    let expected = "Start middle end done"

    // Act: parse, then take the first child of the root as the block under test.
    let document = try #require(try BibleTextNode.parse(markup))
    let firstBlock = try #require(document.children.first)
    let actual = collectRenderedText(from: firstBlock)

    // Assert
    #expect(actual == expected)
}
106+
107+
/// Whitespace that appears before the first child of a block should not
/// contribute to the rendered text.
///
/// NOTE(review): `collectRenderedText` trims leading/trailing whitespace, so
/// this assertion would presumably also pass if the parser kept the leading
/// space — consider asserting on the untrimmed node tree to pin the behavior.
@Test
func testParse_LeadingWhitespaceBeforeFirstChildIsIgnored() throws {
    // Arrange
    let markup = "<div> <span>One</span> <span>Two</span></div>"
    let expected = "One Two"

    // Act: parse, then take the first child of the root as the block under test.
    let document = try #require(try BibleTextNode.parse(markup))
    let firstBlock = try #require(document.children.first)
    let actual = collectRenderedText(from: firstBlock)

    // Assert
    #expect(actual == expected)
}
117+
118+
/// Returns the concatenated text of `node` and its descendants, with leading
/// and trailing whitespace/newlines stripped.
///
/// - Parameter node: Root of the subtree to flatten.
/// - Returns: The text gathered by `appendRenderedText(from:into:)`, trimmed at both ends.
private func collectRenderedText(from node: BibleTextNode) -> String {
    var accumulated = String()
    appendRenderedText(from: node, into: &accumulated)
    // Only the outer edges are trimmed; interior whitespace is returned as gathered.
    let trimmed = accumulated.trimmingCharacters(in: .whitespacesAndNewlines)
    return trimmed
}
123+
124+
/// Appends the text of `node` (when it is a `.text` node) to `text`, then
/// recurses into each child in order (pre-order traversal).
///
/// - Parameters:
///   - node: Node whose subtree is visited.
///   - text: Buffer that receives the text of every `.text` node encountered.
private func appendRenderedText(from node: BibleTextNode, into text: inout String) {
    let isTextNode = node.type == .text
    if isTextNode {
        text.append(node.text)
    }

    // Children are visited after the node itself so output follows document order.
    for descendant in node.children {
        appendRenderedText(from: descendant, into: &text)
    }
}

0 commit comments

Comments
 (0)