Skip to content

Commit 6485d17

Browse files
committed
Emit combined text for mixed content in HtmlDecoder
Previously, only the children's text was emitted for elements with mixed content.
1 parent 8a3e605 commit 6485d17

File tree

2 files changed

+15
-6
lines changed

2 files changed

+15
-6
lines changed

metafacture-html/src/main/java/org/metafacture/html/HtmlDecoder.java

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -98,12 +98,10 @@ private void process(final Element parent, final StreamReceiver receiver) {
9898
addedValueAsSubfield = handleAttributeValuesAsSubfields(receiver, element, attributes, attribute);
9999
receiver.literal(attribute.getKey(), attribute.getValue());
100100
}
101-
if (element.children().isEmpty()) {
102-
final String text = element.text().trim();
103-
final String value = text.isEmpty() ? element.data() : text;
104-
if (!value.isEmpty() && !addedValueAsSubfield) {
105-
receiver.literal("value", value);
106-
}
101+
final String text = element.text().trim();
102+
final String value = text.isEmpty() ? element.data() : text;
103+
if (!value.isEmpty() && !addedValueAsSubfield) {
104+
receiver.literal("value", value);
107105
}
108106
process(element, receiver);
109107
receiver.endEntity();

metafacture-html/src/test/java/org/metafacture/html/HtmlDecoderTest.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,17 @@ public void nestedEntities() {
7777

7878
}
7979

80+
@Test
81+
public void mixedContent() {
82+
htmlDecoder.process(new StringReader("<p>This is the <strong>full</strong> text</p>"));
83+
final InOrder ordered = inOrder(receiver);
84+
ordered.verify(receiver).startEntity("p");
85+
ordered.verify(receiver).literal("value", "This is the full text");
86+
// elements above plus body, html
87+
ordered.verify(receiver, times(4)).endEntity();
88+
89+
}
90+
8091
@Test
8192
public void htmlAttributesAsLiterals() {
8293
htmlDecoder.process(new StringReader("<p class=lead>Text"));

0 commit comments

Comments
 (0)