Skip to content

Commit 29ee1a4

Browse files
authored
Merge pull request #477 from metafacture/htmlMixedContent
Emit combined text for mixed content in HtmlDecoder
2 parents 355d610 + fb23d42 commit 29ee1a4

File tree

2 files changed: +16 -7 lines changed

metafacture-html/src/main/java/org/metafacture/html/HtmlDecoder.java

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -98,12 +98,10 @@ private void process(final Element parent, final StreamReceiver receiver) {
9898
addedValueAsSubfield = handleAttributeValuesAsSubfields(receiver, element, attributes, attribute);
9999
receiver.literal(attribute.getKey(), attribute.getValue());
100100
}
101-
if (element.children().isEmpty()) {
102-
final String text = element.text().trim();
103-
final String value = text.isEmpty() ? element.data() : text;
104-
if (!value.isEmpty() && !addedValueAsSubfield) {
105-
receiver.literal("value", value);
106-
}
101+
final String text = element.text().trim();
102+
final String value = text.isEmpty() ? element.data() : text;
103+
if (!value.isEmpty() && !addedValueAsSubfield) {
104+
receiver.literal("value", value);
107105
}
108106
process(element, receiver);
109107
receiver.endEntity();
@@ -133,7 +131,7 @@ private boolean handleAttributeValuesAsSubfields(final StreamReceiver receiver,
133131
* @param mapString the attributes to be added as subfields
134132
*/
135133
public void setAttrValsAsSubfields(final String mapString) {
136-
this.attrValsAsSubfields = new HashMap<String, String>();
134+
this.attrValsAsSubfields = new HashMap<>();
137135
final String input = mapString.startsWith("&") ? DEFAULT_ATTR_VALS_AS_SUBFIELDS + mapString : mapString;
138136
for (final String nameValuePair : input.split("&")) {
139137
final String[] nameValue = nameValuePair.split("=");

metafacture-html/src/test/java/org/metafacture/html/HtmlDecoderTest.java

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -77,6 +77,17 @@ public void nestedEntities() {
7777

7878
}
7979

80+
@Test
81+
public void mixedContent() {
82+
htmlDecoder.process(new StringReader("<p>This is the <strong>full</strong> text</p>"));
83+
final InOrder ordered = inOrder(receiver);
84+
ordered.verify(receiver).startEntity("p");
85+
ordered.verify(receiver).literal("value", "This is the full text");
86+
// elements above plus body, html
87+
ordered.verify(receiver, times(4)).endEntity();
88+
89+
}
90+
8091
@Test
8192
public void htmlAttributesAsLiterals() {
8293
htmlDecoder.process(new StringReader("<p class=lead>Text"));

0 commit comments

Comments
 (0)