Skip to content

Commit 7812615

Browse files
committed
metafacture-html/ (main): Fix Checkstyle violations.
1 parent 2f8045f commit 7812615

File tree

4 files changed

+68
-59
lines changed

4 files changed

+68
-59
lines changed

metafacture-html/src/main/java/org/metafacture/html/ElementExtractor.java

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,24 @@
1313
* See the License for the specific language governing permissions and
1414
* limitations under the License.
1515
*/
16-
package org.metafacture.html;
1716

18-
import java.io.IOException;
19-
import java.io.Reader;
17+
package org.metafacture.html;
2018

21-
import org.apache.commons.io.IOUtils;
22-
import org.jsoup.Jsoup;
23-
import org.jsoup.nodes.Document;
24-
import org.jsoup.nodes.Element;
2519
import org.metafacture.framework.FluxCommand;
2620
import org.metafacture.framework.ObjectReceiver;
2721
import org.metafacture.framework.annotations.Description;
2822
import org.metafacture.framework.annotations.In;
2923
import org.metafacture.framework.annotations.Out;
3024
import org.metafacture.framework.helpers.DefaultObjectPipe;
3125

26+
import org.apache.commons.io.IOUtils;
27+
import org.jsoup.Jsoup;
28+
import org.jsoup.nodes.Document;
29+
import org.jsoup.nodes.Element;
30+
31+
import java.io.IOException;
32+
import java.io.Reader;
33+
3234
/**
3335
* Extracts the specified element from an HTML document
3436
*
@@ -51,10 +53,11 @@ public ElementExtractor(final String selector) {
5153
@Override
5254
public void process(final Reader reader) {
5355
try {
54-
Document document = Jsoup.parse(IOUtils.toString(reader));
55-
Element firstElement = document.select(selector).first();
56+
final Document document = Jsoup.parse(IOUtils.toString(reader));
57+
final Element firstElement = document.select(selector).first();
5658
getReceiver().process(firstElement.data());
57-
} catch (IOException e) {
59+
}
60+
catch (final IOException e) {
5861
e.printStackTrace();
5962
}
6063
}

metafacture-html/src/main/java/org/metafacture/html/HtmlDecoder.java

Lines changed: 53 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -13,57 +13,60 @@
1313
* See the License for the specific language governing permissions and
1414
* limitations under the License.
1515
*/
16+
1617
package org.metafacture.html;
1718

18-
import java.io.IOException;
19-
import java.io.Reader;
20-
import java.io.UnsupportedEncodingException;
21-
import java.net.URLDecoder;
22-
import java.nio.charset.StandardCharsets;
23-
import java.util.HashMap;
24-
import java.util.Map;
25-
import java.util.UUID;
19+
import org.metafacture.framework.FluxCommand;
20+
import org.metafacture.framework.StreamReceiver;
21+
import org.metafacture.framework.annotations.Description;
22+
import org.metafacture.framework.annotations.In;
23+
import org.metafacture.framework.annotations.Out;
24+
import org.metafacture.framework.helpers.DefaultObjectPipe;
2625

2726
import org.apache.commons.io.IOUtils;
2827
import org.jsoup.Jsoup;
2928
import org.jsoup.nodes.Attribute;
3029
import org.jsoup.nodes.Attributes;
3130
import org.jsoup.nodes.Document;
3231
import org.jsoup.nodes.Element;
33-
import org.metafacture.framework.FluxCommand;
34-
import org.metafacture.framework.StreamReceiver;
35-
import org.metafacture.framework.annotations.Description;
36-
import org.metafacture.framework.annotations.In;
37-
import org.metafacture.framework.annotations.Out;
38-
import org.metafacture.framework.helpers.DefaultObjectPipe;
3932
import org.slf4j.Logger;
4033
import org.slf4j.LoggerFactory;
4134

35+
import java.io.IOException;
36+
import java.io.Reader;
37+
import java.io.UnsupportedEncodingException;
38+
import java.net.URLDecoder;
39+
import java.nio.charset.StandardCharsets;
40+
import java.util.HashMap;
41+
import java.util.Map;
42+
import java.util.UUID;
43+
4244
/**
4345
* Decode HTML to metadata events. Each input document represents one record.
4446
*
4547
* @author Fabian Steeg (fsteeg)
4648
*
4749
*/
48-
@Description("Decode HTML to metadata events. The attrValsAsSubfields option can be used to override "
49-
+ "the default attribute values to be used as subfields (e.g. by default "
50-
+ "`link rel=\"canonical\" href=\"http://example.org\"` becomes `link.canonical`). "
51-
+ "It expects an HTTP-style query string specifying as key the attributes whose value should "
52-
+ "be used as a subfield, and as value the attribute whose value should be the subfield value, "
53-
+ "e.g. the default contains `link.rel=href`. To use the HTML element text as the value "
54-
+ "(instead of another attribute), omit the value of the query-string key-value pair, "
55-
+ "e.g. `title.lang`. To add to the defaults, instead of replacing them, start with an `&`, "
56-
+ "e.g. `&h3.class`")
50+
@Description("Decode HTML to metadata events. The attrValsAsSubfields option can be used to override " +
51+
"the default attribute values to be used as subfields (e.g. by default " +
52+
"`link rel=\"canonical\" href=\"http://example.org\"` becomes `link.canonical`). " +
53+
"It expects an HTTP-style query string specifying as key the attributes whose value should " +
54+
"be used as a subfield, and as value the attribute whose value should be the subfield value, " +
55+
"e.g. the default contains `link.rel=href`. To use the HTML element text as the value " +
56+
"(instead of another attribute), omit the value of the query-string key-value pair, " +
57+
"e.g. `title.lang`. To add to the defaults, instead of replacing them, start with an `&`, " +
58+
"e.g. `&h3.class`")
5759
@In(Reader.class)
5860
@Out(StreamReceiver.class)
5961
@FluxCommand("decode-html")
6062
public class HtmlDecoder extends DefaultObjectPipe<Reader, StreamReceiver> {
6163

64+
private static final Logger LOG = LoggerFactory.getLogger(HtmlDecoder.class);
65+
6266
private static final String DEFAULT_ATTR_VALS_AS_SUBFIELDS = //
6367
"meta.name=content&meta.property=content&link.rel=href&a.rel=href";
68+
6469
private Map<String, String> attrValsAsSubfields;
65-
private static final Logger LOG =
66-
LoggerFactory.getLogger(HtmlDecoder.class);
6770

6871
public HtmlDecoder() {
6972
setAttrValsAsSubfields(DEFAULT_ATTR_VALS_AS_SUBFIELDS);
@@ -72,28 +75,29 @@ public HtmlDecoder() {
7275
@Override
7376
public void process(final Reader reader) {
7477
try {
75-
StreamReceiver receiver = getReceiver();
78+
final StreamReceiver receiver = getReceiver();
7679
receiver.startRecord(UUID.randomUUID().toString());
77-
Document document = Jsoup.parse(IOUtils.toString(reader));
80+
final Document document = Jsoup.parse(IOUtils.toString(reader));
7881
process(document, receiver);
7982
receiver.endRecord();
80-
} catch (IOException e) {
83+
}
84+
catch (final IOException e) {
8185
LOG.error(e.getMessage(), e);
8286
}
8387
}
8488

85-
private void process(Element parent, StreamReceiver receiver) {
86-
for (Element element : parent.children()) {
89+
private void process(final Element parent, final StreamReceiver receiver) {
90+
for (final Element element : parent.children()) {
8791
receiver.startEntity(element.nodeName());
88-
Attributes attributes = element.attributes();
92+
final Attributes attributes = element.attributes();
8993
boolean addedValueAsSubfield = false;
90-
for (Attribute attribute : attributes) {
94+
for (final Attribute attribute : attributes) {
9195
addedValueAsSubfield = handleAttributeValuesAsSubfields(receiver, element, attributes, attribute);
9296
receiver.literal(attribute.getKey(), attribute.getValue());
9397
}
9498
if (element.children().isEmpty()) {
95-
String text = element.text().trim();
96-
String value = text.isEmpty() ? element.data() : text;
99+
final String text = element.text().trim();
100+
final String value = text.isEmpty() ? element.data() : text;
97101
if (!value.isEmpty() && !addedValueAsSubfield) {
98102
receiver.literal("value", value);
99103
}
@@ -103,34 +107,34 @@ private void process(Element parent, StreamReceiver receiver) {
103107
}
104108
}
105109

106-
private boolean handleAttributeValuesAsSubfields(StreamReceiver receiver, Element element,
107-
Attributes attributes, Attribute attribute) {
108-
String fullFieldKey = element.nodeName() + "." + attribute.getKey();
110+
private boolean handleAttributeValuesAsSubfields(final StreamReceiver receiver, final Element element, final Attributes attributes, final Attribute attribute) {
111+
final String fullFieldKey = element.nodeName() + "." + attribute.getKey();
109112
if (attrValsAsSubfields.containsKey(fullFieldKey)) {
110-
String configValue = attrValsAsSubfields.get(fullFieldKey);
113+
final String configValue = attrValsAsSubfields.get(fullFieldKey);
111114
if (configValue.trim().isEmpty()) {
112115
receiver.literal(attribute.getValue(), element.text().trim());
113116
return true;
114-
} else {
115-
String value = attributes.get(configValue);
117+
}
118+
else {
119+
final String value = attributes.get(configValue);
116120
receiver.literal(attribute.getValue(), value);
117121
}
118122
}
119123
return false;
120124
}
121125

122-
public void setAttrValsAsSubfields(String mapString) {
126+
public void setAttrValsAsSubfields(final String mapString) {
123127
this.attrValsAsSubfields = new HashMap<String, String>();
124-
String input = mapString.startsWith("&") ? DEFAULT_ATTR_VALS_AS_SUBFIELDS + mapString
125-
: mapString;
126-
for (String nameValuePair : input.split("&")) {
127-
String[] nameValue = nameValuePair.split("=");
128+
final String input = mapString.startsWith("&") ? DEFAULT_ATTR_VALS_AS_SUBFIELDS + mapString : mapString;
129+
for (final String nameValuePair : input.split("&")) {
130+
final String[] nameValue = nameValuePair.split("=");
128131
try {
129-
String utf8 = StandardCharsets.UTF_8.name();
130-
String key = URLDecoder.decode(nameValue[0], utf8);
131-
String val = nameValue.length > 1 ? URLDecoder.decode(nameValue[1], utf8) : "";
132+
final String utf8 = StandardCharsets.UTF_8.name();
133+
final String key = URLDecoder.decode(nameValue[0], utf8);
134+
final String val = nameValue.length > 1 ? URLDecoder.decode(nameValue[1], utf8) : "";
132135
attrValsAsSubfields.put(key, val);
133-
} catch (UnsupportedEncodingException e) {
136+
}
137+
catch (final UnsupportedEncodingException e) {
134138
LOG.error(e.getMessage(), e);
135139
}
136140
}

metafacture-html/src/test/java/org/metafacture/html/ElementExtractorTest.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
* See the License for the specific language governing permissions and
1414
* limitations under the License.
1515
*/
16+
1617
package org.metafacture.html;
1718

1819
import static org.mockito.Mockito.verify;

metafacture-html/src/test/java/org/metafacture/html/HtmlDecoderTest.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
* See the License for the specific language governing permissions and
1414
* limitations under the License.
1515
*/
16+
1617
package org.metafacture.html;
1718

1819
import static org.mockito.Mockito.inOrder;

0 commit comments

Comments
 (0)