Skip to content

Commit d0da8b3

Browse files
committed
Add Filter to post-process (update or otherwise act on) each extracted value
1 parent bb55ea0 commit d0da8b3

File tree

3 files changed

+67
-0
lines changed

3 files changed

+67
-0
lines changed

src/main/java/im/nll/data/extractor/Extractors.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ public class Extractors {
2424
private String html;
2525
private List<String> htmlList;
2626
private Map<String, List<Extractor>> extractorsMap = Maps.newLinkedHashMap();
27+
private Map<String, List<Filter>> filtersMap = Maps.newLinkedHashMap();
2728
private String prevField;
2829

2930
public Extractors(String html) {
@@ -55,6 +56,14 @@ public Extractors with(Extractor extractor) {
5556
return this;
5657
}
5758

59+
public Extractors filter(Filter filter) {
60+
Validate.notNull(prevField, "must call extract method first!");
61+
List<Filter> filters = filtersMap.getOrDefault(prevField, Lists.newLinkedList());
62+
filters.add(filter);
63+
filtersMap.put(prevField, filters);
64+
return this;
65+
}
66+
5867

5968
/**
6069
* split html use listable extractor
@@ -74,6 +83,10 @@ public String asString() {
7483
for (Extractor extractor : extractors) {
7584
result = extractor.extract(result);
7685
}
86+
List<Filter> filters = filtersMap.getOrDefault(DEFAULT_FIELD, Lists.newLinkedList());
87+
for (Filter filter : filters) {
88+
result = filter.process(result);
89+
}
7790
return result;
7891
}
7992

@@ -86,6 +99,10 @@ public Map<String, String> asMap() {
8699
for (Extractor extractor : extractors) {
87100
result = extractor.extract(result);
88101
}
102+
List<Filter> filters = filtersMap.getOrDefault(name, Lists.newLinkedList());
103+
for (Filter filter : filters) {
104+
result = filter.process(result);
105+
}
89106
try {
90107
map.put(name, result);
91108
} catch (Exception e) {
@@ -107,6 +124,10 @@ public List<Map<String, String>> asMapList() {
107124
for (Extractor extractor : extractors) {
108125
result = extractor.extract(result);
109126
}
127+
List<Filter> filters = filtersMap.getOrDefault(name, Lists.newLinkedList());
128+
for (Filter filter : filters) {
129+
result = filter.process(result);
130+
}
110131
map.put(name, result);
111132
}
112133
mapList.add(map);
@@ -124,6 +145,10 @@ public <T> T asBean(Class<T> clazz) {
124145
for (Extractor extractor : extractors) {
125146
result = extractor.extract(result);
126147
}
148+
List<Filter> filters = filtersMap.getOrDefault(name, Lists.newLinkedList());
149+
for (Filter filter : filters) {
150+
result = filter.process(result);
151+
}
127152
try {
128153
Reflect.on(entity).set(name, result);
129154
} catch (Exception e) {
@@ -150,6 +175,10 @@ public <T> List<T> asBeanList(Class<T> clazz) {
150175
for (Extractor extractor : extractors) {
151176
result = extractor.extract(result);
152177
}
178+
List<Filter> filters = filtersMap.getOrDefault(name, Lists.newLinkedList());
179+
for (Filter filter : filters) {
180+
result = filter.process(result);
181+
}
153182
try {
154183
Reflect.on(entity).set(name, result);
155184
} catch (Exception e) {
src/main/java/im/nll/data/extractor/Filter.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
package im.nll.data.extractor;
2+
3+
/**
 * Value filter: takes a single string value and returns the processed result.
 * <p>
 * The interface declares exactly one abstract method, so it can be implemented
 * with a lambda expression or a method reference.
 *
 * @author <a href="mailto:[email protected]">fivesmallq</a>
 * @version Revision: 1.0
 * @date 16/1/11 6:17 PM
 */
@FunctionalInterface
public interface Filter {
    /**
     * Processes the given value.
     *
     * @param value the value to process
     * @return the processed value
     */
    String process(String value);
}

src/test/java/im/nll/data/extractor/ExtractorsTest.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,17 @@ public void testGet() throws Exception {
4545
Assert.assertEquals("Talk is cheap. Show me the code.", description);
4646
}
4747

48+
@Test
49+
public void testGetWithFilter() throws Exception {
50+
String title = Extractors.on(baseHtml).extract(selector("a.title")).asString();
51+
String followers = Extractors.on(baseHtml).extract(selector("div.followers")).with(regex("\\d+")).asString();
52+
//use filter to process value
53+
String description = Extractors.on(baseHtml).extract(selector("div.description")).filter(value -> value.toLowerCase()).asString();
54+
Assert.assertEquals("fivesmallq", title);
55+
Assert.assertEquals("29671", followers);
56+
Assert.assertEquals("talk is cheap. show me the code.", description);
57+
}
58+
4859
@Test
4960
public void testToMap() throws Exception {
5061
Map<String, String> dataMap = Extractors.on(baseHtml)
@@ -114,6 +125,21 @@ public void testToBeanListByXPath() throws Exception {
114125
Assert.assertEquals(second.getUrl(), "https://www.ruby-lang.org");
115126
}
116127

128+
@Test
129+
public void testToBeanListFilter() throws Exception {
130+
List<Language> languages = Extractors.on(listHtml).split(xpath("//tr[@class='item']"))
131+
.extract("type", xpath("//td[1]/text()")).filter(value -> "type:" + value)
132+
.extract("name", xpath("//td[2]/text()")).filter(value -> "name:" + value)
133+
.extract("url", xpath("//td[3]/text()")).filter(value -> "url:" + value)
134+
.asBeanList(Language.class);
135+
Assert.assertNotNull(languages);
136+
Language second = languages.get(1);
137+
Assert.assertEquals(languages.size(), 3);
138+
Assert.assertEquals(second.getType(), "type:dynamic");
139+
Assert.assertEquals(second.getName(), "name:Ruby");
140+
Assert.assertEquals(second.getUrl(), "url:https://www.ruby-lang.org");
141+
}
142+
117143
@Test
118144
public void testToBeanListByJson() throws Exception {
119145
List<Book> books = Extractors.on(jsonString).split(json("$..book.*"))

0 commit comments

Comments
 (0)