-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathJSoup_Extract_From_URL.js
More file actions
38 lines (31 loc) · 987 Bytes
/
JSoup_Extract_From_URL.js
File metadata and controls
38 lines (31 loc) · 987 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
/**
 * Parses HTML with Jsoup and copies the own text of the
 * `div#body-container` element into the document's "content" field.
 *
 * Free variables resolved from the host environment (none are defined in
 * this block): `org` (Java package root), `logger`, and `content`.
 * NOTE(review): `content` is never declared or read from `doc` here;
 * presumably the host binds it to the raw HTML string - confirm. If it is
 * not bound, the resulting ReferenceError is caught and logged below.
 *
 * @param {Object} doc - Document exposing `getId()` and
 *     `addField(name, value)`. May be null/undefined.
 * @returns {Object} The same `doc` that was passed in (possibly with a
 *     "content" field added); returned untouched when it is null/undefined
 *     or has no id.
 */
var extract = function (doc) {
    // Nothing to enrich without a document that has an id.
    if (doc == null || doc.getId() == null) {
        return doc;
    }

    var Jsoup = org.jsoup.Jsoup;

    try {
        var parsed = Jsoup.parse(content);
        if (parsed != null) {
            // Declared locally: the previous undeclared assignment leaked a
            // global and throws under strict mode.
            var container = parsed.select("div#body-container").first();
            if (container != null) {
                // Per the jsoup API, ownText() covers only the element's
                // direct text, not text of its child elements.
                var txt = container.ownText();
                logger.info("ADD CONTENT: " + txt);
                doc.addField("content", txt);
            } else {
                logger.warn("Div was null");
            }
        }
    } catch (err) {
        // Best effort: a parse/selection failure must not drop the document.
        logger.error(err);
    }

    return doc;
};