-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathJsoupXmlParse.js
More file actions
70 lines (61 loc) · 2.38 KB
/
JsoupXmlParse.js
File metadata and controls
70 lines (61 loc) · 2.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
/**
 * Pipeline stage: Base64-decodes the document's "_raw_content_" field, stores
 * the decoded text in "html_s", and parses it with jsoup's XML parser to log
 * the <img> elements it finds.
 *
 * Relies on two things supplied by the host environment: Java package globals
 * (`java`, `org`) and a `logger` object exposing warn/error/debug.
 *
 * @param {Object} doc - pipeline document exposing getId(),
 *     getFirstFieldValue(name) and addField(name, value); presumably a
 *     Lucidworks PipelineDocument - TODO confirm against the caller.
 * @returns {Object} the same `doc` that was passed in (possibly with an
 *     added "html_s" field). Exceptions are logged, never propagated.
 */
var jsoupxml = function (doc) {
    // Named JavaString so the built-in JavaScript String is not shadowed.
    var JavaString = java.lang.String;
    var Jsoup = org.jsoup.Jsoup;
    var Parser = org.jsoup.parser.Parser;
    // NOTE(review): assumes the raw payload is UTF-8 encoded text (the XML
    // default). An explicit charset keeps decoding independent of the JVM's
    // platform-default encoding - confirm against the data source.
    var utf8 = java.nio.charset.StandardCharsets.UTF_8;

    try {
        if (doc == null || doc.getId() == null) {
            logger.debug("!!!!!!!!!!!!!!! No doc object -------------------");
            return doc;
        }

        var raw = doc.getFirstFieldValue("_raw_content_");
        if (raw == null) {
            // Nothing to decode; pass the document through untouched.
            return doc;
        }

        var decodedBytes = java.util.Base64.getDecoder().decode(raw);
        var xmlstr = new JavaString(decodedBytes, utf8);
        if (!xmlstr) {
            logger.error("no body :-( -------------------");
            return doc;
        }

        logger.warn("xml str: " + xmlstr);
        doc.addField("html_s", xmlstr);

        // Empty base URI: relative links inside the payload are not resolved.
        var jdoc = Jsoup.parse(xmlstr, '', Parser.xmlParser());
        var images = jdoc.select("img");
        logger.warn("Biz: " + images.first());
        logger.warn("Elements: " + images);

        // Sanity check: parse a known-good snippet so its log line can be
        // compared with the output for the real document.
        // TODO(review): diagnostic only - remove once the parser is trusted.
        var sampleMarkup = '<html>' +
            '<body>' +
            '<img src="mypic.png" alt="Test image" title="Sample title" />' +
            '</body>' +
            '</html>';
        // Declared locally; a bare assignment here would create a global.
        var sampleDoc = Jsoup.parse(sampleMarkup, '', Parser.xmlParser());
        logger.warn("Baz: " + sampleDoc.select('img').first());
    } catch (err) {
        // Best-effort stage: log the failure and still hand the document on.
        logger.error(err);
    }
    return doc;
};