-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathSolrQueryCheck.js
More file actions
128 lines (116 loc) · 5.32 KB
/
SolrQueryCheck.js
File metadata and controls
128 lines (116 loc) · 5.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
/**
 * Pipeline stage entry point: resolves the keyword tags of a document against
 * the "Search_Keywords" Solr server and copies each tag's title into the
 * document's 'keywords_t' field.
 *
 * Documents without an id are returned untouched. Any error raised while
 * processing is logged through the global `logger` and the document is still
 * returned, so one bad document does not abort the caller.
 *
 * @param doc                the pipeline document to enrich (may be null)
 * @param ctx                not used by this function
 * @param collection         not used by this function
 * @param solrServer         not used by this function
 * @param solrServerFactory  provides getSolrServer(name) for the keyword lookup
 * @returns the same `doc` instance that was passed in
 */
function doWork(doc, ctx, collection, solrServer, solrServerFactory) {
    if (doc === null || doc.getId() === null) {
        return doc;
    }
    try {
        var SolrQuery = org.apache.solr.client.solrj.SolrQuery;
        // NOTE(review): getFirstFieldValue is iterated below as if it were a
        // list of keywords; confirm whether getFieldValues was intended.
        var keywords = doc.getFirstFieldValue('keywords_t_en_hl');
        // Read the value from the same field that was checked for presence
        // (the original looked up the misspelled 'jcr_uid' here).
        var uuidField = doc.getFirstField('jcr_uuid');
        var uuid = uuidField ? uuidField.getValue() : '';
        if (keywords !== null) {
            logger.info('Found Document with keywords Id:' + doc.getId() + ' JCR_uuid: ' + uuid + ' Keywords: ' + keywords);
            var client = solrServerFactory.getSolrServer("Search_Keywords");
            for (var j = 0; j < keywords.length; j++) {
                // Tag ids use '/' separators where the keyword uses ':'.
                var keywordTagId = '/etc/tags/' + keywords[j].replace(/\:/g, '/');
                if (client !== null) {
                    var q = "id:" + "\"" + keywordTagId + "\"";
                    var query = new SolrQuery();
                    query.setRows(1);
                    query.setQuery(q);
                    var res = client.query(query);
                    if (res !== null) {
                        // NOTE(review): the result list is indexed with the string
                        // 'docs'; verify this against the SolrJ result API.
                        doc.addField('keywords_t', res.getResults().get('docs').get('jcr_title_t'));
                    } else {
                        logger.error(' Invalid keyword in document ' + doc.getId() + ' tag reference:' + q);
                    }
                }
            }
            logger.info('Added keywords to document ' + doc.getId() + ' ' + doc.getFieldValues('keywords_t'));
        } else {
            logger.info('Not processing Document ' + doc.getId() + ' JCR_uuid: ' + uuid);
            // Extra diagnostics for one specific document id.
            if (doc.getId() === '/content/healthlibrary/home/hl/wellness/quit_tobacco/get_ready/0010-3C-10-reasons-to-quit') {
                logger.info('Fields for ' + doc.getId() + ' JCR_uuid: ' + uuid + ' Field Names:' + doc.getAllFieldNames());
            }
        }
    } catch (err) {
        // Script-level errors (e.g. TypeError) have no getLocalizedMessage();
        // fall back to their string form so the handler itself cannot throw.
        var message = (err && err.getLocalizedMessage) ? err.getLocalizedMessage() : String(err);
        logger.error(message);
    }
    return doc;
}
/* Add Additional Fields Stage */
/**
 * Adds derived fields to a pipeline document and returns it.
 *
 * Steps, in order:
 *  - tags the document with language 'en' and a url built from its id + '.html';
 *  - moves every 'text' value into the English body field;
 *  - replaces the matched markup, CRLF pairs and spaces in description and body
 *    values with a single space and trims the result;
 *  - falls back to the first body value when no description exists;
 *  - normalises the sort title, deriving it from 'jcr_title_t' when absent.
 *
 * @param doc  the pipeline document to mutate
 * @returns the same `doc` instance
 */
var solrQueryCheck = function (doc) {
    var textName = 'text';
    var bodyName = 'body_t_en_hl';
    var descName = 'description_t_en_hl';
    var sortTitleName = 'title_suhg';
    var jcrTitleName = 'jcr_title_t';

    // Slashes become spaces, leading whitespace is dropped, and whitespace
    // runs collapse to one space (trailing whitespace is left in place).
    function tidyTitle(raw) {
        return raw.replace(/\//g, " ").replace(/^\s+/, "").replace(/\s+/g, " ");
    }

    doc.addField('language_t', 'en');
    doc.addField('url_t', doc.getId() + '.html');

    var idx;

    // Move the raw 'text' values over to the body field.
    if (doc.hasField(textName)) {
        idx = 0;
        while (idx < doc.getFieldValues(textName).length) {
            doc.addField(bodyName, doc.getFieldValues(textName)[idx]);
            idx += 1;
        }
        doc.removeFields(textName);
    }

    var markup = new RegExp('<\/?(?:[phib])?(?:(br)?(ul)?(li)?)[1-4]?>|\\r\\n| ', 'g');

    if (doc.hasField(descName)) {
        doc.setField(descName, doc.getFirstFieldValue(descName).replace(markup, ' ').trim());
    }

    if (doc.hasField(bodyName)) {
        // Collect the cleaned values first, then swap them in for the old ones.
        var cleaned = [];
        idx = 0;
        while (idx < doc.getFieldValues(bodyName).length) {
            cleaned.push(doc.getFieldValues(bodyName)[idx].replace(markup, ' ').trim());
            idx += 1;
        }
        doc.removeFields(bodyName);
        cleaned.forEach(function (value) {
            doc.addField(bodyName, value);
        });
    }

    // Description falls back to the first body value when it is still missing.
    if (!doc.hasField(descName)) {
        doc.setField(descName, doc.getFirstFieldValue(bodyName));
    }

    if (doc.hasField(sortTitleName)) {
        doc.setField(sortTitleName, tidyTitle(doc.getFirstFieldValue(sortTitleName)));
        return doc;
    }

    if (!doc.hasField(jcrTitleName)) {
        return doc;
    }

    // No sort title yet: keep the JCR title as the display title and derive
    // the sort title from it with punctuation stripped.
    var jcrTitle = doc.getFirstFieldValue(jcrTitleName);
    doc.addField('title_t_en_hl', jcrTitle);
    var punctuation = new RegExp('[$&+,:;=?@#|\'<>.^*()%!-]', 'g');
    doc.addField(sortTitleName, tidyTitle(jcrTitle.replace(punctuation, '')));
    return doc;
};