Skip to content
This repository was archived by the owner on Nov 25, 2020. It is now read-only.

Commit b790df3

Browse files
committed
Finish ElasticSearch implementation / Refactor some methods into a common parent shared with Lucene.
1 parent d20c545 commit b790df3

File tree

4 files changed

+156
-86
lines changed

4 files changed

+156
-86
lines changed

core/src/plugins/core.index/class.AbstractSearchEngineIndexer.php

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,19 @@
88

99
abstract class AbstractSearchEngineIndexer extends AJXP_AbstractMetaSource {
1010

11+
/**
 * Post-processes a contribution node after the parent class has handled it.
 *
 * When the "HIDE_MYSHARES_SECTION" option is strictly true and the node is a
 * "client_configs" node, the first child "component_config" element whose
 * className attribute is "AjxpPane::navigation_scroller" is removed from it.
 * In every other case the node is left as the parent implementation returned it.
 *
 * @param DOMNode $contribNode Contribution node, passed by reference
 */
public function parseSpecificContributions(&$contribNode)
{
    parent::parseSpecificContributions($contribNode);

    // Both conditions must hold before the node is touched; the option is
    // evaluated first, exactly as before.
    $hideSection = ($this->getFilteredOption("HIDE_MYSHARES_SECTION") === true);
    if (!$hideSection || $contribNode->nodeName != "client_configs") {
        return;
    }

    // Look for the navigation scroller among the direct children of the node
    $xpath    = new DOMXPath($contribNode->ownerDocument);
    $expr     = 'component_config[@className="AjxpPane::navigation_scroller"]';
    $scroller = $xpath->query($expr, $contribNode);

    if ($scroller->length) {
        // Only the first match is removed
        $contribNode->removeChild($scroller->item(0));
    }
}
23+
1124
/**
1225
* @param AJXP_Node $ajxpNode
1326
* @return null|string
@@ -63,4 +76,63 @@ protected function extractIndexableContent($ajxpNode){
6376
return null;
6477
}
6578

79+
/**
 * Rewrites the symbolic parts of a "key:value AND key:value" search query.
 *
 * Two rewrites are applied:
 *  - the first AJXP_SEARCH_RANGE_* keyword found (TODAY, YESTERDAY, LAST_WEEK,
 *    LAST_MONTH, LAST_YEAR, checked in that order) is replaced by a
 *    "[Ymd TO Ymd]" date range computed from the current date;
 *  - every "ajxp_bytesize:[from TO to]" clause has its bounds passed through
 *    AJXP_Utils::convertBytes() and cast to integers.
 *
 * Clauses without a ":" separator, and ajxp_bytesize clauses whose value is
 * not of the form "[... TO ...]", are left untouched.
 *
 * @param String $query Raw query string
 * @return String The query with range keywords and byte sizes expanded
 */
protected function filterSearchRangesKeywords($query)
{
    // Read the clock once so that all bounds refer to the same day, even if
    // the call happens right around midnight.
    $now   = time();
    $month = (int) date("n", $now);
    $day   = (int) date("j", $now);
    $year  = (int) date("Y", $now);

    // mktime() normalises out-of-range values (day 0, month 0, ...), so plain
    // subtraction is enough to step back in time.
    $today     = date("Ymd", $now);
    $yesterday = date("Ymd", mktime(0, 0, 0, $month, $day - 1, $year));
    $weekAgo   = date("Ymd", mktime(0, 0, 0, $month, $day - 7, $year));
    $monthAgo  = date("Ymd", mktime(0, 0, 0, $month - 1, $day, $year));
    $yearAgo   = date("Ymd", mktime(0, 0, 0, $month, $day, $year - 1));

    $ranges = array(
        "AJXP_SEARCH_RANGE_TODAY"      => array($today, $today),
        "AJXP_SEARCH_RANGE_YESTERDAY"  => array($yesterday, $yesterday),
        "AJXP_SEARCH_RANGE_LAST_WEEK"  => array($weekAgo, $today),
        "AJXP_SEARCH_RANGE_LAST_MONTH" => array($monthAgo, $today),
        "AJXP_SEARCH_RANGE_LAST_YEAR"  => array($yearAgo, $today),
    );

    foreach ($ranges as $keyword => $bounds) {
        if (strpos($query, $keyword) !== false) {
            $query = str_replace($keyword, "[" . $bounds[0] . " TO " . $bounds[1] . "]", $query);
            // Only the first keyword found is expanded, as before
            break;
        }
    }

    // Convert human-readable sizes in every ajxp_bytesize clause
    $replacements = array();
    foreach (array_map("trim", explode("AND", $query)) as $clause) {
        $parts = explode(":", $clause, 2);
        if (count($parts) !== 2 || $parts[0] !== "ajxp_bytesize") {
            // Free-text clause or another field: nothing to convert
            continue;
        }
        if (!preg_match('/\[(.*) TO (.*)\]/', $parts[1], $matches)) {
            // Not a range expression: keep the clause as the user typed it
            continue;
        }
        $replacements[$clause] = "ajxp_bytesize:["
            . intval(AJXP_Utils::convertBytes($matches[1]))
            . " TO "
            . intval(AJXP_Utils::convertBytes($matches[2]))
            . "]";
    }
    if (count($replacements) > 0) {
        // strtr() substitutes all clauses in a single pass, so one replacement
        // can never be re-matched by another.
        $query = strtr($query, $replacements);
    }

    return $query;
}
123+
124+
/**
 * Builds the identifier made of the repository id plus an optional suffix.
 *
 * When the "repository_specific_keywords" option is not empty, it is passed
 * through AJXP_VarsFilter::filter() together with $userId; in the result every
 * "," becomes "-" and every "/" becomes "__", and the whole is appended to the
 * repository id after a "-" separator. Otherwise nothing is appended.
 *
 * @param String $repositoryId
 * @param String $userId Forwarded to AJXP_VarsFilter::filter(); may be null
 * @return string
 */
protected function buildSpecificId($repositoryId, $userId = null)
{
    $keywords = $this->getFilteredOption("repository_specific_keywords");

    if (empty($keywords)) {
        $suffix = "";
    } else {
        $resolved = AJXP_VarsFilter::filter($keywords, $userId);
        $suffix   = "-" . str_replace(array(",", "/"), array("-", "__"), $resolved);
    }

    // Concatenation always yields a string, even for a numeric repository id
    return $repositoryId . $suffix;
}
137+
66138
}

core/src/plugins/index.elasticsearch/class.AjxpElasticSearch.php

Lines changed: 82 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ class AjxpElasticSearch extends AbstractSearchEngineIndexer
6262

6363
public function init($options)
6464
{
65-
parent::init($options);;
65+
parent::init($options);
6666
$metaFields = $this->getFilteredOption("index_meta_fields");
6767
$specKey = $this->getFilteredOption("repository_specific_keywords");
6868
if (!empty($metaFields)) {
@@ -113,7 +113,7 @@ public function indexationIndexNode($node){
113113
* @param AJXP_Node $parentNode
114114
*/
115115
public function indexationStarts($parentNode){
116-
$this->loadIndex($parentNode->getRepositoryId(), true);
116+
$this->loadIndex($parentNode->getRepositoryId(), true, $parentNode->getUser());
117117
}
118118

119119
/**
@@ -145,6 +145,42 @@ public function applyAction($actionName, $httpVars, $fileVars)
145145
throw new Exception($messages["index.lucene.7"]);
146146
}
147147

148+
$textQuery = $httpVars["query"];
149+
if($this->getFilteredOption("AUTO_WILDCARD") === true && strlen($textQuery) > 0 && ctype_alnum($textQuery)){
150+
if($textQuery[0] == '"' && $textQuery[strlen($textQuery)-1] == '"'){
151+
$textQuery = substr($textQuery, 1, -1);
152+
}else if($textQuery[strlen($textQuery)-1] != "*" ){
153+
$textQuery.="*";
154+
}
155+
}
156+
157+
158+
$this->currentIndex->open();
159+
$fieldQuery = new Elastica\Query\QueryString();
160+
$fieldQuery->setAllowLeadingWildcard(false);
161+
$fieldQuery->setFuzzyMinSim(0.8);
162+
163+
if($textQuery == "*"){
164+
165+
$fields = array("ajxp_node");
166+
$fieldQuery->setQuery("yes");
167+
$fieldQuery->setFields($fields);
168+
169+
}else if(strpos($textQuery, ":") !== false){
170+
171+
// USE LUCENE DSL DIRECTLY (key1:value1 AND key2:value2...)
172+
$textQuery = str_replace("ajxp_meta_ajxp_document_content:","body:", $textQuery);
173+
$textQuery = $this->filterSearchRangesKeywords($textQuery);
174+
$fieldQuery->setQuery($textQuery);
175+
176+
} else{
177+
178+
$fields = array("basename","ajxp_meta_*", "node_*","body");
179+
$fieldQuery->setQuery($textQuery);
180+
$fieldQuery->setFields($fields);
181+
182+
}
183+
148184
/*
149185
TODO : READAPT QUERY WITH EACH FIELD
150186
if ((isSet($this->metaFields) || $this->indexContent) && isSet($httpVars["fields"])) {
@@ -165,22 +201,8 @@ public function applyAction($actionName, $httpVars, $fileVars)
165201
$this->logDebug("Query : $query");
166202
} else {
167203
*/
168-
$this->currentIndex->open();
169-
$query = $httpVars["query"];
170-
$fieldQuery = new Elastica\Query\QueryString();
171204

172205
//}
173-
//$this->setDefaultAnalyzer();
174-
if ($query == "*") {
175-
$fields = array("ajxp_node");
176-
$fieldQuery->setQuery("yes");
177-
} else {
178-
$fields = array("basename","ajxp_meta_*", "node_*","body");
179-
$fieldQuery->setQuery($query);
180-
}
181-
$fieldQuery->setFields($fields);
182-
$fieldQuery->setAllowLeadingWildcard(false);
183-
$fieldQuery->setFuzzyMinSim(0.8);
184206
/*
185207
We create this object search because it'll allow us to fetch the number of results we want at once.
186208
We just have to set some parameters, the query type and the size of the result set.
@@ -196,14 +218,20 @@ public function applyAction($actionName, $httpVars, $fileVars)
196218
\Elastica\Search::OPTION_SEARCH_TYPE => \Elastica\Search::OPTION_SEARCH_TYPE_QUERY_THEN_FETCH,
197219
\Elastica\Search::OPTION_SIZE => $maxResults);
198220

199-
$this->logDebug(__FUNCTION__,"Executing query: ", $query);
221+
$this->logDebug(__FUNCTION__,"Executing query: ", $textQuery);
200222
$fullQuery = new Elastica\Query();
201223
$fullQuery->setQuery($fieldQuery);
202224

203-
// ADD SCOPE FILTER
204-
$term = new Elastica\Filter\Term();
205-
$term->setTerm("ajxp_scope", "shared");
206-
$fullQuery->setPostFilter($term);
225+
$qb = new Elastica\QueryBuilder();
226+
$fullQuery = new Elastica\Query();
227+
$fullQuery->setQuery(
228+
$qb->query()->filtered(
229+
$fieldQuery,
230+
$qb->filter()->bool()
231+
->addMust(new Elastica\Filter\Term(array("ajxp_scope" => "shared")))
232+
)
233+
);
234+
207235

208236
$result = $search->search($fullQuery, $searchOptions);
209237
$this->logDebug(__FUNCTION__,"Search finished. ");
@@ -236,7 +264,7 @@ public function applyAction($actionName, $httpVars, $fileVars)
236264

237265
$scope = "user";
238266
try {
239-
$this->loadIndex(ConfService::getRepository()->getId(), false);
267+
$this->loadIndex($repoId, false);
240268
} catch (Exception $ex) {
241269
throw new Exception($messages["index.lucene.7"]);
242270
}
@@ -351,7 +379,7 @@ public function recursiveIndexation($url)
351379
*/
352380
public function updateNodeIndexMeta($node)
353381
{
354-
$this->loadIndex(ConfService::getRepository()->getId());
382+
$this->loadIndex($node->getRepositoryId(), true, $node->getUser());
355383
if (AuthService::usersEnabled() && AuthService::getLoggedUser()!=null) {
356384

357385
$query = new Elastica\Query\Term();
@@ -387,9 +415,9 @@ public function updateNodeIndexMeta($node)
387415
public function updateNodeIndex($oldNode, $newNode = null, $copy = false, $recursive = false)
388416
{
389417
if($oldNode == null){
390-
$this->loadIndex($newNode->getRepositoryId());
418+
$this->loadIndex($newNode->getRepositoryId(), true, $newNode->getUser());
391419
}else{
392-
$this->loadIndex($oldNode->getRepositoryId());
420+
$this->loadIndex($oldNode->getRepositoryId(), true, $oldNode->getUser());
393421
}
394422

395423
if ($oldNode != null && $copy == false) {
@@ -412,14 +440,21 @@ public function updateNodeIndex($oldNode, $newNode = null, $copy = false, $recur
412440
// Make sure it does not already exists anyway
413441
$newDocId = $this->getIndexedDocumentId($newNode);
414442
if ($newDocId != null) {
415-
$this->currentType->deleteById($newDocId);
443+
try{
444+
$this->currentType->deleteById($newDocId);
445+
}catch (Elastica\Exception\NotFoundException $eEx){
446+
$this->logError(__FUNCTION__, "Trying to delete a non existing document");
447+
}
416448
$childrenHits = $this->getIndexedChildrenDocuments($newNode);
417-
418449
if ($childrenHits != null) {
419450
$childrenHits = $childrenHits->getResults();
420451

421452
foreach ($childrenHits as $hit) {
422-
$this->currentType->deleteById($hit->getId());
453+
try{
454+
$this->currentType->deleteById($hit->getId());
455+
}catch (Elastica\Exception\NotFoundException $eEx){
456+
$this->logError(__FUNCTION__, "Trying to delete a non existing document");
457+
}
423458
}
424459
}
425460
}
@@ -473,6 +508,14 @@ public function createIndexedDocument($ajxpNode)
473508
$data["ajxp_node"] = "yes";
474509
$data["ajxp_scope"] = "shared";
475510
$data["serialized_metadata"] = base64_encode(serialize($ajxpNode->metadata));
511+
$data["ajxp_modiftime"] = date("Ymd", $ajxpNode->ajxp_modiftime);
512+
$data["ajxp_bytesize"] = $ajxpNode->bytesize;
513+
$ajxpMime = $ajxpNode->ajxp_mime;
514+
if (empty($ajxpMime)) {
515+
$data["ajxp_mime"] = pathinfo($ajxpNode->getLabel(), PATHINFO_EXTENSION);
516+
} else {
517+
$data["ajxp_mime"] = $ajxpNode->ajxp_mime;
518+
}
476519

477520
if (isSet($ajxpNode->indexableMetaKeys["shared"])) {
478521
foreach ($ajxpNode->indexableMetaKeys["shared"] as $sharedField) {
@@ -540,9 +583,10 @@ protected function dataToMappingProperties($data){
540583
$mapping_properties[$key] = array("type" => "string", "index" => "not_analyzed");
541584
} else if($key == "serialized_metadata"){
542585
$mapping_properties[$key] = array("type" => "string" /*, "index" => "no" */);
586+
} else if ($key == "ajxp_bytesize"){
587+
$mapping_properties[$key] = array("type" => "long");
543588
} else {
544589
$type = gettype($value);
545-
546590
if ($type != "integer" && $type != "boolean" && $type != "double") {
547591
$type = "string";
548592
}
@@ -599,23 +643,26 @@ public function getIndexedChildrenDocuments($ajxpNode)
599643
* load the index into the class parameter currentIndex
600644
* @param Integer $repositoryId
601645
* @param bool $create
646+
* @param null $resolveUserId
602647
*/
603-
protected function loadIndex($repositoryId, $create = true)
648+
protected function loadIndex($repositoryId, $create = true, $resolveUserId = null)
604649
{
605-
$this->currentIndex = $this->client->getIndex($repositoryId);
650+
$specificId = $this->buildSpecificId($repositoryId, $resolveUserId);
651+
652+
$this->currentIndex = $this->client->getIndex($specificId);
606653

607654
/* if the cache directory for the repository index is not created we do create it */
608-
$iPath = (defined('AJXP_SHARED_CACHE_DIR')?AJXP_SHARED_CACHE_DIR:AJXP_CACHE_DIR)."/indexes/".$repositoryId;
655+
$iPath = (defined('AJXP_SHARED_CACHE_DIR')?AJXP_SHARED_CACHE_DIR:AJXP_CACHE_DIR)."/indexes/".$specificId;
609656
if(!is_dir($iPath)) mkdir($iPath,0755, true);
610657

611-
if (!$this->currentIndex->exists() && $create) {
658+
if ($create && !$this->currentIndex->exists()) {
612659
$this->currentIndex->create();
613660
}
614661

615-
$this->currentType = new Elastica\Type($this->currentIndex, "type_".$repositoryId);
662+
$this->currentType = new Elastica\Type($this->currentIndex, "type_".$specificId);
616663

617664
/* we fetch the last id we used to create a document and set the variable nextId */
618-
$this->lastIdPath = (defined('AJXP_SHARED_CACHE_DIR')?AJXP_SHARED_CACHE_DIR:AJXP_CACHE_DIR)."/indexes/".$repositoryId."/last_id";
665+
$this->lastIdPath = (defined('AJXP_SHARED_CACHE_DIR')?AJXP_SHARED_CACHE_DIR:AJXP_CACHE_DIR)."/indexes/".$specificId."/last_id";
619666
if (file_exists($this->lastIdPath)) {
620667
$file = fopen($this->lastIdPath, "r");
621668
$this->nextId = floatval(fgets($file)) + 1;

core/src/plugins/index.elasticsearch/manifest.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
<global_param name="PARSE_CONTENT_TXT" type="string" label="CONF_MESSAGE[Text files]" description="CONF_MESSAGE[List of extensions to consider as Text file and parse content]" mandatory="true" default="txt"/>
2424
<global_param name="UNOCONV" type="string" label="CONF_MESSAGE[Unoconv Path]" description="CONF_MESSAGE[Full path on the server to the 'unoconv' binary]" default="" mandatory="false"/>
2525
<global_param name="PDFTOTEXT" type="string" label="CONF_MESSAGE[PdftoText Path]" description="CONF_MESSAGE[Full path on the server to the 'pdftotext' binary]" default="" mandatory="false"/>
26-
<global_param name="QUERY_ANALYSER" type="select" choices="utf8num_insensitive|UTF8 Text/Num (case insensitive),utf8num_sensitive|UTF8 Text/Num (case sensitive),utf8_insensitive|UTF8 Text (case insensitive),utf8_sensitive|UTF8 Text (case sensitive),textnum_insensitive|Text/Num (case insensitive),textnum_sensitive|Text/Num (case sensitive),text_insensitive|Text (case insensitive),text_sensitive|Text (case sensitive)" label="CONF_MESSAGE[Query Analyzer]" description="CONF_MESSAGE[Analyzer used by Zend to parse the queries. Warning, the UTF8 analyzers require the php mbstring extension.]" default="textnum_insensitive" mandatory="false"/>
26+
<global_param name="AUTO_WILDCARD" type="boolean" label="CONF_MESSAGE[Auto-Wildcard]" description="CONF_MESSAGE[Automatically append a * after the user query to make the search broader]" default="false" mandatory="false"/>
2727
<global_param name="WILDCARD_LIMITATION" type="integer" label="CONF_MESSAGE[Wildcard limitation]" description="CONF_MESSAGE[For the sake of performances, it is not recommanded to use wildcard as a very first character of a query string. Lucene recommends asking the user minimum 3 characters before wildcard. Still, you can set it to 0 if necessary for your usecases.]" default="3" mandatory="false"/>
2828
</server_settings>
2929
<registry_contributions>

0 commit comments

Comments
 (0)