Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@ public function __construct(
*/
public function build(AbstractQuery $query, ?ParamBag $params = null)
{
$query = $this->possiblyConvertMixedExactQueryIntoAdvanced($query);

$newParams = new ParamBag();

// Add spelling query if applicable -- note that we must set this up before
Expand Down Expand Up @@ -210,6 +212,64 @@ public function build(AbstractQuery $query, ?ParamBag $params = null)
return $newParams;
}

/**
 * Convert a simple query (Query) into an advanced one (QueryGroup) when it mixes
 * one exact (quoted) phrase with non-exact text, so that each part becomes its
 * own sub-query.
 *
 * Supported input: a single quoted phrase at the very start or the very end of
 * the query string, combined with one non-quoted part. The two parts may be
 * joined with AND or OR (AND is used when no operator is given), and the quoted
 * phrase may be negated with a preceding NOT or "-".
 *
 * The original query is returned unchanged when:
 * - it is not a Query instance;
 * - its handler is set but has no entry in $this->exactSpecs;
 * - it contains parentheses, a field name (":"), or more than one quoted phrase;
 * - there is a "+" before the quoted phrase;
 * - the quoted phrase stands alone or has text on both sides.
 *
 * @param AbstractQuery $query User query
 *
 * @return AbstractQuery
 */
protected function possiblyConvertMixedExactQueryIntoAdvanced(AbstractQuery $query): AbstractQuery
{
    if (!($query instanceof Query)) {
        return $query;
    }
    $handler = $query->getHandler();
    if ($handler && !isset($this->exactSpecs[strtolower($handler)])) {
        return $query;
    }
    $queryString = trim($query->getString());
    // Split into text before the quotes, the quoted phrase and text after it.
    // The character classes reject a second quote, field separators and
    // parentheses anywhere, and additionally "+" before the quoted phrase.
    if (!preg_match('/^([^":()+]*)"([^"]+)"([^":()]*)$/u', $queryString, $parts)) {
        return $query;
    }
    $groupOperator = 'AND';
    $negateQuotedPart = false;
    $before = trim($parts[1]);
    // A NOT or "-" directly in front of the quotes negates the quoted phrase.
    // When nothing precedes it, the optional first group is reported as ''.
    if (preg_match('/^(.+\s+)?(NOT|-)$/u', $before, $notParts)) {
        $before = trim($notParts[1]);
        $negateQuotedPart = true;
    }
    // Operator between the leading text and the quoted phrase.
    if (preg_match('/^(.*)\s+(AND|OR)$/u', $before, $beforeParts)) {
        // Trim so that repeated spaces in front of the operator are not kept.
        $before = trim($beforeParts[1]);
        $groupOperator = $beforeParts[2];
    }
    $quoted = '"' . $parts[2] . '"';
    $after = trim($parts[3]);
    // Operator between the quoted phrase and the trailing text. The lookahead
    // requires whitespace or end of string after the operator, so that words
    // which merely start with AND/OR (e.g. "ANDROID", "ORANGE") stay intact.
    if (preg_match('/^(AND|OR)(?=\s|$)\s*(.*)$/u', $after, $afterParts)) {
        $groupOperator = $afterParts[1];
        $after = $afterParts[2];
    }
    // Exactly one side of the quoted phrase must contain text.
    if (($before === '' && $after === '') || ($before !== '' && $after !== '')) {
        return $query;
    }
    $subQueries = [];
    if ($before !== '') {
        $subQueries[] = new Query($before, $handler);
    }
    if ($negateQuotedPart) {
        $subQueries[] = new QueryGroup('NOT', [ new Query($quoted, $handler) ]);
    } else {
        $subQueries[] = new Query($quoted, $handler);
    }
    if ($after !== '') {
        $subQueries[] = new Query($after, $handler);
    }
    return new QueryGroup($groupOperator, $subQueries);
}

/**
* Check if the conditions match for an extra parameter
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,74 @@ public function testExactQueryHandler()
$this->assertEquals('c d', $qf[0]);
}

/**
 * Test queries with mixed exact and non-exact parts.
 *
 * First checks that a handler without ExactSettings leaves a mixed query string
 * untouched, then runs a list of [input, expected "q" value] pairs against a
 * handler that defines ExactSettings.
 *
 * @return void
 */
public function testMixedExactQueryHandler()
{
// Check QueryBuilder without ExactSettings
$qb = new QueryBuilder(
[
'TestHandler' => [
'DismaxFields' => ['a'],
'DismaxHandler' => 'edismax',
],
]
);
$q = new Query('"t1" AND t2', 'TestHandler');
$response = $qb->build($q);
$queryString = $response->get('q')[0];
// Without ExactSettings the query string must come back unchanged.
$this->assertEquals('"t1" AND t2', $queryString);

// Expected inputs and outputs with ExactSettings:
$tests = [
['"t1"', '"t1"'], // simple exact queries are not affected
['("t1" OR t2) AND t3', '("t1" OR t2) AND t3'], // queries with parenthesis are not supported
['"t1" AND title:t2', '"t1" AND title:t2'], // queries with field are not supported
['"t1" AND "t2"', '"t1" AND "t2"'], // queries with multiple exact parts are not supported
['t1 AND "t2" AND t3', 't1 AND "t2" AND t3'], // queries with an exact part in the middle are not supported
// The remaining inputs are converted: in the expected output the quoted part
// is searched with qf "b" (ExactSettings) and the unquoted part with qf "a".
['"t1" t2', '((_query_:"{!edismax qf=\"b\" mm=\\\'0%\\\'}\"t1\"") AND ' .
'(_query_:"{!edismax qf=\"a\" mm=\\\'0%\\\'}t2"))'],
['"t1" AND t2', '((_query_:"{!edismax qf=\"b\" mm=\\\'0%\\\'}\"t1\"") AND ' .
'(_query_:"{!edismax qf=\"a\" mm=\\\'0%\\\'}t2"))'],
['"t1" OR t2', '((_query_:"{!edismax qf=\"b\" mm=\\\'0%\\\'}\"t1\"") OR ' .
'(_query_:"{!edismax qf=\"a\" mm=\\\'0%\\\'}t2"))'],
['t1 AND "t2"', '((_query_:"{!edismax qf=\"a\" mm=\\\'0%\\\'}t1") AND ' .
'(_query_:"{!edismax qf=\"b\" mm=\\\'0%\\\'}\"t2\""))'],
['NOT "t1" AND t2', '((*:* NOT ((_query_:"{!edismax qf=\"b\" mm=\\\'0%\\\'}\"t1\""))) AND ' .
'(_query_:"{!edismax qf=\"a\" mm=\\\'0%\\\'}t2"))'],
['t1 AND NOT "t2"', '((_query_:"{!edismax qf=\"a\" mm=\\\'0%\\\'}t1") AND ' .
'(*:* NOT ((_query_:"{!edismax qf=\"b\" mm=\\\'0%\\\'}\"t2\""))))'],
['-"t1" t2', '((*:* NOT ((_query_:"{!edismax qf=\"b\" mm=\\\'0%\\\'}\"t1\""))) AND ' .
'(_query_:"{!edismax qf=\"a\" mm=\\\'0%\\\'}t2"))'],
['"t1" AND t2 AND t3', '((_query_:"{!edismax qf=\"b\" mm=\\\'0%\\\'}\"t1\"") AND ' .
'(_query_:"{!edismax qf=\"a\" mm=\\\'0%\\\'}t2 AND t3"))'], // would be different with dismax
];
Comment on lines 320 to 342
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would this part be better handled as a separate test using a data provider?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is consistent with testNormalization(). Are you thinking of something like getQuestionTests() ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh, these existing tests are very old and not using best practices. Even getQuestionTests should be refactored to use a proper dataProvider. Would it be helpful for me to open a PR to modernize the existing tests so you have a model to work from here? I can probably find time for that tomorrow if it would be useful.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I opened #4981 as a demonstration of the sort of change I had in mind.


// Same handler as above, now with ExactSettings that use field "b".
$qb = new QueryBuilder(
[
'TestHandler' => [
'DismaxFields' => ['a'],
'DismaxHandler' => 'edismax',
'ExactSettings' => [
'DismaxFields' => ['b'],
'DismaxHandler' => 'edismax',
],
],
]
);

// Build every input and compare the first "q" parameter with the expectation.
foreach ($tests as $test) {
[$input, $output] = $test;
$q = new Query($input, 'TestHandler');
$response = $qb->build($q);
$queryString = $response->get('q')[0];
$this->assertEquals($output, $queryString);
}
}

/**
* Test generation with a query handler with a filter set and DisMax settings
*
Expand Down