Skip to content

Commit ec2675a

Browse files
committed
[FEATURE] Improve parsing/rendering of score debug output
1 parent 1e3fe3f commit ec2675a

File tree

1 file changed

+114
-52
lines changed

1 file changed

+114
-52
lines changed

Classes/Domain/Search/Score/ScoreCalculationService.php

Lines changed: 114 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -15,20 +15,31 @@
1515

1616
namespace ApacheSolrForTypo3\Solr\Domain\Search\Score;
1717

18+
use TYPO3\CMS\Core\Utility\GeneralUtility;
19+
1820
/**
1921
* Provides the functionality to calculate scores and renders them in a minimalistic template.
2022
*/
2123
class ScoreCalculationService
2224
{
25+
private array $fieldBoostMapping;
26+
2327
/**
 * Renders an overview in HTML of how the score for a certain document has been calculated by Apache Solr using debug data.
 *
 * The query fields are turned into a field => boost lookup that is consulted while the
 * individual debug lines are parsed; the debug data itself is split into lines and
 * handed to the hierarchical parser.
 *
 * @param string $solrDebugData debug data from the solr response
 * @param string $queryFields comma separated list of query fields, each optionally suffixed with "^<boost>"
 * @return string The HTML showing the score analysis
 */
public function getRenderedScores(string $solrDebugData, string $queryFields): string
{
    // Start from a clean mapping so boosts of a previous call on the same
    // service instance can not leak into this analysis.
    $this->fieldBoostMapping = [];
    foreach (GeneralUtility::trimExplode(',', $queryFields, true) as $queryField) {
        // A field may be listed without an explicit "^boost" suffix; pad the
        // result so the destructuring never hits an undefined offset.
        [$field, $boost] = array_pad(explode('^', $queryField, 2), 2, null);
        // Solr treats a query field without boost as neutral (factor 1).
        $this->fieldBoostMapping[$field] = $boost === null ? 1.0 : (float)$boost;
    }

    // The line breaks come from the Solr response, not from the PHP host, so
    // split on any line break sequence instead of PHP_EOL.
    $solrDebugArray = preg_split('/\R/', trim($solrDebugData)) ?: [];
    $highScores = $this->parseScores($solrDebugArray);

    return $this->render($highScores);
}
3445

@@ -38,71 +49,122 @@ public function getRenderedScores(string $solrDebugData, string $queryFields): s
3849
public function render(array $highScores): string
{
    // Wraps the rows of the whole score tree into a table. Each element of
    // $highScores is expected to be an array with the keys 'node' (a Score
    // or null) and 'children' (a list of elements of the same shape).
    return '<table class="table">'
        . '<thead><tr><th>Score</th><th>Field</th><th>Boost</th><th>Search term</th></tr></thead>'
        . '<tbody>'
        . $this->renderNodes($highScores, 0, null)
        . '</tbody>'
        . '</table>';
}

/**
 * Renders the table rows for one level of the score tree and, recursively,
 * for all levels below it.
 *
 * Recursion replaces a fixed number of nested loops, so nodes are rendered
 * regardless of how deep they are nested.
 *
 * @param array $entries list of ['node' => ?Score, 'children' => array] elements
 * @param int $level nesting level of the given entries, used for indentation
 * @param Score|null $parent node the entries belong to, null for the top level
 * @return string the concatenated HTML table rows
 */
private function renderNodes(array $entries, int $level, ?Score $parent): string
{
    $rows = '';
    foreach ($entries as $entry) {
        $node = $entry['node'] ?? null;
        // Lines that could not be parsed carry no Score; skip the row but
        // still descend so their children are not lost.
        if ($node !== null) {
            $rows .= $this->renderRow($node, $level, $parent);
        }
        $rows .= $this->renderNodes($entry['children'] ?? [], $level + 1, $node);
    }

    return $rows;
}
75+
76+
/**
 * Renders a single score node as an HTML table row.
 *
 * Children of a "max of" node whose score differs from the parent's score
 * did not win the maximum and are therefore rendered in gray.
 *
 * @param Score|null $node the node to render; null yields an empty string
 * @param int $level nesting level, each level indents the score column further
 * @param Score|null $parent the node this one is nested under, null for top level nodes
 * @return string the HTML of the table row
 */
private function renderRow(?Score $node, int $level, ?Score $parent): string
{
    // Unparsable debug lines have no Score object; there is nothing to show.
    if ($node === null) {
        return '';
    }

    $style = '';
    // Loose comparison on purpose: the accessor's return type is not declared
    // here, and the original check compared loosely as well.
    if ($parent?->getFieldName() === 'max of' && $parent->getScore() != $node->getScore()) {
        $style = 'color:gray';
    }

    // The entity needs its terminating semicolon to be valid HTML.
    $pad = str_repeat('&nbsp;', $level * 7);
    $cellStart = '<td style="' . $style . '">';

    return '<tr>'
        . $cellStart . $pad . '+&nbsp;' . number_format($node->getScore(), 2) . '</td>'
        . $cellStart . htmlspecialchars($node->getFieldName()) . '</td>'
        // htmlspecialchars() expects a string, the boost is numeric.
        . $cellStart . htmlspecialchars((string)$node->getBoost()) . '</td>'
        . $cellStart . htmlspecialchars((string)$node->getSearchTerm()) . '</td>'
        . '</tr>';
}
5892

5993
/**
 * Recursively turns an array of indented lines into a hierarchical array.
 *
 * Two characters of leading whitespace count as one nesting level. Lines are
 * consumed from the front of $lines; a nested call stops at the first line
 * that is indented less than the level it is collecting, so the calling level
 * continues with exactly that line.
 *
 * @param string[] $lines remaining lines, consumed while parsing (hence by reference)
 * @param int $depth minimal nesting level a line needs to belong to the level being collected
 * @param int $failsafe number of nested calls made so far, guards against runaway recursion
 * @return array[] list of ['node' => Score, 'children' => array[]] elements
 * @throws \RuntimeException if the lines are nested 1000 or more levels deep
 */
private function parseScores(array &$lines = [], int $depth = 0, int $failsafe = 0): array
{
    if ($failsafe >= 1000) {
        // Raise a catchable error instead of terminating the whole request.
        throw new \RuntimeException('Solr score debug output is nested too deeply to be parsed.');
    }

    $result = [];
    while ($lines !== []) {
        $line = (string)reset($lines);

        // Blank lines carry no information; skip them instead of letting
        // them end the parsing of everything that follows.
        if (trim($line) === '') {
            array_shift($lines);
            continue;
        }

        $indentation = strlen($line) - strlen(ltrim($line));
        $currentDepth = intdiv($indentation, 2);

        if ($currentDepth < $depth) {
            // that's the next parent already!
            break;
        }

        // The line belongs to this level. It is consumed exactly once, even
        // if it is indented deeper than expected, so it can not show up twice.
        array_shift($lines);
        $node = $this->parseLine(trim($line));
        // Children have to be indented deeper than the line itself.
        $children = $this->parseScores($lines, $currentDepth + 1, $failsafe + 1);

        if ($node === null) {
            // The line could not be interpreted: drop it, but keep whatever
            // was nested below it by lifting it up to this level.
            $result = array_merge($result, $children);
            continue;
        }

        $result[] = [
            'node' => $node,
            'children' => $children,
        ];
    }

    return $result;
}
128+
129+
/**
 * Parses a single line of score debugging output and
 * transforms it into a Score object.
 *
 * A line is expected to look like "<score> = <description>". Depending on
 * the description the resulting Score carries:
 *  - weight(<field>:<term> ...): field name, configured boost and search term
 *  - "sum of:" / "max of:": the field name 'sum of' / 'max of'
 *  - FunctionQuery(<function>),: the field name 'boostFunction' and the function as search term
 *  - anything else: an empty field name and the description as search term
 *
 * @param string $line a single line of the debug output
 * @return Score|null the parsed score, null if the line does not contain "<score> = "
 */
private function parseLine(string $line): ?Score
{
    // Accept plain decimals as well as integers, negative values and
    // scientific notation (e.g. "1.0E-4"); the lookbehind keeps the match
    // from starting in the middle of a word or number.
    $linePattern = '/(?<![\w.])(-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?) = (.*)/';
    if (preg_match($linePattern, $line, $lineMatch) !== 1) {
        return null;
    }

    $score = (float)$lineMatch[1];
    $description = $lineMatch[2];

    // Older debug output puts "(MATCH) " in front of "weight(".
    if (preg_match('/^(?:\(MATCH\) )?weight\((.*)\)/', $description, $weightMatch) === 1) {
        $field = '';
        $boost = 0.0;
        $searchTerm = '??';
        // <field>:<term> or <field>:"<phrase>"; a trailing "^boost" is not
        // part of the term. The u modifier keeps multi-byte terms intact.
        $termPattern = '/([\w.-]+):(?:"([^"]*)"|([^\s")^]+))/u';
        if (preg_match($termPattern, $weightMatch[1], $termMatch) === 1) {
            $field = $termMatch[1];
            $boost = $this->fieldBoostMapping[$field] ?? 0.0;
            // Group 3 only exists for unquoted terms, group 2 holds a phrase.
            $searchTerm = $termMatch[3] ?? $termMatch[2];
        }

        return new Score($boost, $field, $score, $searchTerm);
    }

    if (str_starts_with($description, 'sum of:')) {
        return new Score(0.0, 'sum of', $score, '');
    }

    if (str_starts_with($description, 'max of:')) {
        return new Score(0.0, 'max of', $score, '');
    }

    if (preg_match('/^FunctionQuery\((.*)\),/', $description, $functionMatch) === 1) {
        return new Score(0.0, 'boostFunction', $score, $functionMatch[1]);
    }

    // Anything else is kept verbatim so no part of the explanation is lost.
    return new Score(0.0, '', $score, $description);
}
108170
}

0 commit comments

Comments
 (0)