1515
1616namespace ApacheSolrForTypo3 \Solr \Domain \Search \Score ;
1717
18+ use TYPO3 \CMS \Core \Utility \GeneralUtility ;
19+
1820/**
1921 * Provides the functionality to calculate scores and renders them in a minimalistic template.
2022 */
2123class ScoreCalculationService
2224{
25+ private array $ fieldBoostMapping ;
26+
/**
 * Renders an overview in HTML of how the score for a certain document has been calculated by Apache Solr using debug data.
 *
 * The query fields are turned into a field => boost lookup first, so that every parsed
 * "weight(field:term ...)" line can be annotated with the boost configured for its field.
 *
 * @param string $solrDebugData debug data from the solr response
 * @param string $queryFields comma-separated query fields, each optionally suffixed with "^<boost>", e.g. "title^5.0, content^2.0"
 * @return string The HTML showing the score analysis
 */
public function getRenderedScores(string $solrDebugData, string $queryFields): string
{
    // Start from a clean lookup: without this, boosts from an earlier call on the same
    // instance would leak into this one (and the typed property would stay uninitialized
    // when $queryFields is empty).
    $this->fieldBoostMapping = [];

    foreach (GeneralUtility::trimExplode(',', $queryFields, true) as $queryField) {
        // Split into at most two parts and pad, so a field without "^boost" does not
        // raise an undefined-offset warning during destructuring.
        [$field, $boost] = array_pad(explode('^', $queryField, 2), 2, null);
        // A field listed without an explicit boost gets 1.0 here
        // (assumed to mirror Solr's default boost - confirm against the Solr qf documentation).
        $this->fieldBoostMapping[trim($field)] = $boost === null ? 1.0 : (float)$boost;
    }

    // Split on any line-break style; the Solr response does not necessarily use this host's PHP_EOL.
    $solrDebugArray = preg_split('/\R/', trim($solrDebugData)) ?: [];
    $highScores = $this->parseScores($solrDebugArray);
    return $this->render($highScores);
}
3445
@@ -38,71 +49,122 @@ public function getRenderedScores(string $solrDebugData, string $queryFields): s
/**
 * Renders the parsed score tree as an HTML table.
 *
 * @param array $highScores list of entries as produced by parseScores(); each entry holds
 *                          a Score (or null) under 'node ' and a list of child entries under 'children '
 * @return string The HTML table; only nesting levels 0 to 3 are rendered
 */
public function render(array $highScores): string
{
    $content = '<table class="table"> '
        . '<thead><tr><th>Score</th><th>Field</th><th>Boost</th><th>Search term</th></tr></thead> '
        . '<tbody> ';

    $content .= $this->renderLevel($highScores, 0, null);

    $content .= '</tbody> '
        . '</table> ';

    return $content;
}

/**
 * Renders all entries of one nesting level followed, depth-first, by their children.
 *
 * @param array $entries entries of the current level
 * @param int $level nesting level of $entries, 0 for the top level
 * @param mixed $parent the Score of the entry these entries belong to, or null
 * @param int $maxLevel deepest level that is still rendered; deeper entries are omitted
 * @return string the concatenated table rows
 */
private function renderLevel(array $entries, int $level, $parent, int $maxLevel = 3): string
{
    $rows = '';
    foreach ($entries as $entry) {
        $node = $entry['node '] ?? null;
        // parseLine() returns null for lines it cannot interpret; such entries have
        // nothing to show, but their children may still be renderable.
        if ($node !== null) {
            $rows .= $this->renderRow($node, $level, $parent);
        }
        if ($level < $maxLevel) {
            $rows .= $this->renderLevel($entry['children '] ?? [], $level + 1, $node, $maxLevel);
        }
    }
    return $rows;
}
75+
/**
 * Renders a single score node as one table row.
 *
 * @param mixed $node the Score to render (must offer getScore(), getFieldName(), getBoost(), getSearchTerm())
 * @param mixed $level nesting level, used to indent the score column
 * @param mixed $parent the Score of the parent row, or null for top-level rows
 * @return string the HTML of the table row
 */
private function renderRow($node, $level, $parent): string
{
    // Children of a "max of" node whose score differs from the parent's score are greyed out.
    $style = '';
    if ($parent?->getFieldName() === 'max of ' && $parent->getScore() != $node->getScore()) {
        $style = 'color:gray ';
    }

    // Use the explicit entity: ordinary spaces collapse in HTML and a literal
    // non-breaking space is invisible in the source code.
    $pad = str_repeat('&nbsp;', (int)$level * 7);

    // getBoost() is cast explicitly because Score objects are constructed with a float boost
    // in this class and htmlspecialchars() only accepts strings when strict types are active.
    return '<tr> '
        . '<td style=" ' . $style . '"> ' . $pad . '+ ' . number_format($node->getScore(), 2) . '</td> '
        . '<td style=" ' . $style . '"> ' . htmlspecialchars($node->getFieldName()) . '</td> '
        . '<td style=" ' . $style . '"> ' . htmlspecialchars((string)$node->getBoost()) . '</td> '
        . '<td style=" ' . $style . '"> ' . htmlspecialchars($node->getSearchTerm()) . '</td> '
        . '</tr> ';
}
5892
/**
 * Recursively turns an array of indented lines into a hierarchical array.
 *
 * Two characters of leading whitespace count as one nesting level. Lines are consumed
 * from the front of $lines as they are parsed, so the caller's array is emptied.
 *
 * @param array $lines remaining lines of the debug output, modified in place
 * @param int $depth nesting level the current call collects entries for
 * @param int $failsafe current recursion depth, used to abort on absurdly deep nesting
 * @return array list of ['node ' => ?Score, 'children ' => array] entries
 * @throws \RuntimeException if the input is nested deeper than 1000 levels
 */
private function parseScores(array &$lines = [], int $depth = 0, int $failsafe = 0): array
{
    if ($failsafe >= 1000) {
        // Throw instead of die(): terminating the whole process from a service class
        // gives the caller no chance to handle the problem.
        throw new \RuntimeException('Score debug data is nested deeper than 1000 levels', 1700000001);
    }

    $result = [];
    // Compare against false explicitly: a truthiness check would stop at the first
    // blank line (or a line consisting of "0") and silently drop everything after it.
    while (($line = current($lines)) !== false) {
        $line = (string)$line;
        if (trim($line) === '') {
            array_shift($lines);
            continue;
        }

        $indentation = strlen($line) - strlen(ltrim($line));
        $currentDepth = intdiv($indentation, 2);

        if ($currentDepth < $depth) {
            // that's the next parent already!
            break;
        }

        // The line belongs to this level (or is indented further than expected, in which
        // case it is adopted here). Consume it exactly once, so it cannot show up again
        // as a duplicate node on the levels below.
        array_shift($lines);
        $result[] = [
            'node ' => $this->parseLine(trim($line)),
            'children ' => $this->parseScores($lines, $depth + 1, $failsafe + 1),
        ];
    }

    return $result;
}
128+
/**
 * Parses a single line of score debugging output and
 * transforms it into a Score object.
 *
 * Recognised line types, checked in this order:
 *  - "<score> = weight(field:term ...)" : field, configured boost and search term are extracted
 *  - "<score> = sum of:" and "<score> = max of:"
 *  - "<score> = FunctionQuery(...),"    : the function text is stored as search term
 *  - "<score> = <anything>"             : the remaining text is stored as search term
 *
 * @param string $line one trimmed line of the debug output
 * @return Score|null the parsed score, or null if the line does not start a score expression
 */
private function parseLine(string $line): ?Score
{
    // Accept an optional exponent as well ("1.0E-4"), not just plain decimals
    // (assumes Solr prints scores as Java floats - confirm against real debug output).
    $number = '(\d+\.\d+(?:[eE][+-]?\d+)?)';

    if (preg_match('/' . $number . ' = weight\((.*)\)/', $line, $weightMatch)) {
        $field = '';
        $boost = 0.0;
        $searchTerm = '?? ';
        // Plain term first, quoted phrase second. The "u" modifier keeps non-ASCII
        // letters (e.g. umlauts) inside \w instead of cutting the term short.
        $termPatterns = [
            '/(\w+):(\w+)/u',
            '/(\w+):"([\w\ ]+)"/u',
        ];
        foreach ($termPatterns as $termPattern) {
            if (preg_match($termPattern, $weightMatch[2], $match)) {
                $field = $match[1];
                $boost = $this->fieldBoostMapping[$field] ?? 0.0;
                $searchTerm = $match[2];
                break;
            }
        }
        return new Score($boost, $field, (float)$weightMatch[1], $searchTerm);
    }

    if (preg_match('/' . $number . ' = sum of:/', $line, $match)) {
        return new Score(0.0, 'sum of ', (float)$match[1], '');
    }

    if (preg_match('/' . $number . ' = max of:/', $line, $match)) {
        return new Score(0.0, 'max of ', (float)$match[1], '');
    }

    if (preg_match('/' . $number . ' = FunctionQuery\((.*)\),/', $line, $match)) {
        return new Score(0.0, 'boostFunction ', (float)$match[1], $match[2]);
    }

    if (preg_match('/' . $number . ' = (.*)/', $line, $match)) {
        return new Score(0.0, '', (float)$match[1], $match[2]);
    }

    return null;
}
108170}
0 commit comments