Skip to content

Commit 75009f8

Browse files
Merge branch 'main' into dependabot/npm_and_yarn/Build/lodash-4.17.23
2 parents 82e8cf8 + 83bdc9a commit 75009f8

File tree

18 files changed

+305
-8
lines changed

18 files changed

+305
-8
lines changed

Classes/Common/Indexer.php

Lines changed: 167 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,13 @@ class Indexer
8080
*/
8181
protected static array $processedDocs = [];
8282

83+
/**
84+
* @access protected
85+
* @static
86+
* @var array List of already extracted structure nodes for structure path
87+
*/
88+
protected static array $extractedStructurePathNodes = [];
89+
8390
/**
8491
* @access protected
8592
* @static
@@ -371,6 +378,10 @@ protected static function processLogical(Document $document, array $logicalUnit)
371378
$solrDoc->setField('toplevel', $logicalUnit['id'] == $doc->getToplevelId());
372379
$solrDoc->setField('title', $metadata['title'][0]);
373380
$solrDoc->setField('volume', $metadata['volume'][0] ?? '');
381+
// extract structure path
382+
self::$extractedStructurePathNodes[$logicalUnit['id']] = self::extractStructurePathNodes($doc->tableOfContents, $logicalUnit['id']);
383+
$processedStructurePath = self::buildStructurePathData(self::$extractedStructurePathNodes[$logicalUnit['id']], $document->getCurrentDocument()->getToplevelId());
384+
$solrDoc->setField('structure_path', json_encode($processedStructurePath, JSON_UNESCAPED_UNICODE));
374385
// verify date formatting
375386
if (strtotime($metadata['date'][0])) {
376387
$solrDoc->setField('date', self::getFormattedDate($metadata['date'][0]));
@@ -465,7 +476,21 @@ protected static function processPhysical(Document $document, int $page, array $
465476
$solrDoc->setField('type', $physicalUnit['type']);
466477
$solrDoc->setField('collection', $doc->metadataArray[$doc->getToplevelId()]['collection']);
467478
$solrDoc->setField('location', $document->getLocation());
468-
479+
// pick only the deepest structure paths
480+
$associatedPaths = [];
481+
foreach ($doc->smLinks['p2l'][$physicalUnit['id']] as $logicalId) {
482+
$path = self::$extractedStructurePathNodes[$logicalId] ?? [];
483+
if (!empty($path)) {
484+
$associatedPaths[$logicalId] = $path;
485+
}
486+
}
487+
$deepestPaths = self::filterDeepestStructurePaths($associatedPaths);
488+
$processedStructurePath = [];
489+
foreach ($deepestPaths as $path) {
490+
$segments = self::buildStructurePathData($path, $document->getCurrentDocument()->getToplevelId());
491+
$processedStructurePath[] = json_encode($segments, JSON_UNESCAPED_UNICODE);
492+
}
493+
$solrDoc->setField('structure_path', $processedStructurePath);
469494
$solrDoc->setField('fulltext', $fullText);
470495
if (is_array($doc->metadataArray[$doc->getToplevelId()])) {
471496
self::addFaceting($doc, $solrDoc, $physicalUnit);
@@ -728,6 +753,147 @@ private static function removeAppendsFromAuthor(array|string $authors): array|st
728753
return $authors;
729754
}
730755

756+
/**
 * Find the node with the given ID in a (sub-)tree and return the chain of nodes leading to it.
 *
 * The tree is searched depth-first through each node's 'children' entry. The result is a
 * flat list that starts with the outermost visited ancestor and ends with the target node.
 *
 * @access private
 *
 * @static
 *
 * @param array $nodes Tree or sub-tree to search; each node is an array with an 'id' and optional 'children'
 * @param string $targetId The ID of the logical structure element to be found
 * @param array $path Nodes already visited on the current branch (used by the recursion)
 *
 * @return array The nodes from the top of $nodes down to the target, or an empty array if the ID is not found
 */
private static function extractStructurePathNodes(array $nodes, string $targetId, array $path = []): array
{
    foreach ($nodes as $node) {
        // Ignore malformed entries instead of failing on them.
        if (!is_array($node)) {
            continue;
        }

        // remember where we came from
        $currentPath = array_merge($path, [$node]);

        // Compare as strings: a loose "==" treats numeric-looking IDs such as "10" and "1e1" as equal.
        $nodeId = $node['id'] ?? null;
        if ((is_string($nodeId) || is_int($nodeId)) && (string) $nodeId === $targetId) {
            return $currentPath;
        }

        if (!empty($node['children']) && is_array($node['children'])) {
            $result = self::extractStructurePathNodes($node['children'], $targetId, $currentPath);
            if ($result !== []) {
                return $result;
            }
        }
    }

    return [];
}
786+
787+
/**
 * Keep only the structure paths that descend the furthest into the structure tree.
 *
 * A path is dropped when its sequence of node IDs is a strict prefix of another path's
 * sequence, i.e. when another path passes through it and continues deeper.
 *
 * @access private
 *
 * @static
 *
 * @param array $paths The structure path nodes associated with a physical page, keyed by logical ID
 *
 * @return array The remaining paths, with their original keys preserved
 */
private static function filterDeepestStructurePaths(array $paths): array
{
    // Nothing to compare against.
    if (count($paths) <= 1) {
        return $paths;
    }

    // Resolve every path to its list of node IDs once instead of once per comparison.
    $idsByLogicalId = [];
    foreach ($paths as $logicalId => $path) {
        $idsByLogicalId[$logicalId] = array_column($path, 'id');
    }

    $deepestPaths = [];
    foreach ($paths as $currentLogicalId => $currentPath) {
        $currentIds = $idsByLogicalId[$currentLogicalId];
        $currentLength = count($currentIds);
        $isPrefix = false;

        foreach ($idsByLogicalId as $comparisonLogicalId => $comparisonIds) {
            if ($currentLogicalId === $comparisonLogicalId) {
                continue;
            }
            // check if structure path is part/prefix of another structure path
            if (
                $currentLength < count($comparisonIds)
                && array_slice($comparisonIds, 0, $currentLength) === $currentIds
            ) {
                $isPrefix = true;
                break;
            }
        }

        if (!$isPrefix) {
            $deepestPaths[$currentLogicalId] = $currentPath;
        }
    }

    return $deepestPaths;
}
830+
831+
/**
 * Create the array of path segments that will be JSON encoded and indexed as the structure path.
 *
 * If the cutoff ID occurs in the path, that node and everything before it are removed, so the
 * result only describes the nodes below it. Each remaining node is converted into a segment
 * by buildStructurePathSegments().
 *
 * @access private
 *
 * @static
 *
 * @param array $path The structure path nodes that shall be processed, ordered from ancestor to descendant
 * @param string $cutoffId The logical id at which ancestors and itself will not be part of the structure path data
 *
 * @return array One segment per remaining node, in path order
 */
private static function buildStructurePathData(array $path, string $cutoffId): array
{
    // Work on positions, so the cutoff index always refers to the same element that is sliced off.
    $path = array_values($path);

    // Scan the nodes directly: array_column() silently drops nodes without an 'id', which would
    // shift the index, and a non-strict array_search() treats numeric-looking IDs as equal.
    $cutoffIndex = null;
    foreach ($path as $index => $node) {
        $nodeId = is_array($node) ? ($node['id'] ?? null) : null;
        if ((is_string($nodeId) || is_int($nodeId)) && (string) $nodeId === $cutoffId) {
            $cutoffIndex = $index;
            break;
        }
    }

    if ($cutoffIndex !== null) {
        $path = array_slice($path, $cutoffIndex + 1);
    }

    $segments = [];
    foreach ($path as $node) {
        $segments[] = self::buildStructurePathSegments($node);
    }

    return $segments;
}
856+
857+
/**
 * Build the segment for a single structure path node.
 *
 * The first usable value wins, in this order: 'label', 'orderlabel', 'volume' (followed by
 * 'year' when present) and finally 'type'. Textual values are returned under the key 'label',
 * the structure type under the key 'type'.
 *
 * @access private
 *
 * @static
 *
 * @param array $node The current node that should be processed
 *
 * @return array Either ['label' => ...] or ['type' => ...]; ['label' => ''] if the node offers none of the values
 */
private static function buildStructurePathSegments(array $node): array
{
    // A value is usable when it is a non-empty string or a number. Unlike empty(), this keeps
    // legitimate values such as "0" (e.g. an order label or volume number of zero).
    $hasValue = static fn (string $key): bool => isset($node[$key])
        && (is_string($node[$key]) || is_int($node[$key]) || is_float($node[$key]))
        && (string) $node[$key] !== '';

    if ($hasValue('label')) {
        return [
            'label' => $node['label'],
        ];
    }

    if ($hasValue('orderlabel')) {
        return [
            'label' => $node['orderlabel'],
        ];
    }

    if ($hasValue('volume')) {
        $value = $hasValue('year')
            ? $node['volume'] . ' ' . $node['year']
            : $node['volume'];

        return [
            'label' => $value,
        ];
    }

    if ($hasValue('type')) {
        return [
            'type' => $node['type'],
        ];
    }

    return ['label' => ''];
}
896+
731897
/**
732898
* Handle exception.
733899
*

Classes/Common/Solr/SearchResult/ResultDocument.php

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,12 @@ class ResultDocument
7373
*/
7474
private ?string $type;
7575

76+
/**
77+
* @access private
78+
* @var array The JSON encoded structure path(s)
79+
*/
80+
private array $structurePath = [];
81+
7682
/**
7783
* @access private
7884
* @var Page[] All pages in which search phrase was found
@@ -117,6 +123,7 @@ public function __construct(Document $record, array $highlighting, array $fields
117123
$this->title = $record[$fields['title']];
118124
$this->toplevel = $record[$fields['toplevel']] ?? false;
119125
$this->type = $record[$fields['type']];
126+
$this->structurePath = $record[$fields['structure_path']] ?? [];
120127

121128
if (!empty($highlighting[$this->id])) {
122129
$highlightingForRecord = $highlighting[$this->id][$fields['fulltext']];
@@ -225,6 +232,18 @@ public function getType(): ?string
225232
return $this->type;
226233
}
227234

235+
/**
236+
* Get the JSON encoded structure path(s) of this result document, as read from the Solr record
237+
*
238+
* @access public
239+
*
240+
* @return array The JSON encoded structure path(s); an empty array if the record provided none
241+
*/
242+
public function getStructurePath(): array
243+
{
244+
return $this->structurePath;
245+
}
246+
228247
/**
229248
* Get all result's pages which contain search phrase.
230249
*

Classes/Common/Solr/Solr.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,7 @@ public static function getFields(): array
256256
self::$fields['type'] = $solrFields['type'];
257257
self::$fields['title'] = $solrFields['title'];
258258
self::$fields['volume'] = $solrFields['volume'];
259+
self::$fields['structure_path'] = $solrFields['structurePath'];
259260
self::$fields['date'] = $solrFields['date'] ?? null;
260261
self::$fields['thumbnail'] = $solrFields['thumbnail'];
261262
self::$fields['default'] = $solrFields['default'];

Classes/Common/Solr/SolrSearch.php

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -487,7 +487,7 @@ public function prepare()
487487
$params['listMetadataRecords'] = [];
488488

489489
// Restrict the fields to the required ones.
490-
$params['fields'] = 'uid,id,page,title,thumbnail,partof,toplevel,type';
490+
$params['fields'] = 'uid,id,page,title,thumbnail,partof,toplevel,type,structure_path';
491491

492492
if ($this->listedMetadata) {
493493
foreach ($this->listedMetadata as $metadata) {
@@ -560,6 +560,31 @@ public function submit(int $start, int $rows, bool $processResults = true): void
560560
$searchResult['page'] = $doc['page'];
561561
$searchResult['thumbnail'] = $doc['thumbnail'];
562562
$searchResult['structure'] = $doc['type'];
563+
// create string(s) from structure path(s)
564+
$encodedStructurePaths = $doc['structure_path'] ?? [];
565+
if (!is_array($encodedStructurePaths)) {
566+
$encodedStructurePaths = [$encodedStructurePaths];
567+
}
568+
$structurePathStrings = [];
569+
foreach ($encodedStructurePaths as $jsonString) {
570+
if (!is_string($jsonString) || $jsonString === '') {
571+
continue;
572+
}
573+
$segments = json_decode($jsonString, true);
574+
if ($segments === null && json_last_error() !== JSON_ERROR_NONE) {
575+
continue;
576+
}
577+
$structurePathLabels = [];
578+
foreach ($segments as $currentSegment) {
579+
if (isset($currentSegment['type'])) {
580+
$structurePathLabels[] = Helper::translate($currentSegment['type'], 'tx_dlf_structures', $this->settings['storagePid']);
581+
} elseif (!empty($currentSegment['label'])) {
582+
$structurePathLabels[] = $currentSegment['label'];
583+
}
584+
}
585+
$structurePathStrings[] = implode('', $structurePathLabels);
586+
}
587+
$searchResult['structure_path'] = $structurePathStrings;
563588
$searchResult['title'] = $doc['title'];
564589
foreach ($params['listMetadataRecords'] as $indexName => $solrField) {
565590
if (isset($doc['metadata'][$indexName])) {
@@ -901,6 +926,7 @@ private function getDocument(Document $record, array $highlighting, array $field
901926
'title' => $resultDocument->getTitle(),
902927
'toplevel' => $resultDocument->getToplevel(),
903928
'type' => $resultDocument->getType(),
929+
'structure_path' => $resultDocument->getStructurePath(),
904930
'uid' => !empty($resultDocument->getUid()) ? $resultDocument->getUid() : $parameters['uid'],
905931
'highlight' => $resultDocument->getHighlightsIds(),
906932
];

Configuration/ApacheSolr/configsets/dlf/conf/schema.xml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,8 @@ limitations under the License.
144144
<!-- Next two fields are mandatory for identifying documents. -->
145145
<field name="title" type="standard" indexed="true" stored="true" multiValued="false" default="" />
146146
<field name="volume" type="standard" indexed="true" stored="true" multiValued="false" default="" />
147+
<!-- Convenience field to provide context about the path within the logical structure map -->
148+
<field name="structure_path" type="string" indexed="false" stored="true" multiValued="true" default="" />
147149
<!-- The keydate of a resource e.g a newspaper was issued or a letter was written -->
148150
<field name="date" type="daterange" indexed="true" stored="true" multiValued="false" />
149151
<!-- URL of thumbnail image for the document. -->

Configuration/FlexForms/ListView.xml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,14 @@
6565
<default>0</default>
6666
</config>
6767
</settings.getTitle>
68+
<settings.getStructurePath>
69+
<exclude>1</exclude>
70+
<label>LLL:EXT:dlf/Resources/Private/Language/locallang_be.xlf:flexform.getStructurePath</label>
71+
<config>
72+
<type>check</type>
73+
<default>0</default>
74+
</config>
75+
</settings.getStructurePath>
6876
<settings.basketButton>
6977
<onChange>reload</onChange>
7078
<exclude>1</exclude>

Documentation/Administrator/Index.rst

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,26 @@ d. Reindex all documents. This can be done by the kitodo:reindex CLI command wit
226226
Furthermore version 5.1 supports the use of Solr Managed Schemas to update the schemas automatically during the update of the extension.
227227
To use this feature you have to change the schemaFactory within solrconfig.xml from "ClassicIndexSchemaFactory" to "ManagedIndexSchemaFactory".
228228

229+
Version 5.1 & Version 6.0 -> 7.0
230+
================================
231+
232+
Version 7.0 introduces a new Solr field :code:`structure_path`, that provides context in the ListView about where search results appear in the structure tree. Indexing requires
233+
the field to be present in your running Solr instance, thus making the update of the schema.xml mandatory.
234+
235+
Steps to Update your Solr schema.xml
236+
------------------------------------
237+
a. Copy the updated schema.xml to your Solr configsets in $SOLR_HOME/configsets/dlf/
238+
b. Restart Solr.
239+
c. Reindex all documents in order to profit from the new field. This can be done by the kitodo:reindex CLI command with the '-a' (all) flag. See: :ref:`reindex_collections`.
240+
241+
Plugin ListView
242+
---------------
243+
244+
The ListView plugin now has a new setting 'Show breadcrumb/path to result location within the structure map', which is deactivated by default. When activated, the
245+
search results will display a string similar to a breadcrumb that shows the label or type of the parent structures up to but excluding the toplevel structure. The
246+
structure path will always be generated during indexing; the plugin setting only toggles whether it is displayed. Documents that have not been reindexed yet
247+
will not display a structure path.
248+
229249
*******
230250
Logging
231251
*******

Documentation/Plugins/Index.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,13 @@ List View
443443
:Default:
444444
0
445445

446+
- :Property:
447+
getStructurePath
448+
:Data Type:
449+
:ref:`t3tsref:data-type-boolean`
450+
:Default:
451+
0
452+
446453
- :Property:
447454
basketButton
448455
:Data Type:

Resources/Private/Language/de.locallang_be.xlf

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@
1717
<source><![CDATA[Show only documents from the selected collection(s)]]></source>
1818
<target><![CDATA[Nur Dokumente der ausgewählten Kollektion(en) berücksichtigen]]></target>
1919
</trans-unit>
20+
<trans-unit id="flexform.getStructurePath" approved="yes">
21+
<source><![CDATA[Show breadcrumb/path to result location within the structure map]]></source>
22+
<target><![CDATA[Breadcrumb/Pfad des Treffers innerhalb des Strukturbaums anzeigen]]></target>
23+
</trans-unit>
2024
<trans-unit id="flexform.getTitle" approved="yes">
2125
<source><![CDATA[Show title of parent document if document has no title itself]]></source>
2226
<target><![CDATA[Bei Bedarf Titel des übergeordneten Dokuments anzeigen]]></target>

Resources/Private/Language/de.locallang_labels.xlf

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -821,6 +821,10 @@
821821
<target>Solr-Schema-Feld "volume" : Volume field is mandatory for identifying documents (Standard ist "volume")</target>
822822
<source>Solr Schema Field "volume" : Volume field is mandatory for identifying documents (default is "volume")</source>
823823
</trans-unit>
824+
<trans-unit id="config.solr.fields.structurePath">
825+
<target>Solr-Schema-Feld "structure_path" : Field providing context about the location of a resource in the structure map (Standard ist "structure_path")</target>
826+
<source>Solr Schema Field "structure_path" : Field providing context about the location of a resource in the structure map (default is "structure_path")</source>
827+
</trans-unit>
824828
<trans-unit id="config.solr.fields.date">
825829
<target>Solr Schema Field "date" : The date a resource was issued or created. Used for datesearch (Standard ist "date")</target>
826830
<source>Solr Schema Field "date" : The date a resource was issued or created. Used for datesearch (default is "date")</source>

0 commit comments

Comments
 (0)