Skip to content

Commit 5c794b7

Browse files
adunn49blueo
andauthored
Bugfix: Reindex job fix (CMS 6) (#60)
* Fix issue with ReindexJob * Add missing use import * Add PageFakeVersioned to extra_dataobjects * Update to configurable sort and direction for fetching documents * ISSUE-58: update CMS6 API use * ISSUE-58: update docs fixes #58 --------- Co-authored-by: Bernie Hamlin <bernie@silverstripe.com>
1 parent 8b878c7 commit 5c794b7

File tree

4 files changed

+102
-1
lines changed

4 files changed

+102
-1
lines changed

docs/en/08_customising_more.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,26 @@ happen as a result of a DataObject being ready to go into the index. It is invok
6868

6969
This is an extension point on the IndexConfiguration class that allows updating what indexes a document is configured for
7070

71+
## Customising the fetch sort order
72+
73+
When reindexing, the `DataObjectFetcher` retrieves records in batches. By default, records are sorted by `ID` in ascending order to ensure consistent ordering across batches. You can customise the sort field and direction using configuration:
74+
75+
```yaml
76+
SilverStripe\Forager\DataObject\DataObjectFetcher:
77+
fetch_sort: 'LastEdited'
78+
fetch_sort_direction: 'DESC'
79+
```
80+
81+
| Option | Default | Description |
82+
|---|---|---|
83+
| `fetch_sort` | `ID` | The database column to sort by when fetching records for indexing |
84+
| `fetch_sort_direction` | `ASC` | The sort direction (`ASC` or `DESC`) |
85+
86+
This can be useful when you want to prioritise recently edited content during a reindex, or when you need to sort by a specific field for consistency with an external system.
87+
88+
> **Warning**
89+
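> Avoid sort fields whose values are not unique, such as the default `Sort` column (used by `SortOrder` or drag-and-drop ordering). When several records share the same sort value, the database is free to return them in a different order for each batch query, which can lead to records being duplicated or skipped between batches during a reindex. Prefer a column with unique, stable values (e.g. `ID`) to ensure reliable batch pagination. Timestamp columns such as `Created` and `LastEdited` are usually distinct but are not guaranteed to be unique, and `LastEdited` changes whenever a record is written, so records edited while a reindex is running can move between batches.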
90+
7191
## More information
7292

7393
* [Adding a new search service](06_customising_add_search_service.md)

src/DataObject/DataObjectFetcher.php

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ class DataObjectFetcher implements DocumentFetcherInterface
2323

2424
private int $offset = 0;
2525

26+
private static string $fetch_sort = 'ID';
27+
28+
private static string $fetch_sort_direction = 'ASC';
29+
2630
public function __construct(string $class)
2731
{
2832
if (!is_subclass_of($class, DataObject::class)) {
@@ -74,7 +78,14 @@ public function incrementOffsetDown(): void
7478
*/
7579
public function fetch(): array
7680
{
77-
$list = $this->createDataList($this->getBatchSize(), $this->getOffset());
81+
// get configurable sort options
82+
$sortBy = static::config()->get('fetch_sort') ?? 'ID';
83+
$sortDirection = static::config()->get('fetch_sort_direction') ?? 'ASC';
84+
85+
// sort (default by ID) to ensure consistent ordering across batches
86+
$list = $this->createDataList($this->getBatchSize(), $this->getOffset())
87+
->sort($sortBy, $sortDirection);
88+
7889
$docs = [];
7990

8091
foreach ($list as $record) {

tests/DataObject/DataObjectFetcherTest.php

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
use SilverStripe\Forager\Tests\Fake\DataObjectFake;
99
use SilverStripe\Forager\Tests\Fake\DataObjectSubclassFake;
1010
use SilverStripe\Forager\Tests\Fake\DataObjectSubclassFakeShouldNotIndex;
11+
use SilverStripe\Forager\Tests\Fake\PageFakeVersioned;
1112
use SilverStripe\Forager\Tests\SearchServiceTestTrait;
1213

1314
class DataObjectFetcherTest extends SapphireTest
@@ -23,6 +24,7 @@ class DataObjectFetcherTest extends SapphireTest
2324
*/
2425
protected static $extra_dataobjects = [
2526
DataObjectFake::class,
27+
PageFakeVersioned::class,
2628
];
2729

2830
public function testFetch(): void
@@ -63,6 +65,54 @@ public function testFetch(): void
6365
$this->assertCount(2, $documents);
6466
}
6567

68+
/**
 * Verifies that every document is fetched exactly once when the fetcher is driven in batches.
 *
 * All pages are given the same `Sort` value on purpose: if the fetcher ordered by a non-unique
 * column, rows could be repeated or skipped between batches and the unique ID count at the end
 * would fall short of the total.
 */
public function testFetchBatch(): void
{
    // create pages
    $createPageCount = 100;

    foreach (range(1, $createPageCount) as $ignored) {
        $dataobject = PageFakeVersioned::create();
        $dataobject->Title = 'FetchTestPage';
        // added to verify that all pages are fetched regardless of the (identical) Sort value
        $dataobject->Sort = 1;
        $dataobject->write();
        $dataobject->publishSingle();
    }

    $batchSize = 10;
    $fetcher = DataObjectFetcher::create(PageFakeVersioned::class);
    $fetcher->setBatchSize($batchSize);
    $totalDocuments = $fetcher->getTotalDocuments();

    // without this guard the loop below never runs and the final assertion passes vacuously
    $this->assertGreaterThan(0, $totalDocuments, 'Expected the fetcher to report documents to fetch');

    $fetchedDocumentCount = 0;
    $fetchedDocumentIDs = [];

    // keep fetching until we've fetched all documents, using the batch size and offset to get the next batch of
    // documents each time
    while ($fetchedDocumentCount < $totalDocuments) {
        $fetcher->setOffset($fetchedDocumentCount);
        $documents = $fetcher->fetch();

        // an empty batch would leave the offset unchanged and spin forever, so fail fast instead
        $this->assertNotEmpty(
            $documents,
            sprintf(
                'Fetcher returned an empty batch at offset %d before all %d documents were fetched',
                $fetchedDocumentCount,
                $totalDocuments
            )
        );

        $fetchedDocumentCount += count($documents);

        // collect all ids so that we can check everything has been fetched at the end of the test
        $batchIDs = array_map(
            static fn (DataObjectDocument $document) => $document->getDataObject()->ID,
            $documents
        );

        $fetchedDocumentIDs = array_merge($fetchedDocumentIDs, array_values($batchIDs));
    }

    // only keep unique ids: a duplicated record implies another record was skipped
    $fetchedDocumentIDs = array_unique($fetchedDocumentIDs);

    // make sure we fetched all the documents
    $this->assertCount($totalDocuments, $fetchedDocumentIDs);
}
115+
66116
public function testTotalDocuments(): void
67117
{
68118
$fetcher = DataObjectFetcher::create(DataObjectFake::class);

tests/Fake/PageFakeVersioned.php

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
<?php
2+
3+
namespace SilverStripe\Forager\Tests\Fake;
4+
5+
use Page;
6+
use SilverStripe\Dev\TestOnly;
7+
use SilverStripe\Forager\Extensions\SearchServiceExtension;
8+
use SilverStripe\Versioned\Versioned;
9+
10+
/**
 * Test-only Page subclass that has both the search service extension and the
 * Versioned extension applied.
 */
class PageFakeVersioned extends Page implements TestOnly
{
    /**
     * Table name used for this fake page type.
     */
    private static string $table_name = 'PageFakeVersioned';

    /**
     * Extensions applied to this fake page type.
     */
    private static array $extensions = [SearchServiceExtension::class, Versioned::class];
}

0 commit comments

Comments
 (0)