Skip to content

Commit 5f1dffc

Browse files
Merge pull request #143 from tsmilan/issue-142
[#142] Add automatic retry logic for 413 Request Entity Too Large errors
2 parents fc85fa6 + 134471d commit 5f1dffc

File tree

6 files changed

+510
-75
lines changed

6 files changed

+510
-75
lines changed

classes/engine.php

Lines changed: 138 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
namespace search_elastic;
3333

3434
use search_elastic\local\service\error_service;
35+
use stdClass;
3536

3637
/**
3738
* Elasticsearch engine.
@@ -620,73 +621,135 @@ private function batch_add_documents($jsonpayload, $isdoc = false, $sendnow = fa
620621
// If we don't have enough data to send yet return early.
621622
if ($this->payloadsize < $this->config->sendsize && !$sendnow) {
622623
return $numdocsignored;
623-
} else if ($this->payloadsize > 0) { // Make sure we have at least some data to send.
624-
$url = $this->get_url();
625-
$client = new \search_elastic\esrequest();
626-
$docurl = $url . '/' . $this->config->index . '/_bulk';
627-
$response = $client->post($docurl, $this->payload);
628-
$responsebody = json_decode($response->getBody());
624+
}
629625

630-
// Process response.
631-
// If no errors were returned from bulk operation then numdocs = numrecords.
632-
// If there are errors we need to iterate through he response and count how many.
633-
if ($response->getStatusCode() == 413) {
634-
// TODO: add handling to retry sending payload one record at a time.
635-
$message = get_string('addfail', 'search_elastic') . ' Request Entity Too Large';
636-
error_service::record_batch_error($message, $this->payload);
637-
$numdocsignored = $this->count;
638-
} else if ($response->getStatusCode() >= 300) {
639-
$message = get_string('addfail', 'search_elastic') . ' Error Code: ' . $response->getStatusCode();
640-
error_service::record_batch_error($message, $this->payload);
641-
$numdocsignored = $this->count;
642-
} else if (isset($responsebody->errors) && $responsebody->errors) {
643-
$payloaddocs = $this->parse_payload_documents();
644-
645-
if (isset($responsebody->items) && is_array($responsebody->items)) {
646-
foreach ($responsebody->items as $responseindex => $item) {
647-
if (isset($item->index->status) && $item->index->status >= 300) {
648-
$errortype = $item->index->error->type ?? 'unknown';
649-
$errorreason = $item->index->error->reason ?? 'unknown';
650-
651-
$message = get_string('addfail', 'search_elastic') .
652-
' Error Type: ' . $errortype .
653-
' Error Reason: ' . $errorreason;
654-
655-
// Get corresponding document data using the same index.
656-
$docdata = null;
657-
if (isset($payloaddocs[$responseindex]) && is_array($payloaddocs[$responseindex])) {
658-
$candidatedoc = $payloaddocs[$responseindex];
659-
660-
// Verify document ID matches to ensure we have the right document.
661-
$expectedid = $item->index->_id ?? null;
662-
$parsedid = $candidatedoc['id'] ?? null;
663-
664-
if ($expectedid && $parsedid && $expectedid === $parsedid) {
665-
$docdata = $candidatedoc;
666-
} else {
667-
// Log mismatch for debugging but continue with error recording.
668-
debugging('Document ID mismatch at index ' . $responseindex . ': expected ' .
669-
$expectedid . ', got ' . $parsedid, DEBUG_DEVELOPER);
670-
}
671-
}
672-
673-
error_service::record_document_error($message, $docdata);
674-
$numdocsignored++;
675-
}
676-
}
677-
}
626+
// Make sure we have at least some data to send.
627+
if ($this->payloadsize <= 0) {
628+
return $numdocsignored;
629+
}
630+
631+
// Send the bulk request.
632+
$url = $this->get_url();
633+
$client = new \search_elastic\esrequest();
634+
$docurl = $url . '/' . $this->config->index . '/_bulk';
635+
$response = $client->post($docurl, $this->payload);
636+
$responsebody = json_decode($response->getBody());
637+
$statuscode = $response->getStatusCode();
638+
639+
// Handle different response scenarios.
640+
if ($statuscode == 413) {
641+
$numdocsignored = $this->handle_413_retry();
642+
} else if ($statuscode >= 300) {
643+
$message = get_string('addfail', 'search_elastic') . ' Error Code: ' . $statuscode;
644+
error_service::record_batch_error($message, $this->payload);
645+
$numdocsignored = $this->count;
646+
} else if (isset($responsebody->errors) && $responsebody->errors) {
647+
$numdocsignored = $this->log_bulk_response_item_errors($responsebody);
648+
}
649+
650+
// Reset the counts.
651+
$this->payload = false;
652+
$this->payloadsize = 0;
653+
654+
// Reset the parent doc count after attempting to add.
655+
if ($isdoc) {
656+
$this->count = 0;
657+
}
658+
659+
return $numdocsignored;
660+
}
661+
662+
/**
663+
* Handle 413 payload too large error by retrying documents individually.
664+
*
665+
* @return int Number of documents ignored/failed.
666+
*/
667+
private function handle_413_retry(): int {
668+
// Retry sending payload one record at a time.
669+
$payloaddocs = $this->parse_payload_documents();
670+
$retryignored = 0;
671+
$maxsize = (int)$this->config->sendsize;
672+
673+
foreach ($payloaddocs as $doc) {
674+
if (is_null($doc)) {
675+
$retryignored++;
676+
continue;
677+
}
678+
679+
// Check if individual document is too large.
680+
$docsize = strlen(json_encode($doc));
681+
if ($docsize > $maxsize) {
682+
$retryignored++;
683+
$message = get_string('addfail', 'search_elastic') .
684+
" Document too large ($docsize bytes exceeds $maxsize bytes limit). Doc ID: ({$doc['id']})";
685+
error_service::record_document_error($message, $doc);
686+
continue;
687+
}
688+
689+
if (!$this->index_single_document($doc)) {
690+
$retryignored++;
691+
$message = get_string('addfail', 'search_elastic') .
692+
" Failed on individual retry after 413. Doc ID: ({$doc['id']})";
693+
error_service::record_document_error($message, $doc);
678694
}
695+
}
696+
697+
unset($payloaddocs);
698+
699+
return $retryignored;
700+
}
679701

680-
// Reser the counts.
681-
$this->payload = false;
682-
$this->payloadsize = 0;
702+
/**
 * Record an error for every item of a bulk response that came back with a failure status.
 *
 * Each failed item (index status of 300 or above) is paired with the payload document at
 * the same position. The document is attached to the recorded error only when its id is
 * identical to the id reported in the response; otherwise a developer debugging message
 * is emitted and the error is recorded without document data.
 *
 * @param stdClass $responsebody Decoded JSON response from bulk operation.
 * @return int Number of documents that failed.
 */
private function log_bulk_response_item_errors(stdClass $responsebody): int {
    $sentdocs = $this->parse_payload_documents();
    $failed = 0;

    // A response without a usable item list yields nothing to report.
    $items = (isset($responsebody->items) && is_array($responsebody->items)) ? $responsebody->items : [];

    foreach ($items as $position => $item) {
        // Only items carrying a failure status are of interest.
        $status = $item->index->status ?? null;
        if ($status === null || $status < 300) {
            continue;
        }

        $type = $item->index->error->type ?? 'unknown';
        $reason = $item->index->error->reason ?? 'unknown';
        $message = get_string('addfail', 'search_elastic')
            . ' Error Type: ' . $type
            . ' Error Reason: ' . $reason;

        // Look up the payload document sitting at the same position as the response item.
        $matched = null;
        $candidate = $sentdocs[$position] ?? null;
        if (is_array($candidate)) {
            $responseid = $item->index->_id ?? null;
            $payloadid = $candidate['id'] ?? null;

            // Attach the document only when both ids are present and identical.
            if (!$responseid || !$payloadid || $responseid !== $payloadid) {
                // Log the mismatch for debugging; the error is still recorded below.
                debugging('Document ID mismatch at index ' . $position . ': expected ' .
                    $responseid . ', got ' . $payloadid, DEBUG_DEVELOPER);
            } else {
                $matched = $candidate;
            }
        }

        error_service::record_document_error($message, $matched);
        $failed++;
    }

    return $failed;
}
692755

@@ -717,11 +780,16 @@ private function parse_payload_documents(): array {
717780
if ($metadata && $docdata) {
718781
$documents[$docindex] = [
719782
'metadata' => $metadata,
720-
'id' => $docdata['id'] ?? 'unknown',
721-
'contextid' => $docdata['contextid'] ?? \context_system::instance(),
722-
'areaid' => $docdata['areaid'] ?? 'unknown',
723-
'itemid' => $docdata['itemid'] ?? 0,
724-
'modified' => $docdata['modified'] ?? null,
783+
...$docdata,
784+
];
785+
786+
// Add defaults for missing keys.
787+
$documents[$docindex] += [
788+
'id' => 'unknown',
789+
'contextid' => \context_system::instance(),
790+
'areaid' => 'unknown',
791+
'itemid' => 0,
792+
'modified' => null,
725793
];
726794
}
727795
}
@@ -734,12 +802,12 @@ private function parse_payload_documents(): array {
734802

735803

736804
/**
737-
* Index a single file document.
805+
* Index a single document.
738806
*
739-
* @param array $filedocdata
807+
* @param array $docdata
740808
* @return bool
741809
*/
742-
public function index_single_file_document($filedocdata): bool {
810+
public function index_single_document(array $docdata): bool {
743811
try {
744812
$url = $this->get_url();
745813
$luceneversion = $this->get_es_lucene_version();
@@ -749,8 +817,8 @@ public function index_single_file_document($filedocdata): bool {
749817
$docprefix = '';
750818
}
751819

752-
$docurl = $url . '/' . $this->config->index . '/' . $docprefix . 'doc/' . $filedocdata['id'];
753-
$jsondoc = json_encode($filedocdata);
820+
$docurl = $url . '/' . $this->config->index . '/' . $docprefix . 'doc/' . $docdata['id'];
821+
$jsondoc = json_encode($docdata);
754822

755823
$client = new \search_elastic\esrequest();
756824
$response = $client->post($docurl, $jsondoc);

classes/local/service/error_service.php

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,7 @@ private static function retry_failed_document_indexing($error, $searcharea, $eng
361361
try {
362362
$itemid = $error->get('itemid');
363363
$context = context::instance_by_id($error->get('contextid'));
364+
$config = get_config('search_elastic');
364365

365366
// Get the record from search area.
366367
$record = self::get_record_for_context($searcharea, $context, $itemid);
@@ -376,6 +377,19 @@ private static function retry_failed_document_indexing($error, $searcharea, $eng
376377
return ['success' => false, 'message' => 'Search area could not create document from record'];
377378
}
378379

380+
// Check document size before attempting to index.
381+
$docdata = $document->export_for_engine();
382+
$docsize = strlen(json_encode($docdata));
383+
$maxsize = (int)$config->sendsize;
384+
if ($docsize > $maxsize) {
385+
// Can't index this document because it's too large.
386+
$error->mark_failed();
387+
return [
388+
'success' => false,
389+
'message' => "Document too large ($docsize) bytes exceeds $config->sendsize bytes limit).",
390+
];
391+
}
392+
379393
// Index the parent document first.
380394
$success = $engine->add_document($document, false);
381395
if (!$success) {
@@ -397,23 +411,43 @@ private static function retry_failed_document_indexing($error, $searcharea, $eng
397411
}
398412

399413
$fileerrorcount = 0;
414+
$filesskipped = 0;
400415

401416
foreach ($files as $file) {
402417
$filedocdata = $document->export_file_for_engine($file);
403-
$success = $engine->index_single_file_document($filedocdata);
418+
419+
// Check file document size.
420+
$filesize = strlen(json_encode($filedocdata));
421+
if ($filesize > $config->sendsize) {
422+
$filesskipped++;
423+
debugging("Skipping file (too large): {$file->get_filename()} ($filesize bytes, exceeds $maxsize bytes limit)");
424+
continue;
425+
}
426+
427+
$success = $engine->index_single_document($filedocdata);
404428
if (!$success) {
405429
$fileerrorcount++;
406430
}
407431
}
408432

409433
if ($fileerrorcount == 0) {
410434
return ['success' => true, 'message' => 'Content reindexed successfully'];
435+
} else if ($filesskipped > 0 && $fileerrorcount == 0) {
436+
return [
437+
'success' => true, 'message' => "Content reindexed $filesskipped file(s) skipped - too large",
438+
];
439+
} else if ($filesskipped > 0 && $fileerrorcount > 0) {
440+
$error->mark_failed();
441+
return [
442+
'success' => false,
443+
'message' => "Failed to reindex some files. $filesskipped file(s) skipped (too large).",
444+
];
411445
}
412446

413447
$error->mark_failed();
414448
return ['success' => false, 'message' => 'Failed to reindex content'];
415449
} catch (Exception $e) {
416-
return ['success' => false, 'message' => 'Exception during retry: ' . $e->getMessage()];
450+
return ['success' => false, 'message' => "Exception during retry: {$e->getMessage()}"];
417451
}
418452
}
419453

@@ -503,7 +537,7 @@ private static function retry_file_extraction_and_indexing($error, $searcharea,
503537
$filedocdata = $parentdocument->export_file_for_engine($file);
504538

505539
// Index just this specific file document.
506-
$success = $engine->index_single_file_document($filedocdata);
540+
$success = $engine->index_single_document($filedocdata);
507541

508542
if ($success) {
509543
return ['success' => true, 'message' => 'File content extracted and indexed successfully'];

lang/en/search_elastic.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
*/
2424

2525
$string['actions'] = 'Actions';
26-
$string['addfail'] = 'Failed to add document to index';
26+
$string['addfail'] = 'Failed to add document to index.';
2727
$string['adminsettings'] = 'Plugin settings';
2828
$string['advsettings'] = 'Advanced settings';
2929
$string['all'] = 'All';

0 commit comments

Comments
 (0)