3232namespace search_elastic ;
3333
3434use search_elastic \local \service \error_service ;
35+ use stdClass ;
3536
3637/**
3738 * Elasticsearch engine.
@@ -620,73 +621,135 @@ private function batch_add_documents($jsonpayload, $isdoc = false, $sendnow = fa
620621 // If we don't have enough data to send yet return early.
621622 if ($ this ->payloadsize < $ this ->config ->sendsize && !$ sendnow ) {
622623 return $ numdocsignored ;
623- } else if ($ this ->payloadsize > 0 ) { // Make sure we have at least some data to send.
624- $ url = $ this ->get_url ();
625- $ client = new \search_elastic \esrequest ();
626- $ docurl = $ url . '/ ' . $ this ->config ->index . '/_bulk ' ;
627- $ response = $ client ->post ($ docurl , $ this ->payload );
628- $ responsebody = json_decode ($ response ->getBody ());
624+ }
629625
630- // Process response.
631- // If no errors were returned from bulk operation then numdocs = numrecords.
632- // If there are errors we need to iterate through he response and count how many.
633- if ($ response ->getStatusCode () == 413 ) {
634- // TODO: add handling to retry sending payload one record at a time.
635- $ message = get_string ('addfail ' , 'search_elastic ' ) . ' Request Entity Too Large ' ;
636- error_service::record_batch_error ($ message , $ this ->payload );
637- $ numdocsignored = $ this ->count ;
638- } else if ($ response ->getStatusCode () >= 300 ) {
639- $ message = get_string ('addfail ' , 'search_elastic ' ) . ' Error Code: ' . $ response ->getStatusCode ();
640- error_service::record_batch_error ($ message , $ this ->payload );
641- $ numdocsignored = $ this ->count ;
642- } else if (isset ($ responsebody ->errors ) && $ responsebody ->errors ) {
643- $ payloaddocs = $ this ->parse_payload_documents ();
644-
645- if (isset ($ responsebody ->items ) && is_array ($ responsebody ->items )) {
646- foreach ($ responsebody ->items as $ responseindex => $ item ) {
647- if (isset ($ item ->index ->status ) && $ item ->index ->status >= 300 ) {
648- $ errortype = $ item ->index ->error ->type ?? 'unknown ' ;
649- $ errorreason = $ item ->index ->error ->reason ?? 'unknown ' ;
650-
651- $ message = get_string ('addfail ' , 'search_elastic ' ) .
652- ' Error Type: ' . $ errortype .
653- ' Error Reason: ' . $ errorreason ;
654-
655- // Get corresponding document data using the same index.
656- $ docdata = null ;
657- if (isset ($ payloaddocs [$ responseindex ]) && is_array ($ payloaddocs [$ responseindex ])) {
658- $ candidatedoc = $ payloaddocs [$ responseindex ];
659-
660- // Verify document ID matches to ensure we have the right document.
661- $ expectedid = $ item ->index ->_id ?? null ;
662- $ parsedid = $ candidatedoc ['id ' ] ?? null ;
663-
664- if ($ expectedid && $ parsedid && $ expectedid === $ parsedid ) {
665- $ docdata = $ candidatedoc ;
666- } else {
667- // Log mismatch for debugging but continue with error recording.
668- debugging ('Document ID mismatch at index ' . $ responseindex . ': expected ' .
669- $ expectedid . ', got ' . $ parsedid , DEBUG_DEVELOPER );
670- }
671- }
672-
673- error_service::record_document_error ($ message , $ docdata );
674- $ numdocsignored ++;
675- }
676- }
677- }
626+ // Make sure we have at least some data to send.
627+ if ($ this ->payloadsize <= 0 ) {
628+ return $ numdocsignored ;
629+ }
630+
631+ // Send the bulk request.
632+ $ url = $ this ->get_url ();
633+ $ client = new \search_elastic \esrequest ();
634+ $ docurl = $ url . '/ ' . $ this ->config ->index . '/_bulk ' ;
635+ $ response = $ client ->post ($ docurl , $ this ->payload );
636+ $ responsebody = json_decode ($ response ->getBody ());
637+ $ statuscode = $ response ->getStatusCode ();
638+
639+ // Handle different response scenarios.
640+ if ($ statuscode == 413 ) {
641+ $ numdocsignored = $ this ->handle_413_retry ();
642+ } else if ($ statuscode >= 300 ) {
643+ $ message = get_string ('addfail ' , 'search_elastic ' ) . ' Error Code: ' . $ statuscode ;
644+ error_service::record_batch_error ($ message , $ this ->payload );
645+ $ numdocsignored = $ this ->count ;
646+ } else if (isset ($ responsebody ->errors ) && $ responsebody ->errors ) {
647+ $ numdocsignored = $ this ->log_bulk_response_item_errors ($ responsebody );
648+ }
649+
650+ // Reset the counts.
651+ $ this ->payload = false ;
652+ $ this ->payloadsize = 0 ;
653+
654+ // Reset the parent doc count after attempting to add.
655+ if ($ isdoc ) {
656+ $ this ->count = 0 ;
657+ }
658+
659+ return $ numdocsignored ;
660+ }
661+
/**
 * Handle a 413 (payload too large) bulk response by retrying documents individually.
 *
 * The buffered bulk payload is parsed back into per-document entries and each one
 * is sent on its own via index_single_document(). Entries that cannot be parsed,
 * exceed the configured send size, or fail the individual request are counted as
 * ignored, and an error is recorded for the latter two cases.
 *
 * @return int Number of documents ignored/failed.
 */
private function handle_413_retry(): int {
    $payloaddocs = $this->parse_payload_documents();
    $retryignored = 0;
    $maxsize = (int)$this->config->sendsize;

    foreach ($payloaddocs as $doc) {
        // Anything that is not a parsed document array cannot be retried
        // (index_single_document() only accepts arrays).
        if (!is_array($doc)) {
            $retryignored++;
            continue;
        }

        // Parsed entries carry the bulk action line under 'metadata'. That is
        // bookkeeping for error reporting and must not be indexed as a document
        // field, nor counted towards the document size.
        $indexdoc = $doc;
        unset($indexdoc['metadata']);

        $docid = $doc['id'] ?? 'unknown';

        // Check if the individual document is too large. A non-positive send size
        // means no usable limit is configured, so the check is skipped.
        $docsize = strlen((string)json_encode($indexdoc));
        if ($maxsize > 0 && $docsize > $maxsize) {
            $retryignored++;
            $message = get_string('addfail', 'search_elastic') .
                " Document too large ($docsize bytes exceeds $maxsize bytes limit). Doc ID: ({$docid})";
            error_service::record_document_error($message, $doc);
            continue;
        }

        if (!$this->index_single_document($indexdoc)) {
            $retryignored++;
            $message = get_string('addfail', 'search_elastic') .
                " Failed on individual retry after 413. Doc ID: ({$docid})";
            error_service::record_document_error($message, $doc);
        }
    }

    return $retryignored;
}
679701
680- // Reser the counts.
681- $ this ->payload = false ;
682- $ this ->payloadsize = 0 ;
/**
 * Record an error for every item of a bulk response that reports a failure.
 *
 * Items whose index status is missing or below 300 are skipped. For each failed
 * item the document at the same position in the parsed payload is attached to the
 * recorded error, but only when its id matches the id reported in the response.
 *
 * @param stdClass $responsebody Decoded JSON response from bulk operation.
 * @return int Number of documents that failed.
 */
private function log_bulk_response_item_errors(stdClass $responsebody): int {
    $sentdocs = $this->parse_payload_documents();
    $failures = 0;

    $items = $responsebody->items ?? null;
    if (!is_array($items)) {
        unset($sentdocs);
        return $failures;
    }

    foreach ($items as $position => $item) {
        $status = $item->index->status ?? null;
        if ($status === null || $status < 300) {
            continue;
        }

        $type = $item->index->error->type ?? 'unknown';
        $reason = $item->index->error->reason ?? 'unknown';
        $message = get_string('addfail', 'search_elastic') . ' Error Type: ' . $type . ' Error Reason: ' . $reason;

        // Look up the document sent at the same position as this response item.
        $matcheddoc = null;
        $candidate = $sentdocs[$position] ?? null;
        if (is_array($candidate)) {
            // Only trust the positional match when both ids are present and identical.
            $expectedid = $item->index->_id ?? null;
            $parsedid = $candidate['id'] ?? null;

            if (!$expectedid || !$parsedid || $expectedid !== $parsedid) {
                // Report the mismatch for debugging; the error is still recorded below.
                debugging('Document ID mismatch at index ' . $position . ': expected ' .
                    $expectedid . ', got ' . $parsedid, DEBUG_DEVELOPER);
            } else {
                $matcheddoc = $candidate;
            }
        }

        error_service::record_document_error($message, $matcheddoc);
        $failures++;
    }

    unset($sentdocs);

    return $failures;
}
692755
@@ -717,11 +780,16 @@ private function parse_payload_documents(): array {
717780 if ($ metadata && $ docdata ) {
718781 $ documents [$ docindex ] = [
719782 'metadata ' => $ metadata ,
720- 'id ' => $ docdata ['id ' ] ?? 'unknown ' ,
721- 'contextid ' => $ docdata ['contextid ' ] ?? \context_system::instance (),
722- 'areaid ' => $ docdata ['areaid ' ] ?? 'unknown ' ,
723- 'itemid ' => $ docdata ['itemid ' ] ?? 0 ,
724- 'modified ' => $ docdata ['modified ' ] ?? null ,
783+ ...$ docdata ,
784+ ];
785+
786+ // Add defaults for missing keys.
787+ $ documents [$ docindex ] += [
788+ 'id ' => 'unknown ' ,
789+ 'contextid ' => \context_system::instance (),
790+ 'areaid ' => 'unknown ' ,
791+ 'itemid ' => 0 ,
792+ 'modified ' => null ,
725793 ];
726794 }
727795 }
@@ -734,12 +802,12 @@ private function parse_payload_documents(): array {
734802
735803
736804 /**
737- * Index a single file document.
805+ * Index a single document.
738806 *
739- * @param array $filedocdata
807+ * @param array $docdata
740808 * @return bool
741809 */
742- public function index_single_file_document ( $ filedocdata ): bool {
810+ public function index_single_document ( array $ docdata ): bool {
743811 try {
744812 $ url = $ this ->get_url ();
745813 $ luceneversion = $ this ->get_es_lucene_version ();
@@ -749,8 +817,8 @@ public function index_single_file_document($filedocdata): bool {
749817 $ docprefix = '' ;
750818 }
751819
752- $ docurl = $ url . '/ ' . $ this ->config ->index . '/ ' . $ docprefix . 'doc/ ' . $ filedocdata ['id ' ];
753- $ jsondoc = json_encode ($ filedocdata );
820+ $ docurl = $ url . '/ ' . $ this ->config ->index . '/ ' . $ docprefix . 'doc/ ' . $ docdata ['id ' ];
821+ $ jsondoc = json_encode ($ docdata );
754822
755823 $ client = new \search_elastic \esrequest ();
756824 $ response = $ client ->post ($ docurl , $ jsondoc );
0 commit comments