Skip to content

Commit 5c493d6

Browse files
specialtactics authored and timgws committed
Implement chunking of records before sending to ES (elasticquent#148)
* Resolve a problem whereby a large amount of data for a given model would result in a "Request size exceeded 10485760 bytes" error in AWS Elasticsearch (and potentially other configurations)
1 parent 7f93c6c commit 5c493d6

File tree

2 files changed

+42
-12
lines changed

2 files changed

+42
-12
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@
22
composer.phar
33
composer.lock
44
.DS_Store
5-
Thumbs.db
5+
Thumbs.db
6+
.idea

src/ElasticquentCollectionTrait.php

Lines changed: 40 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,12 @@ trait ElasticquentCollectionTrait
1010
{
1111
use ElasticquentClientTrait;
1212

13+
/**
14+
* @var int The number of records (i.e. models) to send to Elasticsearch in one go
15+
* Also, the number of models to get from the database at a time using Eloquent's chunk()
16+
*/
17+
static public $entriesToSendToElasticSearchInOneGo = 500;
18+
1319
/**
1420
* Add To Index
1521
*
@@ -23,21 +29,44 @@ public function addToIndex()
2329
return null;
2430
}
2531

26-
$params = array();
32+
// Use a stdClass to store the result of the Elasticsearch operation
33+
$result = new \stdClass;
2734

28-
foreach ($this->all() as $item) {
29-
$params['body'][] = array(
30-
'index' => array(
31-
'_id' => $item->getKey(),
32-
'_type' => $item->getTypeName(),
33-
'_index' => $item->getIndexName(),
34-
),
35-
);
35+
// Iterate according to the amount configured, and put that iteration's worth of records into Elasticsearch
36+
// This is done so that we do not exceed the maximum request size
37+
$chunkingResult = $this->chunk(static::$entriesToSendToElasticSearchInOneGo, function ($collectionChunk) use ($result) {
38+
$params = array();
39+
foreach ($collectionChunk as $item) {
40+
$params['body'][] = array(
41+
'index' => array(
42+
'_id' => $item->getKey(),
43+
'_type' => $item->getTypeName(),
44+
'_index' => $item->getIndexName(),
45+
),
46+
);
47+
48+
$params['body'][] = $item->getIndexDocumentData();
49+
}
50+
51+
$result->result = $this->getElasticSearchClient()->bulk($params);
3652

37-
$params['body'][] = $item->getIndexDocumentData();
53+
// Check for errors
54+
if ( (array_key_exists('errors', $result) && $result['errors'] != false ) || (array_key_exists('Message', $result) && stristr('Request size exceeded', $result['Message']) !== false)) {
55+
return false;
56+
}
57+
58+
// Remove vars immediately to prevent them hanging around in memory, in case we have a large number of iterations
59+
unset($collectionChunk, $params);
60+
});
61+
62+
// Get the result or null it
63+
if ($chunkingResult && property_exists($result, 'result')) {
64+
$result = $result->result;
65+
} else {
66+
$result = null;
3867
}
3968

40-
return $this->getElasticSearchClient()->bulk($params);
69+
return $result;
4170
}
4271

4372
/**

0 commit comments

Comments
 (0)