@@ -206,6 +206,16 @@ public static double EstimateObjectSize(object data)
206
206
207
207
This allows us to determine which batch size is most efficient by MB/s.
208
208
209
+ The function requires an `ISearchIndexClient` as well as the number of tries you'd like to test for each batch size. As there may be some variability in indexing times for each batch, we try each batch three times by default to make the results more significant.
210
+
211
+ ```csharp
212
+ await TestBatchSizes(indexClient, numTries: 3);
213
+ ```
214
+
215
+ When you run the function, you should see an output like the following:
216
+
217
+ ![Output of test batch size function](media/tutorial-optimize-data-indexing/test-batch-sizes.png "Output of test batch size function")
218
+
209
219
## 5 - Index data
210
220
211
221
### Use multiple threads/workers
@@ -223,7 +233,108 @@ As you ramp up the requests hitting the search service, you may encounter [HTTP
223
233
224
234
In the event of a failure, requests should be retried using an [exponential backoff retry strategy](https://docs.microsoft.com/en-us/dotnet/architecture/microservices/implement-resilient-applications/implement-retries-exponential-backoff).
225
235
226
- Azure Cognitive Search 's .NET SDK automatically retries 503s and other failed requests but you' ll need to implement your own logic to retry 207s . Open source tools such as [Polly ](https :// github.com/App-vNext/Polly) can also be used to implement a retry strategy. In this sample,
236
+ Azure Cognitive Search's .NET SDK automatically retries 503s and other failed requests but you'll need to implement your own logic to retry 207s. Open source tools such as [Polly](https://github.com/App-vNext/Polly) can also be used to implement a retry strategy. In this sample, we implement our own exponential backoff strategy:
237
+
238
+ ```csharp
239
+ // Define parameters for exponential backoff
240
+ int attempts = 0;
241
+ TimeSpan delay = TimeSpan.FromSeconds(2);
242
+ int maxRetryAttempts = 5;
243
+ ```
244
+
245
+ ```csharp
246
+ // Create batch of documents for indexing
247
+ IndexBatch<Hotel> batch = IndexBatch.Upload(hotels);
248
+
249
+ // Implement exponential backoff
250
+ do
251
+ {
252
+ try
253
+ {
254
+ attempts ++ ;
255
+ var response = await indexClient.Documents.IndexAsync(batch);
256
+ return response ;
257
+ }
258
+ catch (IndexBatchException ex )
259
+ {
260
+ Console.WriteLine("[Attempt: {0} of {1} Failed] - Error: {2}", attempts, maxRetryAttempts, ex.Message);
261
+
262
+ if (attempts == maxRetryAttempts )
263
+ break ;
264
+
265
+ // Find the failed items and create a new batch to retry
266
+ batch = ex.FindFailedActionsToRetry(batch, x => x.HotelId);
267
+ Console.WriteLine("Retrying failed documents using exponential backoff...\n");
268
+
269
+ Task .Delay (delay ).Wait ();
270
+ delay = delay * 2 ;
271
+ }
272
+ catch (Exception ex )
273
+ {
274
+ Console.WriteLine("[Attempt: {0} of {1} Failed] - Error: {2} \n", attempts, maxRetryAttempts, ex.Message);
275
+
276
+ if (attempts == maxRetryAttempts )
277
+ break ;
278
+
279
+ Task .Delay (delay ).Wait ();
280
+ delay = delay * 2 ;
281
+ }
282
+ } while (true );
283
+ ```
284
+
285
+ ```csharp
286
+ public static async Task IndexData(ISearchIndexClient indexClient, List<Hotel> hotels, int batchSize, int numThreads)
287
+ {
288
+ int numDocs = hotels .Count ;
289
+ Console.WriteLine("Uploading {0} documents...\n", numDocs.ToString());
290
+
291
+ DateTime startTime = DateTime .Now ;
292
+ Console .WriteLine (" Started at: {0} \n " , startTime );
293
+ Console .WriteLine (" Creating {0} threads...\n " , numThreads );
294
+
295
+ // Creating a list to hold active tasks
296
+ List<Task<DocumentIndexResult>> uploadTasks = new List<Task<DocumentIndexResult>>();
297
+
298
+ for (int i = 0 ; i < numDocs ; i += batchSize )
299
+ {
300
+ List<Hotel> hotelBatch = hotels.GetRange(i, batchSize);
301
+ var task = ExponentialBackoffAsync (indexClient , hotelBatch , i );
302
+ uploadTasks .Add (task );
303
+ Console.WriteLine("Sending a batch of {0} docs starting with doc {1}...\n", batchSize, i);
304
+
305
+ // Checking if we've hit the specified number of threads
306
+ if (uploadTasks .Count >= numThreads )
307
+ {
308
+ Task<DocumentIndexResult> firstTaskFinished = await Task.WhenAny(uploadTasks);
309
+ Console .WriteLine (" Finished a thread, kicking off another..." );
310
+ uploadTasks .Remove (firstTaskFinished );
311
+ }
312
+ }
313
+
314
+ // waiting for remaining results to finish
315
+ await Task .WhenAll (uploadTasks );
316
+
317
+ DateTime endTime = DateTime .Now ;
318
+
319
+ TimeSpan runningTime = endTime - startTime ;
320
+ Console .WriteLine (" \n Ended at: {0} \n " , endTime );
321
+ Console .WriteLine (" Upload time total: {0}" , runningTime );
322
+
323
+ double timePerBatch = Math.Round(runningTime.TotalMilliseconds / (numDocs / batchSize), 4);
324
+ Console .WriteLine (" Upload time per batch: {0} ms" , timePerBatch );
325
+
326
+ double timePerDoc = Math.Round(runningTime.TotalMilliseconds / numDocs, 4);
327
+ Console .WriteLine (" Upload time per document: {0} ms \n " , timePerDoc );
328
+ }
329
+ ```
330
+
331
+ ```csharp
332
+ ExponentialBackoff.IndexData(indexClient, hotels, 1000, 8).Wait();
333
+ ```
334
+
335
+ ![Output of index data function](media/tutorial-optimize-data-indexing/index-data-start.png "Output of index data function")
336
+
337
+ ![Error from index data function](media/tutorial-optimize-data-indexing/index-data-error.png "Error from index data function")
227
338
228
339
## 6 - Explore index
229
340
0 commit comments