Skip to content

Commit e743a6c

Browse files
adding images of console
1 parent ea010ae commit e743a6c

File tree

5 files changed

+112
-1
lines changed

5 files changed

+112
-1
lines changed
24.3 KB
Loading
25.1 KB
Loading
52.5 KB
Loading
37.5 KB
Loading

articles/search/tutorial-optimize-indexing-pushapi.md

Lines changed: 112 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,16 @@ public static double EstimateObjectSize(object data)
206206

207207
This allows us to determine which batch size is most efficient by MB/s.
208208

209+
The function requires an `ISearchIndexClient` as well as the number of tries you'd like to test for each batch size. As there may be some variability in indexing times for each batch, we try each batch three times by default to make the results more significant.
210+
211+
```csharp
212+
await TestBatchSizes(indexClient, numTries: 3);
213+
```
214+
215+
When you run the function, you should see an output like the following:
216+
217+
![Output of test batch size function](media/tutorial-optimize-data-indexing/test-batch-sizes.png "Output of test batch size function")
218+
209219
## 5 - Index data
210220

211221
### Use multiple threads/workers
@@ -223,7 +233,108 @@ As you ramp up the requests hitting the search service, you may encounter [HTTP
223233

224234
In the event of a failure, requests should be retried using an [exponential backoff retry strategy](https://docs.microsoft.com/en-us/dotnet/architecture/microservices/implement-resilient-applications/implement-retries-exponential-backoff).
225235
226-
Azure Cognitive Search's .NET SDK automatically retries 503s and other failed requests but you'll need to implement your own logic to retry 207s. Open source tools such as [Polly](https://github.com/App-vNext/Polly) can also be used to implement a retry strategy. In this sample,
236+
Azure Cognitive Search's .NET SDK automatically retries 503s and other failed requests but you'll need to implement your own logic to retry 207s. Open source tools such as [Polly](https://github.com/App-vNext/Polly) can also be used to implement a retry strategy. In this sample, we implement our own exponential backoff strategy:
237+
238+
```csharp
239+
// Define parameters for exponential backoff
240+
int attempts = 0;
241+
TimeSpan delay = TimeSpan.FromSeconds(2);
242+
int maxRetryAttempts = 5;
243+
```
244+
245+
```csharp
246+
// Create batch of documents for indexing
247+
IndexBatch<Hotel> batch = IndexBatch.Upload(hotels);
248+
249+
// Implement exponential backoff
250+
do
251+
{
252+
try
253+
{
254+
attempts++;
255+
var response = await indexClient.Documents.IndexAsync(batch);
256+
return response;
257+
}
258+
catch (IndexBatchException ex)
259+
{
260+
Console.WriteLine("[Attempt: {0} of {1} Failed] - Error: {2}", attempts, maxRetryAttempts, ex.Message);
261+
262+
if (attempts == maxRetryAttempts)
263+
break;
264+
265+
// Find the failed items and create a new batch to retry
266+
batch = ex.FindFailedActionsToRetry(batch, x => x.HotelId);
267+
Console.WriteLine("Retrying failed documents using exponential backoff...\n");
268+
269+
await Task.Delay(delay);
270+
delay = delay * 2;
271+
}
272+
catch (Exception ex)
273+
{
274+
Console.WriteLine("[Attempt: {0} of {1} Failed] - Error: {2} \n", attempts, maxRetryAttempts, ex.Message);
275+
276+
if (attempts == maxRetryAttempts)
277+
break;
278+
279+
await Task.Delay(delay);
280+
delay = delay * 2;
281+
}
282+
} while (true);
283+
```
284+
285+
```csharp
286+
public static async Task IndexData(ISearchIndexClient indexClient, List<Hotel> hotels, int batchSize, int numThreads)
287+
{
288+
int numDocs = hotels.Count;
289+
Console.WriteLine("Uploading {0} documents...\n", numDocs.ToString());
290+
291+
DateTime startTime = DateTime.Now;
292+
Console.WriteLine("Started at: {0} \n", startTime);
293+
Console.WriteLine("Creating {0} threads...\n", numThreads);
294+
295+
// Creating a list to hold active tasks
296+
List<Task<DocumentIndexResult>> uploadTasks = new List<Task<DocumentIndexResult>>();
297+
298+
for (int i = 0; i < numDocs; i += batchSize)
299+
{
300+
List<Hotel> hotelBatch = hotels.GetRange(i, batchSize);
301+
var task = ExponentialBackoffAsync(indexClient, hotelBatch, i);
302+
uploadTasks.Add(task);
303+
Console.WriteLine("Sending a batch of {0} docs starting with doc {1}...\n", batchSize, i);
304+
305+
// Checking if we've hit the specified number of threads
306+
if (uploadTasks.Count >= numThreads)
307+
{
308+
Task<DocumentIndexResult> firstTaskFinished = await Task.WhenAny(uploadTasks);
309+
Console.WriteLine("Finished a thread, kicking off another...");
310+
uploadTasks.Remove(firstTaskFinished);
311+
}
312+
}
313+
314+
// waiting for remaining results to finish
315+
await Task.WhenAll(uploadTasks);
316+
317+
DateTime endTime = DateTime.Now;
318+
319+
TimeSpan runningTime = endTime - startTime;
320+
Console.WriteLine("\nEnded at: {0} \n", endTime);
321+
Console.WriteLine("Upload time total: {0}", runningTime);
322+
323+
double timePerBatch = Math.Round(runningTime.TotalMilliseconds / (numDocs / batchSize), 4);
324+
Console.WriteLine("Upload time per batch: {0} ms", timePerBatch);
325+
326+
double timePerDoc = Math.Round(runningTime.TotalMilliseconds / numDocs, 4);
327+
Console.WriteLine("Upload time per document: {0} ms \n", timePerDoc);
328+
}
329+
```
330+
331+
```csharp
332+
ExponentialBackoff.IndexData(indexClient, hotels, 1000, 8).Wait();
333+
```
334+
335+
![Output of index data function](media/tutorial-optimize-data-indexing/index-data-start.png "Output of index data function")
336+
337+
![Error from index data function](media/tutorial-optimize-data-indexing/index-data-error.png "Error from index data function")
227338

228339
## 6 - Explore index
229340

0 commit comments

Comments
 (0)