diff --git a/shared/Ocr/Tesseract/TesseractOcrEngine.cs b/shared/Ocr/Tesseract/TesseractOcrEngine.cs
index 6b5d7c7ae..bca730239 100644
--- a/shared/Ocr/Tesseract/TesseractOcrEngine.cs
+++ b/shared/Ocr/Tesseract/TesseractOcrEngine.cs
@@ -13,7 +13,7 @@ public sealed class TesseractOcrEngine : IOcrEngine, IDisposable
     private readonly TesseractEngine _engine;
 
     /// <summary>
-    /// Creates a new instance of the TesseractEngineWrapper passing in a valid TesseractEngine.
+    /// Creates a new instance of the TesseractOcrEngine using the given Tesseract configuration.
     /// </summary>
     public TesseractOcrEngine(TesseractConfig tesseractConfig)
     {
@@ -28,15 +28,20 @@ public TesseractOcrEngine(TesseractConfig tesseractConfig)
     /// <inheritdoc />
     public async Task<string> ExtractTextFromImageAsync(Stream imageContent, CancellationToken cancellationToken = default)
     {
         await using (var imgStream = new MemoryStream())
         {
-            await imageContent.CopyToAsync(imgStream, cancellationToken);
-            imgStream.Position = 0;
-
-            using var img = Pix.LoadFromMemory(imgStream.ToArray());
-
-            using var page = this._engine.Process(img);
-            return page.GetText();
+            // Pix.LoadFromMemory needs the whole image as a byte array, so buffer the input stream first.
+            // Cancellation is deliberately not caught: callers must be able to tell a cancelled
+            // request apart from an image that contains no text.
+            await imageContent.CopyToAsync(imgStream, cancellationToken).ConfigureAwait(false);
+
+            // The OCR calls below are synchronous and take no token, so check once more before starting.
+            cancellationToken.ThrowIfCancellationRequested();
+
+            using var img = Pix.LoadFromMemory(imgStream.ToArray());
+            using var page = this._engine.Process(img);
+
+            return page.GetText();
         }
     }
 
diff --git a/shared/ServiceConfiguration.cs b/shared/ServiceConfiguration.cs
index 313de466f..deafe6635 100644
--- a/shared/ServiceConfiguration.cs
+++ b/shared/ServiceConfiguration.cs
@@ -496,23 +496,26 @@ private void SetupForOpenAI()
     /// <param name="builder">KM builder</param>
     /// <param name="addCustomService">Action used to configure the service collection</param>
     /// <typeparam name="T">Target type/interface</typeparam>
     private T GetServiceInstance<T>(IKernelMemoryBuilder builder, Action<IServiceCollection> addCustomService)
     {
-        // Clone the list of service descriptors, skipping T descriptor
+        // Work on a copy of the builder's descriptors, leaving out any existing registration of T,
+        // so that `addCustomService` alone decides how T is built and builder.Services is not modified.
         IServiceCollection services = new ServiceCollection();
         foreach (ServiceDescriptor d in builder.Services)
         {
             if (d.ServiceType == typeof(T)) { continue; }
 
             services.Add(d);
         }
 
         // Add the custom T descriptor
         addCustomService.Invoke(services);
 
-        // Build and return an instance of T, as defined by `addCustomService`
-        return services.BuildServiceProvider().GetService<T>()
-               ?? throw new ConfigurationException($"Unable to build {nameof(T)}");
+        // Build and return an instance of T, as defined by `addCustomService`.
+        // The provider is not disposed here: doing so would also dispose any IDisposable instance it created, including T.
+        // typeof(T).Name reports the actual type; nameof(T) would always produce the literal "T".
+        return services.BuildServiceProvider().GetService<T>()
+               ?? throw new ConfigurationException($"Unable to build {typeof(T).Name}");
     }
 
     /// <summary>