Skip to content

Commit c06166b

Browse files
committed
Replace PaddleOCR with OpenAI Vision for Document Analysis
Refactored the document OCR job to use OpenAI's vision capabilities instead of PaddleOCR. Removed the old PaddleOCR-based implementation and introduced a new DocumentOcrJob leveraging the OpenAI .NET SDK for image analysis. Updated dependency injection and project references to include OpenAI packages. The new approach provides richer, AI-powered visual analysis and Markdown-formatted descriptions for document images.
1 parent 51981d3 commit c06166b

File tree

5 files changed

+157
-147
lines changed

5 files changed

+157
-147
lines changed

src/Infrastructure/DependencyInjection.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
using CleanArchitecture.Blazor.Infrastructure.Services.Circuits;
1818
using CleanArchitecture.Blazor.Infrastructure.Services.MediatorWrapper;
1919
using CleanArchitecture.Blazor.Infrastructure.Services.MultiTenant;
20-
using CleanArchitecture.Blazor.Infrastructure.Services.PaddleOCR;
20+
using CleanArchitecture.Blazor.Infrastructure.Services.OpenAI;
2121
using CleanArchitecture.Blazor.Infrastructure.Services.Serialization;
2222
using FluentEmail.MailKitSmtp;
2323
using Microsoft.AspNetCore.Components.Server.Circuits;

src/Infrastructure/Infrastructure.csproj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
<LangVersion>default</LangVersion>
99
</PropertyGroup>
1010
<ItemGroup>
11+
<PackageReference Include="Microsoft.Agents.AI.OpenAI" Version="1.0.0-preview.260121.1" />
12+
<PackageReference Include="OpenAI" Version="2.8.0" />
1113
<PackageReference Include="Microsoft.AspNetCore.Authentication.Facebook" Version="10.0.2" />
1214
<PackageReference Include="Microsoft.AspNetCore.Authentication.Google" Version="10.0.2" />
1315
<PackageReference Include="Microsoft.AspNetCore.Authentication.MicrosoftAccount" Version="10.0.2" />
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Diagnostics;
5+
using CleanArchitecture.Blazor.Application.Features.Documents.Caching;
6+
using CleanArchitecture.Blazor.Domain.Common.Enums;
7+
using Microsoft.Extensions.AI;
8+
using Microsoft.Extensions.Configuration;
9+
using OpenAI;
10+
using OpenAI.Chat;
11+
12+
namespace CleanArchitecture.Blazor.Infrastructure.Services.OpenAI;
13+
14+
/// <summary>
/// Background job that sends a document's image URL to an OpenAI chat model and
/// stores the returned Markdown description on the document.
/// </summary>
/// <remarks>
/// Failures are never stored as successful results: any exception raised while
/// analyzing or saving is logged, the document is flagged as failed, and
/// listeners are told the job finished with an error.
/// </remarks>
public class DocumentOcrJob : IDocumentOcrJob
{
    /// <summary>Maximum number of UTF-16 code units kept from the model response.</summary>
    private const int MaxContentLength = 4000;
    private const string SystemPrompt = "You are an advanced visual analysis AI. Analyze and describe images based on visual content, providing structured output in Markdown format.";
    private const string UserPrompt = "Analyze the following image and provide a comprehensive, briefly description in Markdown format.";

    private readonly IApplicationDbContext _db;
    private readonly IConfiguration _config;
    private readonly ILogger<DocumentOcrJob> _logger;
    private readonly IApplicationHubWrapper _hubNotification;

    /// <summary>Creates the job with its notification hub, database context, configuration and logger.</summary>
    public DocumentOcrJob(
        IApplicationHubWrapper hubNotification,
        IApplicationDbContext db,
        IConfiguration config,
        ILogger<DocumentOcrJob> logger)
    {
        _hubNotification = hubNotification;
        _db = db;
        _config = config;
        _logger = logger;
    }

    /// <summary>
    /// Synchronous entry point: processes the document and blocks until done.
    /// </summary>
    /// <param name="id">Primary key of the document to analyze.</param>
    public void Do(int id)
    {
        // GetAwaiter().GetResult() instead of Wait(): should anything ever escape the
        // pipeline, the caller sees the original exception rather than an AggregateException.
        ProcessDocumentAsync(id, CancellationToken.None).GetAwaiter().GetResult();
    }

    /// <summary>
    /// Asynchronous entry point: processes the document with the given token.
    /// </summary>
    /// <param name="id">Primary key of the document to analyze.</param>
    /// <param name="cancellationToken">Token passed to the database and AI calls.</param>
    public async Task Recognition(int id, CancellationToken cancellationToken)
    {
        await ProcessDocumentAsync(id, cancellationToken);
    }

    /// <summary>
    /// Loads the document, runs the image analysis and persists the outcome.
    /// Every exception is handled here: it is logged and recorded as a failure
    /// on the document, so this method does not propagate errors to its callers.
    /// </summary>
    private async Task ProcessDocumentAsync(int id, CancellationToken cancellationToken)
    {
        var stopwatch = Stopwatch.StartNew();

        // Declared outside the try block so the failure handler can flag the entity.
        Document? document = null;

        try
        {
            document = await _db.Documents.FindAsync(new object[] { id }, cancellationToken);
            if (document is null)
            {
                _logger.LogWarning("Document not found. DocumentId: {DocumentId}", id);
                return;
            }

            await _hubNotification.JobStarted(id, document.Title!);
            InvalidateDocumentCache();

            if (string.IsNullOrWhiteSpace(document.URL))
            {
                await UpdateDocumentWithError(_db, document, "Document URL is missing or invalid.", cancellationToken);
                // Refresh after the update, mirroring the success path below.
                InvalidateDocumentCache();
                _logger.LogWarning("Invalid document URL. DocumentId: {DocumentId}", id);
                return;
            }

            var analysisResult = await AnalyzeDocumentImageAsync(document.URL, cancellationToken);

            await UpdateDocumentWithResult(_db, document, analysisResult, cancellationToken);
            await _hubNotification.JobCompleted(id, document.Title!);
            InvalidateDocumentCache();

            stopwatch.Stop();
            _logger.LogInformation(
                "Document visual analysis completed. DocumentId: {DocumentId}, Title: {Title}, Duration: {Duration}ms",
                id, document.Title, stopwatch.ElapsedMilliseconds);
        }
        catch (Exception ex)
        {
            stopwatch.Stop();

            // Log the root cause first so it is captured even if reporting fails.
            _logger.LogError(ex,
                "Document visual analysis failed. DocumentId: {DocumentId}, Duration: {Duration}ms",
                id, stopwatch.ElapsedMilliseconds);

            await RecordFailureAsync(id, document, ex.Message);
        }
    }

    /// <summary>
    /// Asks the configured OpenAI chat model to describe the image at <paramref name="imageUrl"/>.
    /// </summary>
    /// <param name="imageUrl">URL of the image to analyze.</param>
    /// <param name="cancellationToken">Token passed to the model call.</param>
    /// <returns>The model's text response, truncated to <see cref="MaxContentLength"/>.</returns>
    /// <exception cref="InvalidOperationException">
    /// The <c>AISettings:OpenAIApiKey</c> or <c>AISettings:OpenAIModel</c> setting is missing.
    /// </exception>
    /// <remarks>
    /// Exceptions are intentionally NOT converted into a result string here. Doing so
    /// made the caller store the error text as content and mark the document as done.
    /// </remarks>
    private async Task<string> AnalyzeDocumentImageAsync(string imageUrl, CancellationToken cancellationToken)
    {
        var apiKey = _config["AISettings:OpenAIApiKey"];
        if (string.IsNullOrWhiteSpace(apiKey))
        {
            throw new InvalidOperationException("Configuration value 'AISettings:OpenAIApiKey' is missing.");
        }

        var model = _config["AISettings:OpenAIModel"];
        if (string.IsNullOrWhiteSpace(model))
        {
            throw new InvalidOperationException("Configuration value 'AISettings:OpenAIModel' is missing.");
        }

        var client = new OpenAIClient(apiKey);
        var chatClient = client.GetChatClient(model);
        var agent = chatClient.AsAIAgent(instructions: SystemPrompt);

        // NOTE(review): the media type is hard-coded to PNG regardless of the actual
        // image format behind the URL - confirm this is acceptable for JPEG/WebP uploads.
        var message = new Microsoft.Extensions.AI.ChatMessage(ChatRole.User, [
            new TextContent(UserPrompt),
            new UriContent(imageUrl, "image/png")
        ]);

        var response = await agent.RunAsync(message, cancellationToken: cancellationToken);

        return Truncate(response.Text ?? string.Empty);
    }

    /// <summary>
    /// Limits <paramref name="text"/> to <see cref="MaxContentLength"/> UTF-16 code units
    /// without cutting a surrogate pair in half.
    /// </summary>
    private static string Truncate(string text)
    {
        if (text.Length <= MaxContentLength)
        {
            return text;
        }

        var length = MaxContentLength;
        if (char.IsHighSurrogate(text[length - 1]))
        {
            // The last kept unit would be a lone high surrogate; drop it too.
            length--;
        }

        return text[..length];
    }

    /// <summary>Marks the document as done, stores the analysis text and saves.</summary>
    private async Task UpdateDocumentWithResult(
        IApplicationDbContext dbContext,
        Document document,
        string content,
        CancellationToken cancellationToken)
    {
        document.Status = JobStatus.Done;
        document.Description = "Visual analysis completed successfully.";
        document.Content = content;

        await dbContext.SaveChangesAsync(cancellationToken);
    }

    /// <summary>
    /// Stores a failure description on the document, clears its content, saves,
    /// and notifies listeners with <paramref name="errorMessage"/>.
    /// </summary>
    private async Task UpdateDocumentWithError(
        IApplicationDbContext dbContext,
        Document document,
        string errorMessage,
        CancellationToken cancellationToken)
    {
        document.Status = JobStatus.Pending;
        document.Description = $"Analysis failed: {errorMessage}";
        document.Content = string.Empty;

        await dbContext.SaveChangesAsync(cancellationToken);
        await _hubNotification.JobCompleted(document.Id, errorMessage);
    }

    /// <summary>
    /// Best-effort failure reporting used by the exception handler: flags the document
    /// (when one was loaded) and notifies listeners. Never throws; a secondary failure
    /// is only logged.
    /// </summary>
    private async Task RecordFailureAsync(int id, Document? document, string reason)
    {
        try
        {
            if (document is not null)
            {
                document.Status = JobStatus.Pending;
                document.Description = $"Analysis failed: {reason}";
                document.Content = string.Empty;

                // The job's own token may already be cancelled (that can be the very
                // reason we are here), so the failure state is saved without it.
                await _db.SaveChangesAsync(CancellationToken.None);
                InvalidateDocumentCache();
            }

            await _hubNotification.JobCompleted(id, $"Analysis failed: {reason}");
        }
        catch (Exception reportingError)
        {
            _logger.LogError(reportingError,
                "Failed to record analysis failure. DocumentId: {DocumentId}", id);
        }
    }

    /// <summary>Refreshes the document cache key so cached document data is reloaded.</summary>
    private static void InvalidateDocumentCache()
    {
        DocumentCacheKey.Refresh();
    }
}
154+

src/Infrastructure/Services/PaddleOCR/DocumentOcrJob.cs

Lines changed: 0 additions & 145 deletions
This file was deleted.

src/Server.UI/Server.UI.csproj

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
<LangVersion>default</LangVersion>
1616
</PropertyGroup>
1717
<ItemGroup>
18-
<PackageReference Include="OpenAI" Version="2.8.0" />
1918
<PackageReference Include="Hangfire.AspNetCore" Version="1.8.22" />
2019
<PackageReference Include="Hangfire.InMemory" Version="1.0.0" />
2120
<PackageReference Include="BlazorDownloadFile" Version="2.4.0.2" />

0 commit comments

Comments
 (0)