Skip to content

Commit 3d5c1d3

Browse files
authored
Fix Chat documents ingestion (#338)
1 parent 45d6eec commit 3d5c1d3

File tree

33 files changed

+1334
-125
lines changed

33 files changed

+1334
-125
lines changed

src/Abstractions/CrestApps.OrchardCore.AI.Abstractions/IAIChatSessionHandler.cs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
using CrestApps.OrchardCore.AI.Models;
2+
using CrestApps.OrchardCore.Services;
23

34
namespace CrestApps.OrchardCore.AI;
45

56
/// <summary>
67
/// Handles lifecycle events raised during an AI chat session, such as when
78
/// a message exchange completes. Implementations can perform post-processing
89
/// tasks like data extraction, analytics, or workflow triggers.
10+
/// Inherits from <see cref="ICatalogEntryHandler{T}"/> to support standard
11+
/// lifecycle events (Initializing, Initialized, Creating, Created, Loaded,
12+
/// Deleting, Deleted, Updating, Updated, Validating, Validated).
913
/// </summary>
10-
public interface IAIChatSessionHandler
14+
public interface IAIChatSessionHandler : ICatalogEntryHandler<AIChatSession>
1115
{
1216
/// <summary>
1317
/// Called after a user message has been processed and the assistant response
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
namespace CrestApps.OrchardCore.AI.Models;
2+
3+
/// <summary>
4+
/// Metadata for storing data source configuration on an entity.
5+
/// </summary>
6+
public sealed class DataSourceMetadata
7+
{
8+
/// <summary>
9+
/// Gets or sets the data source ID.
10+
/// </summary>
11+
public string DataSourceId { get; set; }
12+
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
using CrestApps.OrchardCore.AI.Models;
2+
using CrestApps.OrchardCore.Core.Handlers;
3+
4+
namespace CrestApps.OrchardCore.AI.Core.Handlers;
5+
6+
/// <summary>
7+
/// Base class for <see cref="IAIChatSessionHandler"/> implementations.
8+
/// Provides virtual no-op implementations for all lifecycle events
9+
/// inherited from <see cref="ICatalogEntryHandler{AIChatSession}"/>
10+
/// and <see cref="IAIChatSessionHandler.MessageCompletedAsync"/>.
11+
/// </summary>
12+
public abstract class AIChatSessionHandlerBase : CatalogEntryHandlerBase<AIChatSession>, IAIChatSessionHandler
13+
{
14+
/// <inheritdoc/>
15+
public virtual Task MessageCompletedAsync(ChatMessageCompletedContext context)
16+
=> Task.CompletedTask;
17+
}

src/Core/CrestApps.OrchardCore.AI.Core/Models/AIProfileDataSourceMetadata.cs

Lines changed: 0 additions & 6 deletions
This file was deleted.

src/Core/CrestApps.OrchardCore.AI.Core/Services/DefaultAIChatSessionManager.cs

Lines changed: 18 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,8 @@
22
using CrestApps.OrchardCore.AI.Core.Indexes;
33
using CrestApps.OrchardCore.AI.Models;
44
using Microsoft.AspNetCore.Http;
5-
using Microsoft.Extensions.DependencyInjection;
65
using Microsoft.Extensions.Logging;
76
using OrchardCore;
8-
using OrchardCore.Environment.Shell.Scope;
9-
using OrchardCore.Indexing;
10-
using OrchardCore.Indexing.Models;
117
using OrchardCore.Modules;
128
using YesSql;
139
using ISession = YesSql.ISession;
@@ -20,23 +16,26 @@ public sealed class DefaultAIChatSessionManager : IAIChatSessionManager
2016
private readonly IHttpContextAccessor _httpContextAccessor;
2117
private readonly IClientIPAddressAccessor _clientIPAddressAccessor;
2218
private readonly ISession _session;
23-
private readonly IAIDocumentStore _documentStore;
2419
private readonly IAIChatSessionPromptStore _promptStore;
20+
private readonly IEnumerable<IAIChatSessionHandler> _handlers;
21+
private readonly ILogger _logger;
2522

2623
public DefaultAIChatSessionManager(
2724
IClock clock,
2825
IHttpContextAccessor httpContextAccessor,
2926
IClientIPAddressAccessor clientIPAddressAccessor,
3027
ISession session,
31-
IAIDocumentStore documentStore,
32-
IAIChatSessionPromptStore promptStore)
28+
IAIChatSessionPromptStore promptStore,
29+
IEnumerable<IAIChatSessionHandler> handlers,
30+
ILogger<DefaultAIChatSessionManager> logger)
3331
{
3432
_clock = clock;
3533
_httpContextAccessor = httpContextAccessor;
3634
_clientIPAddressAccessor = clientIPAddressAccessor;
3735
_session = session;
38-
_documentStore = documentStore;
3936
_promptStore = promptStore;
37+
_handlers = handlers;
38+
_logger = logger;
4039
}
4140

4241
public async Task<AIChatSession> NewAsync(AIProfile profile, NewAIChatSessionContext context)
@@ -193,13 +192,17 @@ public async Task<bool> DeleteAsync(string sessionId)
193192
return false;
194193
}
195194

196-
await CleanupSessionDocumentsAsync(chatSession);
195+
var deletingContext = new CrestApps.OrchardCore.Models.DeletingContext<AIChatSession>(chatSession);
196+
await _handlers.InvokeAsync((handler, ctx) => handler.DeletingAsync(ctx), deletingContext, _logger);
197197

198198
// Delete all prompts associated with this session.
199199
await _promptStore.DeleteAllPromptsAsync(chatSession.SessionId);
200200

201201
_session.Delete(chatSession, collection: AIConstants.CollectionName);
202202

203+
var deletedContext = new CrestApps.OrchardCore.Models.DeletedContext<AIChatSession>(chatSession);
204+
await _handlers.InvokeAsync((handler, ctx) => handler.DeletedAsync(ctx), deletedContext, _logger);
205+
203206
return true;
204207
}
205208

@@ -225,85 +228,20 @@ public async Task<int> DeleteAllAsync(string profileId)
225228

226229
foreach (var session in sessions)
227230
{
228-
await CleanupSessionDocumentsAsync(session);
231+
var deletingContext = new CrestApps.OrchardCore.Models.DeletingContext<AIChatSession>(session);
232+
await _handlers.InvokeAsync((handler, ctx) => handler.DeletingAsync(ctx), deletingContext, _logger);
229233

230234
// Delete all prompts associated with this session.
231235
await _promptStore.DeleteAllPromptsAsync(session.SessionId);
232236

233237
_session.Delete(session, collection: AIConstants.CollectionName);
234-
totalDeleted++;
235-
}
236-
237-
return totalDeleted;
238-
}
239-
240-
/// <summary>
241-
/// Removes all documents associated with the given session from the document store
242-
/// and schedules deferred removal of their chunks from all AI document indexes.
243-
/// </summary>
244-
private async Task CleanupSessionDocumentsAsync(AIChatSession session)
245-
{
246-
var documents = await _documentStore.GetDocumentsAsync(
247-
session.SessionId,
248-
AIConstants.DocumentReferenceTypes.ChatSession);
249-
250-
if (documents.Count == 0)
251-
{
252-
return;
253-
}
254-
255-
var chunkIds = new List<string>();
256-
257-
foreach (var doc in documents)
258-
{
259-
if (doc.Chunks != null)
260-
{
261-
for (var i = 0; i < doc.Chunks.Count; i++)
262-
{
263-
chunkIds.Add($"{doc.ItemId}_{i}");
264-
}
265-
}
266-
267-
await _documentStore.DeleteAsync(doc);
268-
}
269-
270-
if (chunkIds.Count > 0)
271-
{
272-
ShellScope.AddDeferredTask(scope => RemoveDocumentChunksAsync(scope, chunkIds));
273-
}
274-
}
275-
276-
private static async Task RemoveDocumentChunksAsync(ShellScope scope, List<string> chunkIds)
277-
{
278-
var services = scope.ServiceProvider;
279-
var indexStore = services.GetRequiredService<IIndexProfileStore>();
280238

281-
var indexProfiles = await indexStore.GetByTypeAsync(AIConstants.AIDocumentsIndexingTaskType);
239+
var deletedContext = new CrestApps.OrchardCore.Models.DeletedContext<AIChatSession>(session);
240+
await _handlers.InvokeAsync((handler, ctx) => handler.DeletedAsync(ctx), deletedContext, _logger);
282241

283-
if (!indexProfiles.Any())
284-
{
285-
return;
242+
totalDeleted++;
286243
}
287244

288-
var logger = services.GetRequiredService<ILogger<DefaultAIChatSessionManager>>();
289-
290-
foreach (var indexProfile in indexProfiles)
291-
{
292-
var documentIndexManager = services.GetKeyedService<IDocumentIndexManager>(indexProfile.ProviderName);
293-
294-
if (documentIndexManager == null)
295-
{
296-
continue;
297-
}
298-
299-
try
300-
{
301-
await documentIndexManager.DeleteDocumentsAsync(indexProfile, chunkIds);
302-
}
303-
catch (Exception ex)
304-
{
305-
logger.LogError(ex, "Error removing session document chunks from index '{IndexName}'.", indexProfile.IndexName);
306-
}
307-
}
245+
return totalDeleted;
308246
}
309247
}

src/CrestApps.OrchardCore.Documentations/docs/ai/chat.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ The **AI Chat** feature builds upon the **AI Services** feature by adding AI cha
2323
- **Azure AI Inference Chat** (`CrestApps.OrchardCore.AzureAIInference`): AI services using models served through Azure AI Inference (GitHub Models).
2424
- **Ollama AI Chat** (`CrestApps.OrchardCore.Ollama`): AI-powered chat using the Ollama service.
2525

26+
### Welcome Message Behavior
27+
28+
When an AI profile has a **Welcome Message** configured, it is displayed to the user as the first message in the chat. The welcome message is also included in the conversation history sent to the AI model as an assistant message, ensuring the model understands the full context of the conversation — including any questions posed in the welcome message that the user may respond to in their first prompt.
29+
2630
### Admin Chat User Interface
2731

2832
![Screen cast of the admin chat](/img/docs/admin-ui-sample.gif)

src/CrestApps.OrchardCore.Documentations/docs/ai/documents/openxml.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,5 +55,5 @@ To use these files, please convert them to the newer formats (.docx, .xlsx, .ppt
5555
### Content Extraction Notes
5656

5757
- **Word**: Extracts text from all paragraphs in the main document body
58-
- **Excel**: Extracts data row-by-row, with cells separated by tabs
58+
- **Excel**: Extracts data row-by-row, with cells separated by tabs. Supports shared strings, inline strings, numeric values, and boolean cells.
5959
- **PowerPoint**: Extracts text from all text elements across all slides

src/CrestApps.OrchardCore.Documentations/docs/changelog/v2.0.0.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,36 @@ The citation and reference system has been completely reworked so that **every A
194194

195195
---
196196

197+
### Fix: Welcome Message Now Included in Chat History
198+
199+
When an AI Profile has a `WelcomeMessage`, it is the first message the user sees. Previously, the model had no knowledge of this welcome message, so it could not understand the user's first response (e.g., an answer to a question posed in the welcome message). The welcome message is now prepended as the first assistant message in the conversation history, giving the model the full context of the conversation from the start.
200+
201+
---
202+
203+
### Fix: Chat Session Handler Lifecycle Events
204+
205+
`IAIChatSessionHandler` now extends `ICatalogEntryHandler<AIChatSession>`, adding full lifecycle events: `InitializingAsync`, `InitializedAsync`, `CreatingAsync`, `CreatedAsync`, `LoadedAsync`, `DeletingAsync`, `DeletedAsync`, `UpdatingAsync`, `UpdatedAsync`, `ValidatingAsync`, and `ValidatedAsync`. A new `AIChatSessionHandlerBase` base class (extending `CatalogEntryHandlerBase<AIChatSession>`) provides virtual no-op implementations for all lifecycle methods plus the existing `MessageCompletedAsync`, so handler implementations only need to override the events they care about.
206+
207+
`DefaultAIChatSessionManager` no longer depends on `IAIDocumentStore` directly. Instead, it invokes the `DeletingAsync`/`DeletedAsync` lifecycle events on all registered `IAIChatSessionHandler` implementations when sessions are deleted. Document cleanup is now handled by a dedicated `AIChatSessionDocumentCleanupHandler` registered in the AI Documents feature, which resolves the dependency injection exception that occurred when the AI Documents feature was not enabled.
208+
209+
**Breaking:** `IAIChatSessionHandler` now inherits from `ICatalogEntryHandler<AIChatSession>`. Existing implementations should extend `AIChatSessionHandlerBase` instead of implementing the interface directly to avoid having to implement all lifecycle methods.
210+
211+
---
212+
213+
### Fix: OpenXml Excel Data Extraction
214+
215+
Fixed `OpenXmlIngestionDocumentReader.GetCellValue` to correctly handle Excel cells stored as **inline strings** (`InlineString` cell type) and **boolean** values. Previously, inline string cells returned empty text because the code checked only `CellValue`, which is `null` for inline strings — the text is stored in the `InlineString` element instead. The shared string table lookup was also changed from LINQ `ElementAtOrDefault` to the direct `ChildElements` indexer for O(1) access.
216+
217+
---
218+
219+
### Refactor: Unified DataSourceMetadata Type
220+
221+
`AIProfileDataSourceMetadata` and `ChatInteractionDataSourceMetadata` have been merged into a single `DataSourceMetadata` type in `CrestApps.OrchardCore.AI.Models`. Both types were identical (containing only a `DataSourceId` property) and served the same purpose on different entity types. Data migrations automatically rename the stored JSON property keys from the legacy names to `DataSourceMetadata`.
222+
223+
**Breaking:** `AIProfileDataSourceMetadata` (from `CrestApps.OrchardCore.AI.Core.Models`) and `ChatInteractionDataSourceMetadata` (from `CrestApps.OrchardCore.AI.Chat.Interactions`) have been removed. Use `DataSourceMetadata` (from `CrestApps.OrchardCore.AI.Models`) instead.
224+
225+
---
226+
197227
## Breaking Changes
198228

199229
### Changed: Navigation Paths

src/Modules/CrestApps.OrchardCore.AI.Chat.Interactions/ChatInteractionDataSourceMetadata.cs

Lines changed: 0 additions & 12 deletions
This file was deleted.

src/Modules/CrestApps.OrchardCore.AI.Chat.Interactions/Drivers/ChatInteractionDataSourceDisplayDriver.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ public override IDisplayResult Edit(ChatInteraction interaction, BuildEditorCont
4040
{
4141
var dataSourceSettings = await _siteService.GetSettingsAsync<AIDataSourceSettings>();
4242

43-
var metadata = interaction.As<ChatInteractionDataSourceMetadata>();
43+
var metadata = interaction.As<DataSourceMetadata>();
4444
model.DataSourceId = metadata?.DataSourceId;
4545

4646
var ragMetadata = interaction.As<AIDataSourceRagMetadata>();
@@ -66,7 +66,7 @@ public override async Task<IDisplayResult> UpdateAsync(ChatInteraction interacti
6666

6767
if (dataSource != null)
6868
{
69-
interaction.Put(new ChatInteractionDataSourceMetadata
69+
interaction.Put(new DataSourceMetadata
7070
{
7171
DataSourceId = dataSource.ItemId,
7272
});
@@ -75,7 +75,7 @@ public override async Task<IDisplayResult> UpdateAsync(ChatInteraction interacti
7575
else
7676
{
7777
// Clear the metadata if no data source is selected
78-
interaction.Put(new ChatInteractionDataSourceMetadata());
78+
interaction.Put(new DataSourceMetadata());
7979
}
8080

8181
var dataSourceSettings = await _siteService.GetSettingsAsync<AIDataSourceSettings>();

0 commit comments

Comments
 (0)