Skip to content

Commit 1ce774a

Browse files
authored
Merge pull request #653 from iceljc/master
refine knowledge doc filter
2 parents b942e71 + e760bd1 commit 1ce774a

File tree

6 files changed

+123
-14
lines changed

6 files changed

+123
-14
lines changed

src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,37 @@ public interface IKnowledgeService
2222
#endregion
2323

2424
#region Document
25+
/// <summary>
26+
/// Save documents and their contents to knowledgebase
27+
/// </summary>
28+
/// <param name="collectionName"></param>
29+
/// <param name="files"></param>
30+
/// <returns></returns>
2531
Task<UploadKnowledgeResponse> UploadDocumentsToKnowledge(string collectionName, IEnumerable<ExternalFileModel> files);
32+
/// <summary>
33+
/// Save document content to knowledgebase without saving the document
34+
/// </summary>
35+
/// <param name="collectionName"></param>
36+
/// <param name="fileName"></param>
37+
/// <param name="fileSource"></param>
38+
/// <param name="contents"></param>
39+
/// <param name="refData"></param>
40+
/// <returns></returns>
2641
Task<bool> ImportDocumentContentToKnowledge(string collectionName, string fileName, string fileSource, IEnumerable<string> contents, DocMetaRefData? refData = null);
42+
/// <summary>
43+
/// Delete one document and its related knowledge in the collection
44+
/// </summary>
45+
/// <param name="collectionName"></param>
46+
/// <param name="fileId"></param>
47+
/// <returns></returns>
2748
Task<bool> DeleteKnowledgeDocument(string collectionName, Guid fileId);
49+
/// <summary>
50+
/// Delete all documents in the collection that match the filter, together with their related knowledge
51+
/// </summary>
52+
/// <param name="collectionName"></param>
53+
/// <param name="filter"></param>
54+
/// <returns></returns>
55+
Task<bool> DeleteKnowledgeDocuments(string collectionName, KnowledgeFileFilter filter);
2856
Task<PagedItems<KnowledgeFileModel>> GetPagedKnowledgeDocuments(string collectionName, KnowledgeFileFilter filter);
2957
Task<FileBinaryDataModel> GetKnowledgeDocumentBinaryData(string collectionName, Guid fileId);
3058
#endregion

src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/KnowledgeFileFilter.cs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,14 @@ public class KnowledgeFileFilter : Pagination
44
{
55
public IEnumerable<Guid>? FileIds { get; set; }
66

7+
public IEnumerable<string>? FileNames { get; set; }
8+
9+
public IEnumerable<string>? ContentTypes { get; set; }
10+
711
public IEnumerable<string>? FileSources { get; set; }
12+
13+
public KnowledgeFileFilter()
14+
{
15+
16+
}
817
}

src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.KnowledgeBase.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,10 +189,20 @@ public PagedItems<KnowledgeDocMetaData> GetKnowledgeBaseFileMeta(string collecti
189189
matched = matched && filter.FileIds.Contains(metaData.FileId);
190190
}
191191

192+
if (!filter.FileNames.IsNullOrEmpty())
193+
{
194+
matched = matched && filter.FileNames.Contains(metaData.FileName);
195+
}
196+
192197
if (!filter.FileSources.IsNullOrEmpty())
193198
{
194199
matched = matched & filter.FileSources.Contains(metaData.FileSource);
195200
}
201+
202+
if (!filter.ContentTypes.IsNullOrEmpty())
203+
{
204+
matched = matched && filter.ContentTypes.Contains(metaData.ContentType);
205+
}
196206
}
197207

198208

src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -149,14 +149,17 @@ public async Task<bool> DeleteKnowledgeDocument([FromRoute] string collection, [
149149
return response;
150150
}
151151

152+
/// <summary>
/// Delete the knowledge documents in a collection that are selected by the request body.
/// The request is handed to the knowledge service unchanged as the document filter.
/// </summary>
/// <param name="collection">Collection name taken from the route</param>
/// <param name="request">Document filter taken from the request body</param>
/// <returns>The boolean result reported by the knowledge service</returns>
[HttpDelete("/knowledge/document/{collection}/delete")]
public async Task<bool> DeleteKnowledgeDocuments([FromRoute] string collection, [FromBody] GetKnowledgeDocsRequest request)
    => await _knowledgeService.DeleteKnowledgeDocuments(collection, request);
158+
152159
[HttpPost("/knowledge/document/{collection}/page")]
153160
public async Task<PagedItems<KnowledgeFileViewModel>> GetPagedKnowledgeDocuments([FromRoute] string collection, [FromBody] GetKnowledgeDocsRequest request)
154161
{
155-
var data = await _knowledgeService.GetPagedKnowledgeDocuments(collection, new KnowledgeFileFilter
156-
{
157-
Page = request.Page,
158-
Size = request.Size
159-
});
162+
var data = await _knowledgeService.GetPagedKnowledgeDocuments(collection, request);
160163

161164
return new PagedItems<KnowledgeFileViewModel>
162165
{

src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs

Lines changed: 58 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
using BotSharp.Abstraction.Files.Utilities;
44
using BotSharp.Abstraction.Knowledges.Helpers;
55
using BotSharp.Abstraction.VectorStorage.Enums;
6+
using System.Collections;
67
using System.Net.Http;
78
using System.Net.Mime;
89

@@ -151,8 +152,8 @@ public async Task<bool> DeleteKnowledgeDocument(string collectionName, Guid file
151152
// Get doc meta data
152153
var pageData = db.GetKnowledgeBaseFileMeta(collectionName, vectorStoreProvider, new KnowledgeFileFilter
153154
{
154-
FileIds = [ fileId ],
155-
Size = 1
155+
Size = 1,
156+
FileIds = [ fileId ]
156157
});
157158

158159
// Delete doc
@@ -177,6 +178,57 @@ public async Task<bool> DeleteKnowledgeDocument(string collectionName, Guid file
177178
}
178179
}
179180

181+
/// <summary>
/// Delete every document in the collection that matches the filter, together with its related knowledge.
/// Documents are fetched and deleted in batches of <c>filter.Size</c>.
/// </summary>
/// <param name="collectionName">Name of the knowledge collection</param>
/// <param name="filter">File ids, names, sources and content types selecting the documents; Size is used as the batch size</param>
/// <returns>
/// False when the collection name is blank, the filter is null, or no document matches the filter;
/// true once all batches have been attempted (individual deletions are best-effort).
/// </returns>
public async Task<bool> DeleteKnowledgeDocuments(string collectionName, KnowledgeFileFilter filter)
{
    if (string.IsNullOrWhiteSpace(collectionName) || filter == null) return false;

    // A non-positive size would make the page-count division below throw DivideByZeroException.
    var pageSize = filter.Size > 0 ? filter.Size : 1;

    // Copy the filter so the caller's paging state is neither relied upon nor mutated.
    var innerFilter = new KnowledgeFileFilter
    {
        Page = 1,
        Size = pageSize,
        FileIds = filter.FileIds,
        FileNames = filter.FileNames,
        FileSources = filter.FileSources,
        ContentTypes = filter.ContentTypes
    };

    var pageData = await GetPagedKnowledgeDocuments(collectionName, innerFilter);

    var total = pageData.Count;
    if (total == 0) return false;

    // Ceiling division: number of batches needed to cover all matches.
    var totalPages = (total + pageSize - 1) / pageSize;

    for (var page = 1; page <= totalPages; page++)
    {
        if (page > 1)
        {
            // Page 1 is queried again on purpose: documents deleted in the previous
            // pass presumably no longer match, so the next batch moves up to page 1.
            pageData = await GetPagedKnowledgeDocuments(collectionName, innerFilter);
        }

        var fileIds = pageData.Items.Select(x => x.FileId).ToList();
        foreach (var fileId in fileIds)
        {
            try
            {
                await DeleteKnowledgeDocument(collectionName, fileId);
            }
            catch
            {
                // Best-effort: a failure on one document must not abort the remaining deletions.
            }
        }
    }

    return true;
}
230+
231+
180232
public async Task<PagedItems<KnowledgeFileModel>> GetPagedKnowledgeDocuments(string collectionName, KnowledgeFileFilter filter)
181233
{
182234
if (string.IsNullOrWhiteSpace(collectionName))
@@ -189,11 +241,7 @@ public async Task<PagedItems<KnowledgeFileModel>> GetPagedKnowledgeDocuments(str
189241
var vectorStoreProvider = _settings.VectorDb.Provider;
190242

191243
// Get doc meta data
192-
var pagedData = db.GetKnowledgeBaseFileMeta(collectionName, vectorStoreProvider, new KnowledgeFileFilter
193-
{
194-
Page = filter.Page,
195-
Size = filter.Size
196-
});
244+
var pagedData = db.GetKnowledgeBaseFileMeta(collectionName, vectorStoreProvider, filter);
197245

198246
var files = pagedData.Items?.Select(x => new KnowledgeFileModel
199247
{
@@ -222,8 +270,8 @@ public async Task<FileBinaryDataModel> GetKnowledgeDocumentBinaryData(string col
222270
// Get doc binary data
223271
var pageData = db.GetKnowledgeBaseFileMeta(collectionName, vectorStoreProvider, new KnowledgeFileFilter
224272
{
225-
FileIds = [ fileId ],
226-
Size = 1
273+
Size = 1,
274+
FileIds = [ fileId ]
227275
});
228276

229277
var metaData = pageData?.Items?.FirstOrDefault();
@@ -247,6 +295,7 @@ public async Task<FileBinaryDataModel> GetKnowledgeDocumentBinaryData(string col
247295
}
248296

249297

298+
250299
#region Private methods
251300
/// <summary>
252301
/// Get file content type and file bytes

src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.KnowledgeBase.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,10 +191,20 @@ public PagedItems<KnowledgeDocMetaData> GetKnowledgeBaseFileMeta(string collecti
191191
docFilters.Add(builder.In(x => x.FileId, filter.FileIds));
192192
}
193193

194+
if (!filter.FileNames.IsNullOrEmpty())
195+
{
196+
docFilters.Add(builder.In(x => x.FileName, filter.FileNames));
197+
}
198+
194199
if (!filter.FileSources.IsNullOrEmpty())
195200
{
196201
docFilters.Add(builder.In(x => x.FileSource, filter.FileSources));
197202
}
203+
204+
if (!filter.ContentTypes.IsNullOrEmpty())
205+
{
206+
docFilters.Add(builder.In(x => x.ContentType, filter.ContentTypes));
207+
}
198208
}
199209

200210
var filterDef = builder.And(docFilters);

0 commit comments

Comments
 (0)