Skip to content

Commit b942e71

Browse files
authored
Merge pull request #652 from iceljc/features/add-knowledge-docs
Features/add knowledge docs
2 parents 0f37ef8 + d60ee30 commit b942e71

File tree

12 files changed

+110
-22
lines changed

12 files changed

+110
-22
lines changed

src/Infrastructure/BotSharp.Abstraction/Files/Models/KnowledgeFileModel.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ public class KnowledgeFileModel
55
public Guid FileId { get; set; }
66
public string FileName { get; set; }
77
public string FileExtension { get; set; }
8+
public string FileSource { get; set; }
89
public string ContentType { get; set; }
910
public string FileUrl { get; set; }
1011
public DocMetaRefData? RefData { get; set; }

src/Infrastructure/BotSharp.Abstraction/Knowledges/Enums/KnowledgePayloadName.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,5 @@ public static class KnowledgePayloadName
1111
public static string FileId = "fileId";
1212
public static string FileName = "fileName";
1313
public static string FileSource = "fileSource";
14+
public static string FileUrl = "fileUrl";
1415
}

src/Plugins/BotSharp.Plugin.KnowledgeBase/Helpers/TextChopper.cs renamed to src/Infrastructure/BotSharp.Abstraction/Knowledges/Helpers/TextChopper.cs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
using System.Text.RegularExpressions;
22

3-
namespace BotSharp.Plugin.KnowledgeBase.Helpers;
3+
namespace BotSharp.Abstraction.Knowledges.Helpers;
44

55
public static class TextChopper
66
{
@@ -14,18 +14,22 @@ public static List<string> Chop(string content, ChunkOption option)
1414
private static List<string> ChopByWord(string content, ChunkOption option)
1515
{
1616
var chunks = new List<string>();
17-
var words = content.Split(' ').Where(x => !string.IsNullOrWhiteSpace(x)).ToList();
17+
var words = content.Split(' ', StringSplitOptions.RemoveEmptyEntries).Where(x => !string.IsNullOrWhiteSpace(x)).ToList();
1818

1919
var chunk = string.Empty;
2020
for (int i = 0; i < words.Count; i++)
2121
{
22-
chunk += words[i] + " ";
22+
chunk += words[i];
2323
if (chunk.Length > option.Size)
2424
{
2525
chunks.Add(chunk.Trim());
2626
chunk = string.Empty;
2727
i -= option.Conjunction;
2828
}
29+
else
30+
{
31+
chunk += " ";
32+
}
2933
}
3034

3135
if (chunks.IsNullOrEmpty() && !string.IsNullOrEmpty(chunk))

src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ public interface IKnowledgeService
2222
#endregion
2323

2424
#region Document
25-
Task<UploadKnowledgeResponse> UploadKnowledgeDocuments(string collectionName, IEnumerable<ExternalFileModel> files);
25+
Task<UploadKnowledgeResponse> UploadDocumentsToKnowledge(string collectionName, IEnumerable<ExternalFileModel> files);
26+
Task<bool> ImportDocumentContentToKnowledge(string collectionName, string fileName, string fileSource, IEnumerable<string> contents, DocMetaRefData? refData = null);
2627
Task<bool> DeleteKnowledgeDocument(string collectionName, Guid fileId);
2728
Task<PagedItems<KnowledgeFileModel>> GetPagedKnowledgeDocuments(string collectionName, KnowledgeFileFilter filter);
2829
Task<FileBinaryDataModel> GetKnowledgeDocumentBinaryData(string collectionName, Guid fileId);

src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/KnowledgeDocMetaData.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ public class DocMetaRefData
4242
[JsonPropertyName("name")]
4343
public string Name { get; set; }
4444

45+
[JsonPropertyName("type")]
46+
public string Type { get; set; }
47+
4548
[JsonPropertyName("url")]
4649
public string Url { get; set; }
4750

src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/KnowledgeFileFilter.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,6 @@ namespace BotSharp.Abstraction.Knowledges.Models;
33
/// <summary>
/// Paged filter used when querying knowledge documents.
/// </summary>
public class KnowledgeFileFilter : Pagination
{
    /// <summary>
    /// Optional file ids to match. The file repository's metadata query
    /// skips this criterion when the value is null or empty.
    /// </summary>
    public IEnumerable<Guid>? FileIds { get; set; }

    /// <summary>
    /// Optional file sources to match. The file repository's metadata query
    /// skips this criterion when the value is null or empty.
    /// </summary>
    public IEnumerable<string>? FileSources { get; set; }
}

src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.KnowledgeBase.cs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -182,10 +182,19 @@ public PagedItems<KnowledgeDocMetaData> GetKnowledgeBaseFileMeta(string collecti
182182
var matched = true;
183183

184184
// Apply filter
185-
if (filter != null && !filter.FileIds.IsNullOrEmpty())
185+
if (filter != null)
186186
{
187-
matched = matched && filter.FileIds.Contains(metaData.FileId);
187+
if (!filter.FileIds.IsNullOrEmpty())
188+
{
189+
matched = matched && filter.FileIds.Contains(metaData.FileId);
190+
}
191+
192+
if (!filter.FileSources.IsNullOrEmpty())
193+
{
194+
matched = matched & filter.FileSources.Contains(metaData.FileSource);
195+
}
188196
}
197+
189198

190199
if (!matched) continue;
191200

src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ public async Task<bool> DeleteVectorCollectionAllData([FromRoute] string collect
115115
[HttpPost("/knowledge/document/{collection}/upload")]
116116
public async Task<UploadKnowledgeResponse> UploadKnowledgeDocuments([FromRoute] string collection, [FromBody] VectorKnowledgeUploadRequest request)
117117
{
118-
var response = await _knowledgeService.UploadKnowledgeDocuments(collection, request.Files);
118+
var response = await _knowledgeService.UploadDocumentsToKnowledge(collection, request.Files);
119119
return response;
120120
}
121121

@@ -138,7 +138,7 @@ public async Task<UploadKnowledgeResponse> UploadKnowledgeDocuments([FromRoute]
138138
});
139139
}
140140

141-
var response = await _knowledgeService.UploadKnowledgeDocuments(collection, docs);
141+
var response = await _knowledgeService.UploadDocumentsToKnowledge(collection, docs);
142142
return response;
143143
}
144144

@@ -149,7 +149,7 @@ public async Task<bool> DeleteKnowledgeDocument([FromRoute] string collection, [
149149
return response;
150150
}
151151

152-
[HttpPost("/knowledge/document/{collection}/list")]
152+
[HttpPost("/knowledge/document/{collection}/page")]
153153
public async Task<PagedItems<KnowledgeFileViewModel>> GetPagedKnowledgeDocuments([FromRoute] string collection, [FromBody] GetKnowledgeDocsRequest request)
154154
{
155155
var data = await _knowledgeService.GetPagedKnowledgeDocuments(collection, new KnowledgeFileFilter

src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/KnowledgeFileViewModel.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ public class KnowledgeFileViewModel
1010
[JsonPropertyName("file_name")]
1111
public string FileName { get; set; }
1212

13+
[JsonPropertyName("file_source")]
14+
public string FileSource { get; set; }
15+
1316
[JsonPropertyName("file_extension")]
1417
public string FileExtension { get; set; }
1518

@@ -29,6 +32,7 @@ public static KnowledgeFileViewModel From(KnowledgeFileModel model)
2932
{
3033
FileId = model.FileId,
3134
FileName = model.FileName,
35+
FileSource = model.FileSource,
3236
FileExtension = model.FileExtension,
3337
ContentType = model.ContentType,
3438
FileUrl = model.FileUrl,

src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs

Lines changed: 63 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
using BotSharp.Abstraction.Files;
22
using BotSharp.Abstraction.Files.Models;
33
using BotSharp.Abstraction.Files.Utilities;
4+
using BotSharp.Abstraction.Knowledges.Helpers;
45
using BotSharp.Abstraction.VectorStorage.Enums;
56
using System.Net.Http;
67
using System.Net.Mime;
@@ -9,7 +10,7 @@ namespace BotSharp.Plugin.KnowledgeBase.Services;
910

1011
public partial class KnowledgeService
1112
{
12-
public async Task<UploadKnowledgeResponse> UploadKnowledgeDocuments(string collectionName, IEnumerable<ExternalFileModel> files)
13+
public async Task<UploadKnowledgeResponse> UploadDocumentsToKnowledge(string collectionName, IEnumerable<ExternalFileModel> files)
1314
{
1415
if (string.IsNullOrWhiteSpace(collectionName) || files.IsNullOrEmpty())
1516
{
@@ -89,6 +90,50 @@ public async Task<UploadKnowledgeResponse> UploadKnowledgeDocuments(string colle
8990
}
9091

9192

93+
/// <summary>
/// Imports already-extracted document content into a knowledge collection:
/// stores the content through <c>SaveToVectorDb</c> under a newly generated
/// file id and then records the file metadata in the repository.
/// </summary>
/// <param name="collectionName">Target collection; must not be null or whitespace.</param>
/// <param name="fileName">Name recorded for the document; also used to resolve the content type.</param>
/// <param name="fileSource">Source label stored with the vector payload and the file metadata.</param>
/// <param name="contents">Text pieces to store; must contain at least one item.</param>
/// <param name="refData">Optional reference data; its <c>Url</c> is forwarded as the file url.</param>
/// <returns>
/// <c>true</c> when content was stored and the metadata was saved;
/// <c>false</c> on invalid input, when nothing was stored, or when an exception occurred.
/// </returns>
public async Task<bool> ImportDocumentContentToKnowledge(string collectionName, string fileName, string fileSource,
    IEnumerable<string> contents, DocMetaRefData? refData = null)
{
    if (string.IsNullOrWhiteSpace(collectionName)
        || string.IsNullOrWhiteSpace(fileName)
        || contents.IsNullOrEmpty())
    {
        return false;
    }

    try
    {
        var db = _services.GetRequiredService<IBotSharpRepository>();
        var userId = await GetUserId();
        var vectorStoreProvider = _settings.VectorDb.Provider;
        var fileId = Guid.NewGuid();
        var contentType = FileUtility.GetFileContentType(fileName);

        var dataIds = await SaveToVectorDb(collectionName, fileId, fileName, contents, fileSource, fileUrl: refData?.Url);

        // Do not record metadata (or report success) for a document that has no
        // stored vector data: the record would reference nothing.
        // NOTE(review): assumes SaveToVectorDb returns the ids of the entries it stored - confirm.
        if (dataIds.IsNullOrEmpty())
        {
            _logger.LogWarning("No content was saved when importing doc content to knowledgebase ({CollectionName}-{FileName})",
                collectionName, fileName);
            return false;
        }

        db.SaveKnolwedgeBaseFileMeta(new KnowledgeDocMetaData
        {
            Collection = collectionName,
            FileId = fileId,
            FileName = fileName,
            FileSource = fileSource,
            ContentType = contentType,
            VectorStoreProvider = vectorStoreProvider,
            VectorDataIds = dataIds,
            RefData = refData,
            CreateDate = DateTime.UtcNow,
            CreateUserId = userId
        });
        return true;
    }
    catch (Exception ex)
    {
        // Pass the exception object to the logger so the stack trace and inner
        // exceptions are kept, and use a message template instead of interpolation.
        _logger.LogWarning(ex, "Error when importing doc content to knowledgebase ({CollectionName}-{FileName})",
            collectionName, fileName);
        return false;
    }
}
135+
136+
92137
public async Task<bool> DeleteKnowledgeDocument(string collectionName, Guid fileId)
93138
{
94139
if (string.IsNullOrWhiteSpace(collectionName))
@@ -154,6 +199,7 @@ public async Task<PagedItems<KnowledgeFileModel>> GetPagedKnowledgeDocuments(str
154199
{
155200
FileId = x.FileId,
156201
FileName = x.FileName,
202+
FileSource = x.FileSource,
157203
FileExtension = Path.GetExtension(x.FileName),
158204
ContentType = x.ContentType,
159205
FileUrl = fileStorage.GetKnowledgeBaseFileUrl(collectionName, vectorStoreProvider, x.FileId, x.FileName),
@@ -259,7 +305,7 @@ private async Task<IEnumerable<string>> ReadTxt(byte[] bytes)
259305
var lines = TextChopper.Chop(content, new ChunkOption
260306
{
261307
Size = 1024,
262-
Conjunction = 32,
308+
Conjunction = 12,
263309
SplitByWord = true,
264310
});
265311
return lines;
@@ -282,7 +328,7 @@ private bool SaveDocument(string collectionName, string vectorStoreProvider, Gui
282328

283329
private async Task<IEnumerable<string>> SaveToVectorDb(
284330
string collectionName, Guid fileId, string fileName, IEnumerable<string> contents,
285-
string fileSource = KnowledgeDocSource.Api, string vectorDataSource = VectorDataSource.File)
331+
string fileSource = KnowledgeDocSource.Api, string vectorDataSource = VectorDataSource.File, string? fileUrl = null)
286332
{
287333
if (contents.IsNullOrEmpty())
288334
{
@@ -293,19 +339,25 @@ private async Task<IEnumerable<string>> SaveToVectorDb(
293339
var vectorDb = GetVectorDb();
294340
var textEmbedding = GetTextEmbedding(collectionName);
295341

342+
var payload = new Dictionary<string, string>
343+
{
344+
{ KnowledgePayloadName.DataSource, vectorDataSource },
345+
{ KnowledgePayloadName.FileId, fileId.ToString() },
346+
{ KnowledgePayloadName.FileName, fileName },
347+
{ KnowledgePayloadName.FileSource, fileSource }
348+
};
349+
350+
if (!string.IsNullOrWhiteSpace(fileUrl))
351+
{
352+
payload[KnowledgePayloadName.FileUrl] = fileUrl;
353+
}
354+
296355
for (int i = 0; i < contents.Count(); i++)
297356
{
298357
var content = contents.ElementAt(i);
299358
var vector = await textEmbedding.GetVectorAsync(content);
300359
var dataId = Guid.NewGuid();
301-
var saved = await vectorDb.Upsert(collectionName, dataId, vector, content, new Dictionary<string, string>
302-
{
303-
{ KnowledgePayloadName.DataSource, vectorDataSource },
304-
{ KnowledgePayloadName.FileId, fileId.ToString() },
305-
{ KnowledgePayloadName.FileName, fileName },
306-
{ KnowledgePayloadName.FileSource, fileSource },
307-
{ "textNumber", $"{i + 1}" }
308-
});
360+
var saved = await vectorDb.Upsert(collectionName, dataId, vector, content, payload);
309361

310362
if (!saved) continue;
311363

0 commit comments

Comments
 (0)