1+ using System . Net ;
2+ using System . Text . Json ;
13using Azure . AISearch . FunctionApp . Models ;
24using Azure . AISearch . FunctionApp . Services ;
35using Microsoft . AspNetCore . Http ;
46using Microsoft . AspNetCore . Mvc ;
5- using Microsoft . Azure . WebJobs ;
6- using Microsoft . Azure . WebJobs . Extensions . Http ;
7+ using Microsoft . Azure . Functions . Worker ;
8+ using Microsoft . Azure . Functions . Worker . Http ;
79using Microsoft . Extensions . Configuration ;
810using Microsoft . Extensions . Logging ;
9- using Newtonsoft . Json ;
1011
1112namespace Azure . AISearch . FunctionApp ;
1213
1314public class ChunkEmbedPush
1415{
16+ private readonly ILogger logger ;
1517 private readonly AppSettings settings ;
1618 private readonly SemanticKernelChunkingService chunkingService ;
1719 private readonly AzureOpenAIEmbeddingService embeddingService ;
1820 private readonly AzureCognitiveSearchService searchService ;
1921
// Constructor of ChunkEmbedPush, shown here as a diff: lines marked "-" are the removed version, lines marked "+" are the added version.
// The added signature takes an ILoggerFactory as a new first parameter; the remaining four parameters are the same as in the removed signature.
20- public ChunkEmbedPush ( IConfiguration configuration , SemanticKernelChunkingService chunkingService , AzureOpenAIEmbeddingService embeddingService , AzureCognitiveSearchService searchService )
22+ public ChunkEmbedPush ( ILoggerFactory loggerFactory , IConfiguration configuration , SemanticKernelChunkingService chunkingService , AzureOpenAIEmbeddingService embeddingService , AzureCognitiveSearchService searchService )
2123 {
// Removed version: assigned the result of Get<AppSettings>() to the field directly, with no null check.
22- this . settings = configuration . Get < AppSettings > ( ) ;
// Added version: create a logger for this class from the injected factory and keep it in the logger field.
24+ this . logger = loggerFactory . CreateLogger < ChunkEmbedPush > ( ) ;
// Read AppSettings from the configuration into a local first so it can be checked before being stored.
25+ var settings = configuration . Get < AppSettings > ( ) ;
// Throws ArgumentNullException if Get<AppSettings>() returned null, so the settings field is never assigned null.
26+ ArgumentNullException . ThrowIfNull ( settings ) ;
27+ this . settings = settings ;
// Keep the three injected services in their fields (these lines carry no "+"/"-" marker, i.e. they appear in both versions).
2328 this . chunkingService = chunkingService ;
2429 this . embeddingService = embeddingService ;
2530 this . searchService = searchService ;
2631 }
2732
28- [ FunctionName ( nameof ( ChunkEmbedPush ) ) ]
29- public async Task < IActionResult > Run ( [ HttpTrigger ( AuthorizationLevel . Anonymous , "POST" ) ] HttpRequest request , ILogger log )
33+ [ Function ( nameof ( ChunkEmbedPush ) ) ]
34+ public async Task < IActionResult > Run ( [ HttpTrigger ( AuthorizationLevel . Anonymous , "POST" ) ] HttpRequest request )
3035 {
31- log . LogInformation ( "Skill request received" ) ;
36+ this . logger . LogInformation ( "Skill request received" ) ;
3237
3338 // Use basic API key authentication for demo purposes to avoid a dependency on the Function App keys.
3439 if ( ! string . IsNullOrWhiteSpace ( this . settings . TextEmbedderFunctionApiKey ) )
@@ -41,62 +46,65 @@ public async Task<IActionResult> Run([HttpTrigger(AuthorizationLevel.Anonymous,
4146 }
4247
4348 // Get the skill request.
44- var skillRequestJson = await request . ReadAsStringAsync ( ) ;
45- var skillRequest = JsonConvert . DeserializeObject < SkillRequest > ( skillRequestJson ) ;
4649 var skillResponse = new SkillResponse ( ) ;
47-
48- if ( skillRequest ? . Values != null )
50+ using var bodyReader = new StreamReader ( request . Body ) ;
51+ var skillRequestJson = await bodyReader . ReadToEndAsync ( ) ;
52+ if ( ! string . IsNullOrWhiteSpace ( skillRequestJson ) )
4953 {
50- // Process all records in the request.
51- foreach ( var record in skillRequest . Values )
54+ var skillRequest = JsonSerializer . Deserialize < SkillRequest > ( skillRequestJson ) ;
55+ if ( skillRequest ? . Values != null )
5256 {
53- log . LogInformation ( $ "Processing record \" { record . RecordId } \" with document id \" { record . Data . DocumentId } \" and filepath \" { record . Data . FilePath } \" ." ) ;
54- var responseRecord = new SkillResponseRecord
57+ // Process all records in the request.
58+ foreach ( var record in skillRequest . Values )
5559 {
56- RecordId = record . RecordId
57- } ;
58- skillResponse . Values . Add ( responseRecord ) ;
59-
60- // Use default settings if not specified in the request.
61- record . Data . NumTokens = record . Data . NumTokens ?? this . settings . TextEmbedderNumTokens ?? 2048 ;
62- record . Data . TokenOverlap = record . Data . TokenOverlap ?? this . settings . TextEmbedderTokenOverlap ?? 0 ;
63- record . Data . MinChunkSize = record . Data . MinChunkSize ?? this . settings . TextEmbedderMinChunkSize ?? 10 ;
64- record . Data . EmbeddingDeploymentName = record . Data . EmbeddingDeploymentName ?? this . settings . OpenAIEmbeddingDeployment ?? throw new InvalidOperationException ( "No embedding deployment name specified." ) ;
60+ this . logger . LogInformation ( $ "Processing record \" { record . RecordId } \" with document id \" { record . Data . DocumentId } \" and filepath \" { record . Data . FilePath } \" .") ;
61+ var responseRecord = new SkillResponseRecord
62+ {
63+ RecordId = record . RecordId
64+ } ;
65+ skillResponse . Values . Add ( responseRecord ) ;
6566
66- if ( ! string . IsNullOrWhiteSpace ( record . Data . Text ) )
67- {
68- // Generate chunks for the text in the record.
69- log . LogInformation ( $ "Chunking to { record . Data . NumTokens } tokens (min chunk size is { record . Data . MinChunkSize } , token overlap is { record . Data . TokenOverlap } ).") ;
70- var chunks = this . chunkingService . GetChunks ( record . Data ) ;
71- var chunksToProcess = chunks . Where ( c => this . chunkingService . EstimateChunkSize ( c ) >= record . Data . MinChunkSize ) . ToList ( ) ;
72- responseRecord . Data . SkippedChunks = chunks . Count - chunksToProcess . Count ;
73- log . LogInformation ( $ "Skipping { responseRecord . Data . SkippedChunks } chunk(s) with an estimated token size below the minimum chunk size.") ;
67+ // Use default settings if not specified in the request.
68+ record . Data . NumTokens = record . Data . NumTokens ?? this . settings . TextEmbedderNumTokens ?? 2048 ;
69+ record . Data . TokenOverlap = record . Data . TokenOverlap ?? this . settings . TextEmbedderTokenOverlap ?? 0 ;
70+ record . Data . MinChunkSize = record . Data . MinChunkSize ?? this . settings . TextEmbedderMinChunkSize ?? 10 ;
71+ record . Data . EmbeddingDeploymentName = record . Data . EmbeddingDeploymentName ?? this . settings . OpenAIEmbeddingDeployment ?? throw new InvalidOperationException ( "No embedding deployment name specified." ) ;
7472
75- log . LogInformation ( $ "Generating embeddings for { chunks . Count } chunk(s) using deployment \" { record . Data . EmbeddingDeploymentName } \" .") ;
76- var index = 0 ;
77- var documentChunks = new List < DocumentChunk > ( ) ;
78- foreach ( var chunk in chunks )
73+ if ( ! string . IsNullOrWhiteSpace ( record . Data . Text ) )
7974 {
80- // For each chunk, generate an embedding.
81- var embedding = await this . embeddingService . GetEmbeddingAsync ( record . Data . EmbeddingDeploymentName , chunk ) ;
75+ // Generate chunks for the text in the record.
76+ this . logger . LogInformation ( $ "Chunking to { record . Data . NumTokens } tokens (min chunk size is { record . Data . MinChunkSize } , token overlap is { record . Data . TokenOverlap } ).") ;
77+ var chunks = this . chunkingService . GetChunks ( record . Data ) ;
78+ var chunksToProcess = chunks . Where ( c => this . chunkingService . EstimateChunkSize ( c ) >= record . Data . MinChunkSize ) . ToList ( ) ;
79+ responseRecord . Data . SkippedChunks = chunks . Count - chunksToProcess . Count ;
80+ this . logger . LogInformation ( $ "Skipping { responseRecord . Data . SkippedChunks } chunk(s) with an estimated token size below the minimum chunk size.") ;
8281
83- // For each chunk with its embedding, create a document to be stored in the search index.
84- var documentChunk = new DocumentChunk
82+ this . logger . LogInformation ( $ "Generating embeddings for { chunks . Count } chunk(s) using deployment \" { record . Data . EmbeddingDeploymentName } \" .") ;
83+ var index = 0 ;
84+ var documentChunks = new List < DocumentChunk > ( ) ;
85+ foreach ( var chunk in chunks )
8586 {
86- Id = $ "{ record . Data . DocumentId } -{ index } ",
87- Content = chunk ,
88- ContentVector = embedding ,
89- SourceDocumentId = record . Data . DocumentId ,
90- SourceDocumentTitle = record . Data . Title ,
91- SourceDocumentFilePath = record . Data . FilePath
92- } ;
93- documentChunks . Add ( documentChunk ) ;
94- index ++ ;
95- }
87+ // For each chunk, generate an embedding.
88+ var embedding = await this . embeddingService . GetEmbeddingAsync ( record . Data . EmbeddingDeploymentName , chunk ) ;
89+
90+ // For each chunk with its embedding, create a document to be stored in the search index.
91+ var documentChunk = new DocumentChunk
92+ {
93+ Id = $ "{ record . Data . DocumentId } -{ index } ",
94+ Content = chunk ,
95+ ContentVector = embedding ,
96+ SourceDocumentId = record . Data . DocumentId ,
97+ SourceDocumentTitle = record . Data . Title ,
98+ SourceDocumentFilePath = record . Data . FilePath
99+ } ;
100+ documentChunks . Add ( documentChunk ) ;
101+ index ++ ;
102+ }
96103
97- // Store the document chunks in the search index.
98- log . LogInformation ( $ "Uploading { documentChunks . Count } document chunk(s) to search service.") ;
99- await this . searchService . UploadDocumentChunksAsync ( record . Data . DocumentId , documentChunks ) ;
104+ // Store the document chunks in the search index.
105+ this . logger . LogInformation ( $ "Uploading { documentChunks . Count } document chunk(s) to search service.") ;
106+ await this . searchService . UploadDocumentChunksAsync ( record . Data . DocumentId , documentChunks ) ;
107+ }
100108 }
101109 }
102110 }
0 commit comments