1
- using Azure . AI . OpenAI ;
2
1
using Azure ;
2
+ using Azure . AI . OpenAI ;
3
3
using Microsoft . Azure . Functions . Worker ;
4
4
using Microsoft . Extensions . Logging ;
5
5
using Newtonsoft . Json ;
@@ -14,130 +14,143 @@ public class ProcessFile
14
14
{
15
15
private readonly ILogger < ProcessFile > _logger ;
16
16
private readonly EmbeddingClient _embeddingClient ;
17
- public ProcessFile ( ILogger < ProcessFile > logger )
17
+
18
+ public ProcessFile ( ILogger < ProcessFile > logger ,
19
+ EmbeddingClient embeddingClient )
20
+ {
21
+ _logger = logger ?? throw new ArgumentNullException ( nameof ( logger ) ) ;
22
+ _embeddingClient = embeddingClient ?? throw new ArgumentNullException ( nameof ( embeddingClient ) ) ;
23
+ }
24
+
25
+ [ Function ( nameof ( ProcessFile ) ) ]
26
+ public async Task < MultipleOutput > Run (
27
+ [ BlobTrigger ( "certdata/{name}" , Connection = "AzureWebJobsStorage" ) ] string content ,
28
+ string name )
18
29
{
19
- _logger = logger ;
20
- // Initialize and validate environment variables
21
- string keyFromEnvironment = Environment . GetEnvironmentVariable ( "AZURE_OPENAI_API_KEY" ) ;
22
- string endpointFromEnvironment = Environment . GetEnvironmentVariable ( "AZURE_OPENAI_API_ENDPOINT" ) ;
23
- string embeddingsDeployment = Environment . GetEnvironmentVariable ( "EMBEDDINGS_DEPLOYMENT" ) ;
30
+ _logger . LogInformation ( $ "Processing blob: { name } ") ;
31
+
32
+ if ( string . IsNullOrWhiteSpace ( content ) )
33
+ {
34
+ _logger . LogError ( "Blob content is empty or whitespace." ) ;
35
+ return new MultipleOutput { CertServiceDocument = null , ArchivedContent = null } ;
36
+ }
37
+
38
+ if ( ! ValidateJsonContent ( content ) )
39
+ {
40
+ return new MultipleOutput { CertServiceDocument = null , ArchivedContent = null } ;
41
+ }
24
42
25
- if ( string . IsNullOrEmpty ( keyFromEnvironment ) || string . IsNullOrEmpty ( endpointFromEnvironment ) || string . IsNullOrEmpty ( embeddingsDeployment ) )
43
+ var mappedServiceData = JsonConvert . DeserializeObject < MappedService > ( content ) ;
44
+ if ( mappedServiceData == null )
26
45
{
27
- _logger . LogError ( "Environment variables for Azure OpenAI API are not set properly ." ) ;
28
- throw new InvalidOperationException ( "Required environment variables are missing." ) ;
46
+ _logger . LogError ( "Failed to deserialize content to MappedService ." ) ;
47
+ return new MultipleOutput { CertServiceDocument = null , ArchivedContent = null } ;
29
48
}
30
49
31
- // Initialize Azure OpenAI client
32
- AzureOpenAIClient azureClient = new (
33
- new Uri ( endpointFromEnvironment ) ,
34
- new AzureKeyCredential ( keyFromEnvironment ) ) ;
50
+ string contextSentence = GenerateContextSentence ( mappedServiceData ) ;
51
+ float [ ] contentVector = await GenerateEmbeddingsAsync ( contextSentence ) ;
35
52
36
- _embeddingClient = azureClient . GetEmbeddingClient ( embeddingsDeployment ) ;
53
+ var certServiceDocument = CreateCertServiceDocument ( mappedServiceData , contextSentence , contentVector ) ;
37
54
55
+ _logger . LogInformation ( "Document created successfully." ) ;
56
+ _logger . LogInformation ( $ "Archiving blob: { name } ") ;
57
+
58
+ return new MultipleOutput
59
+ {
60
+ CertServiceDocument = certServiceDocument ,
61
+ ArchivedContent = content
62
+
63
+ } ;
38
64
}
39
65
40
- [ Function ( nameof ( ProcessFile ) ) ]
41
- [ CosmosDBOutput ( "%CosmosDb%" , "%CosmosContainerOut%" , Connection = "CosmosDBConnection" ) ]
42
- public async Task < CertServiceDocument > Run (
43
- [ BlobTrigger ( "certdata/{name}" , Connection = "AzureWebJobsStorage" ) ] Stream stream , string name )
66
+
67
+ private async Task < string > ReadBlobContentAsync ( Stream stream )
44
68
{
45
- string content ;
46
69
try
47
70
{
48
- using var blobStreamReader = new StreamReader ( stream ) ;
49
- content = await blobStreamReader . ReadToEndAsync ( ) ;
71
+ using var reader = new StreamReader ( stream ) ;
72
+ return await reader . ReadToEndAsync ( ) ;
50
73
}
51
74
catch ( IOException ex )
52
75
{
53
- _logger . LogError ( $ "Error reading blob content: { ex . Message } ") ;
54
- return null ;
55
- }
56
-
57
- _logger . LogInformation ( $ "C# Blob trigger function Processed blob\n Name: { name } ") ;
58
-
59
- if ( string . IsNullOrWhiteSpace ( content ) )
60
- {
61
- _logger . LogError ( "Blob content is empty or whitespace." ) ;
76
+ _logger . LogError ( ex , "Error reading blob content" ) ;
62
77
return null ;
63
78
}
79
+ }
64
80
81
+ private bool ValidateJsonContent ( string content )
82
+ {
65
83
try
66
84
{
67
- ValidateJsonContent ( content ) ;
85
+ var generator = new JSchemaGenerator ( ) ;
86
+ JSchema schema = generator . Generate ( typeof ( MappedService ) ) ;
87
+
88
+ JToken jsonContent = JToken . Parse ( content ) ;
89
+ bool isValid = jsonContent . IsValid ( schema , out IList < string > messages ) ;
90
+
91
+ if ( ! isValid )
92
+ {
93
+ foreach ( var message in messages )
94
+ {
95
+ _logger . LogError ( $ "Schema validation error: { message } ") ;
96
+ }
97
+ }
98
+ else
99
+ {
100
+ _logger . LogInformation ( "JSON content is valid against the schema." ) ;
101
+ }
102
+
103
+ return isValid ;
68
104
}
69
- catch ( JsonReaderException ex )
105
+ catch ( JsonException ex )
70
106
{
71
- _logger . LogError ( $ "JSON parsing error: { ex . Message } ") ;
107
+ _logger . LogError ( ex , "JSON parsing error during validation" ) ;
108
+ return false ;
72
109
}
73
110
catch ( Exception ex )
74
111
{
75
- _logger . LogError ( $ "An unexpected error occurred: { ex . Message } ") ;
112
+ _logger . LogError ( ex , "Unexpected error during JSON validation" ) ;
113
+ return false ;
76
114
}
115
+ }
77
116
78
- var mappedServiceData = JsonConvert . DeserializeObject < MappedService > ( content ) ;
79
-
80
- string contextSentence =
81
- $ "The { mappedServiceData . CertificationCode } { mappedServiceData . CertificationName } certification includes the skill of { mappedServiceData . SkillName } . Within this skill, there is a focus on the topic of { mappedServiceData . TopicName } , particularly through the use of the service { mappedServiceData . ServiceName } .";
82
-
83
- List < float > contentVector = await GenerateEmbeddings ( contextSentence ) ;
84
- CertServiceDocument certServiceDocument = new CertServiceDocument ( ) ;
85
- certServiceDocument . id = Guid . NewGuid ( ) . ToString ( ) ;
86
- certServiceDocument . CertificationServiceKey = $ "{ mappedServiceData . CertificationCode } -{ mappedServiceData . ServiceName } ";
87
- certServiceDocument . CertificationCode = mappedServiceData . CertificationCode ;
88
- certServiceDocument . CertificationName = mappedServiceData . CertificationName ;
89
- certServiceDocument . SkillName = mappedServiceData . SkillName ;
90
- certServiceDocument . TopicName = mappedServiceData . TopicName ;
91
- certServiceDocument . ServiceName = mappedServiceData . ServiceName ;
92
- certServiceDocument . ContextSentence = contextSentence ;
93
- certServiceDocument . ContextVector = contentVector . ToArray ( ) ;
94
-
95
- _logger . LogInformation ( "Document created successfully." ) ;
96
-
97
- return certServiceDocument ;
117
+ private string GenerateContextSentence ( MappedService data ) =>
118
+ $ "The { data . CertificationCode } { data . CertificationName } certification includes the skill of { data . SkillName } . Within this skill, there is a focus on the topic of { data . TopicName } , particularly through the use of the service { data . ServiceName } .";
98
119
99
- }
100
- private async Task < List < float > > GenerateEmbeddings ( string content )
120
+ private async Task < float [ ] > GenerateEmbeddingsAsync ( string content )
101
121
{
102
122
try
103
123
{
104
124
_logger . LogInformation ( "Generating embedding..." ) ;
105
125
var embeddingResult = await _embeddingClient . GenerateEmbeddingAsync ( content ) . ConfigureAwait ( false ) ;
106
- List < float > embeddingVector = embeddingResult . Value . Vector . ToArray ( ) . ToList ( ) ;
107
126
_logger . LogInformation ( "Embedding created successfully." ) ;
108
- return embeddingVector ;
127
+ return embeddingResult . Value . Vector . ToArray ( ) ;
128
+
109
129
}
110
130
catch ( RequestFailedException ex )
111
131
{
112
- _logger . LogError ( $ "Azure OpenAI API request failed: { ex . Message } ") ;
113
- throw ; // Re-throw the exception to ensure the caller is aware of the failure
132
+ _logger . LogError ( ex , "Azure OpenAI API request failed" ) ;
133
+ throw ;
114
134
}
115
135
catch ( Exception ex )
116
136
{
117
- _logger . LogError ( $ "Error generating embedding: { ex . Message } ") ;
118
- throw ; // Re-throw the exception to ensure the caller is aware of the failure
137
+ _logger . LogError ( ex , "Error generating embedding" ) ;
138
+ throw ;
119
139
}
120
140
}
121
- private void ValidateJsonContent ( string content )
122
- {
123
- var generator = new JSchemaGenerator ( ) ;
124
- JSchema schema = generator . Generate ( typeof ( MappedService ) ) ;
125
-
126
- JToken jsonContent = JToken . Parse ( content ) ;
127
- IList < string > messages ;
128
- bool valid = jsonContent . IsValid ( schema , out messages ) ;
129
141
130
- if ( ! valid )
142
+ private CertServiceDocument CreateCertServiceDocument ( MappedService data , string contextSentence , float [ ] contentVector ) =>
143
+ new CertServiceDocument
131
144
{
132
- foreach ( var message in messages )
133
- {
134
- _logger . LogError ( $ "Schema validation error: { message } " ) ;
135
- }
136
- }
137
- else
138
- {
139
- _logger . LogInformation ( "JSON content is valid against the schema." ) ;
140
- }
141
- }
145
+ id = Guid . NewGuid ( ) . ToString ( ) ,
146
+ CertificationServiceKey = $ " { data . CertificationCode } - { data . ServiceName } " ,
147
+ CertificationCode = data . CertificationCode ,
148
+ CertificationName = data . CertificationName ,
149
+ SkillName = data . SkillName ,
150
+ TopicName = data . TopicName ,
151
+ ServiceName = data . ServiceName ,
152
+ ContextSentence = contextSentence ,
153
+ ContextVector = contentVector
154
+ } ;
142
155
}
143
- }
156
+ }
0 commit comments