1
+ using Azure ;
2
+ using Azure . AI . OpenAI ;
3
+ using Microsoft . Azure . Functions . Worker ;
4
+ using Microsoft . Extensions . Logging ;
5
+ using Newtonsoft . Json ;
6
+ using Newtonsoft . Json . Linq ;
7
+ using Newtonsoft . Json . Schema ;
8
+ using Newtonsoft . Json . Schema . Generation ;
9
+ using OpenAI . Embeddings ;
10
+
11
+ namespace azure_project_generator
12
+ {
13
/// <summary>
/// Blob-triggered function that turns a JSON blob describing a <see cref="MappedService"/>
/// into a <see cref="CertServiceDocument"/> carrying an embedding of a generated context sentence.
/// </summary>
public class ProcessFile
{
    // The schema depends only on the MappedService type, so it is generated once and reused
    // instead of being rebuilt for every blob.
    // NOTE(review): Json.NET Schema appears to meter schema generation for unlicensed use;
    // per-invocation generation would burn that quota - confirm against the license in use.
    // PublicationOnly does not cache a failed generation, so a later invocation retries it.
    private static readonly Lazy<JSchema> s_mappedServiceSchema = new Lazy<JSchema>(
        () => new JSchemaGenerator().Generate(typeof(MappedService)),
        System.Threading.LazyThreadSafetyMode.PublicationOnly);

    private readonly ILogger<ProcessFile> _logger;
    private readonly EmbeddingClient _embeddingClient;

    /// <summary>
    /// Creates the function instance.
    /// </summary>
    /// <param name="logger">Logger used for all diagnostics emitted by this function.</param>
    /// <param name="embeddingClient">Client used to generate the embedding vector.</param>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public ProcessFile(ILogger<ProcessFile> logger, EmbeddingClient embeddingClient)
    {
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _embeddingClient = embeddingClient ?? throw new ArgumentNullException(nameof(embeddingClient));
    }

    /// <summary>
    /// Validates the blob content against the <see cref="MappedService"/> schema, deserializes it,
    /// generates an embedding for its context sentence and returns the resulting document together
    /// with the original content.
    /// </summary>
    /// <param name="content">Text content of the triggering blob.</param>
    /// <param name="name">Value of the <c>{name}</c> segment of the blob path.</param>
    /// <returns>
    /// A <see cref="MultipleOutput"/> whose members are both <c>null</c> when the content is blank,
    /// fails schema validation or cannot be deserialized; otherwise the created document and the
    /// unmodified blob content.
    /// </returns>
    /// <remarks>Exceptions raised while generating the embedding are logged and rethrown.</remarks>
    [Function(nameof(ProcessFile))]
    public async Task<MultipleOutput> Run(
        [BlobTrigger("certdata/{name}", Connection = "AzureWebJobsStorage")] string content,
        string name)
    {
        _logger.LogInformation("Processing blob: {Name}", name);

        if (string.IsNullOrWhiteSpace(content))
        {
            _logger.LogError("Blob content is empty or whitespace.");
            return EmptyOutput();
        }

        // ValidateJsonContent logs the individual validation/parsing errors itself.
        if (!ValidateJsonContent(content))
        {
            return EmptyOutput();
        }

        var mappedServiceData = JsonConvert.DeserializeObject<MappedService>(content);
        if (mappedServiceData == null)
        {
            _logger.LogError("Failed to deserialize content to MappedService.");
            return EmptyOutput();
        }

        string contextSentence = GenerateContextSentence(mappedServiceData);
        float[] contentVector = await GenerateEmbeddingsAsync(contextSentence);

        var certServiceDocument = CreateCertServiceDocument(mappedServiceData, contextSentence, contentVector);

        _logger.LogInformation("Document created successfully.");
        // Nothing is archived here directly; the content is only handed back in the result.
        // NOTE(review): presumably an output binding on MultipleOutput performs the archiving - confirm.
        _logger.LogInformation("Archiving blob: {Name}", name);

        return new MultipleOutput
        {
            CertServiceDocument = certServiceDocument,
            ArchivedContent = content
        };
    }

    /// <summary>
    /// Builds the result returned when a blob is rejected: no document and no archived content.
    /// </summary>
    private static MultipleOutput EmptyOutput() =>
        new MultipleOutput { CertServiceDocument = null, ArchivedContent = null };

    /// <summary>
    /// Reads a stream to the end as text. Not called from within this class.
    /// </summary>
    /// <param name="stream">Stream to read.</param>
    /// <returns>The text read, or <c>null</c> when an <see cref="IOException"/> occurs (the error is logged).</returns>
    private async Task<string> ReadBlobContentAsync(Stream stream)
    {
        try
        {
            using var reader = new StreamReader(stream);
            return await reader.ReadToEndAsync();
        }
        catch (IOException ex)
        {
            _logger.LogError(ex, "Error reading blob content");
            return null;
        }
    }

    /// <summary>
    /// Checks that <paramref name="content"/> is well-formed JSON that conforms to the schema
    /// generated from <see cref="MappedService"/>.
    /// </summary>
    /// <param name="content">JSON text to validate.</param>
    /// <returns>
    /// <c>true</c> when the content is valid; <c>false</c> when it violates the schema (each violation
    /// is logged) or when parsing or validation throws (the exception is logged, not propagated).
    /// </returns>
    private bool ValidateJsonContent(string content)
    {
        try
        {
            // Accessed inside the try so that a schema-generation failure is logged and reported
            // as "invalid", exactly as a failure of the per-call generation used to be.
            JSchema schema = s_mappedServiceSchema.Value;

            JToken jsonContent = JToken.Parse(content);
            bool isValid = jsonContent.IsValid(schema, out IList<string> messages);

            if (!isValid)
            {
                foreach (var message in messages)
                {
                    _logger.LogError("Schema validation error: {Message}", message);
                }
            }
            else
            {
                _logger.LogInformation("JSON content is valid against the schema.");
            }

            return isValid;
        }
        catch (JsonException ex)
        {
            _logger.LogError(ex, "JSON parsing error during validation");
            return false;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Unexpected error during JSON validation");
            return false;
        }
    }

    /// <summary>
    /// Builds the sentence that is embedded, combining the certification, skill, topic and
    /// service fields of <paramref name="data"/>.
    /// </summary>
    private static string GenerateContextSentence(MappedService data) =>
        $"The {data.CertificationCode} {data.CertificationName} certification includes the skill of {data.SkillName}. Within this skill, there is a focus on the topic of {data.TopicName}, particularly through the use of the service {data.ServiceName}.";

    /// <summary>
    /// Requests an embedding for <paramref name="content"/> from the embedding client.
    /// </summary>
    /// <param name="content">Text to embed.</param>
    /// <returns>The embedding vector copied into a new array.</returns>
    /// <remarks>Any failure is logged and rethrown unchanged, so the caller sees the original exception.</remarks>
    private async Task<float[]> GenerateEmbeddingsAsync(string content)
    {
        try
        {
            _logger.LogInformation("Generating embedding...");
            var embeddingResult = await _embeddingClient.GenerateEmbeddingAsync(content).ConfigureAwait(false);
            _logger.LogInformation("Embedding created successfully.");
            return embeddingResult.Value.Vector.ToArray();
        }
        catch (RequestFailedException ex)
        {
            // Must stay ahead of the general handler so request failures get their own message.
            _logger.LogError(ex, "Azure OpenAI API request failed");
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error generating embedding");
            throw;
        }
    }

    /// <summary>
    /// Assembles the document from the mapped data, the context sentence and its embedding.
    /// A new GUID is assigned as <c>id</c>, and the service key is the certification code and
    /// service name joined by a hyphen.
    /// </summary>
    private static CertServiceDocument CreateCertServiceDocument(MappedService data, string contextSentence, float[] contentVector) =>
        new CertServiceDocument
        {
            id = Guid.NewGuid().ToString(),
            CertificationServiceKey = $"{data.CertificationCode}-{data.ServiceName}",
            CertificationCode = data.CertificationCode,
            CertificationName = data.CertificationName,
            SkillName = data.SkillName,
            TopicName = data.TopicName,
            ServiceName = data.ServiceName,
            ContextSentence = contextSentence,
            ContextVector = contentVector
        };
}
156
+ }
0 commit comments