Skip to content

Commit b4c1cac

Browse files
committed
Refactor ProcessFile and add MultipleOutput class
Refactored ProcessFile.cs to improve structure and functionality:
- Added using directives for Azure Functions, logging, and JSON.
- Updated constructor to accept EmbeddingClient with null checks.
- Introduced new Run method with [Function] attribute for blob triggers.
- Moved env var initialization to Program.cs.
- Added logging for various processing stages.
- Added methods: ValidateJsonContent, GenerateContextSentence, GenerateEmbeddingsAsync, CreateCertServiceDocument.
- Removed old Run method with Stream parameter and CosmosDB output binding.

Updated Program.cs to configure Azure OpenAI client:
- Added using directives for Azure, Azure OpenAI, and Azure Functions.
- Added configuration retrieval and validation for Azure OpenAI.
- Registered EmbeddingClient as a singleton service.

Added MultipleOutput.cs to define MultipleOutput class:
- Included properties for CertServiceDocument and ArchivedContent.
- Decorated properties with [CosmosDBOutput] and [BlobOutput] attributes.
1 parent cf4ba77 commit b4c1cac

File tree

3 files changed

+141
-85
lines changed

3 files changed

+141
-85
lines changed
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
using Microsoft.Azure.Functions.Worker;
2+
using System;
3+
using System.Collections.Generic;
4+
using System.Linq;
5+
using System.Text;
6+
using System.Threading.Tasks;
7+
8+
namespace azure_project_generator
9+
{
10+
public class MultipleOutput
11+
{
12+
13+
[CosmosDBOutput("%CosmosDb%", "%CosmosContainerOut%", Connection = "CosmosDBConnection")]
14+
public CertServiceDocument CertServiceDocument { get; set; }
15+
16+
[BlobOutput("certdataarchive/{name}", Connection = "AzureWebJobsStorage")]
17+
public string ArchivedContent { get; set; }
18+
19+
}
20+
}
Lines changed: 96 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
using Azure.AI.OpenAI;
21
using Azure;
2+
using Azure.AI.OpenAI;
33
using Microsoft.Azure.Functions.Worker;
44
using Microsoft.Extensions.Logging;
55
using Newtonsoft.Json;
@@ -14,130 +14,143 @@ public class ProcessFile
1414
{
1515
private readonly ILogger<ProcessFile> _logger;
1616
private readonly EmbeddingClient _embeddingClient;
17-
public ProcessFile(ILogger<ProcessFile> logger)
17+
18+
public ProcessFile(ILogger<ProcessFile> logger,
19+
EmbeddingClient embeddingClient)
20+
{
21+
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
22+
_embeddingClient = embeddingClient ?? throw new ArgumentNullException(nameof(embeddingClient));
23+
}
24+
25+
[Function(nameof(ProcessFile))]
26+
public async Task<MultipleOutput> Run(
27+
[BlobTrigger("certdata/{name}", Connection = "AzureWebJobsStorage")] string content,
28+
string name)
1829
{
19-
_logger = logger;
20-
// Initialize and validate environment variables
21-
string keyFromEnvironment = Environment.GetEnvironmentVariable("AZURE_OPENAI_API_KEY");
22-
string endpointFromEnvironment = Environment.GetEnvironmentVariable("AZURE_OPENAI_API_ENDPOINT");
23-
string embeddingsDeployment = Environment.GetEnvironmentVariable("EMBEDDINGS_DEPLOYMENT");
30+
_logger.LogInformation($"Processing blob: {name}");
31+
32+
if (string.IsNullOrWhiteSpace(content))
33+
{
34+
_logger.LogError("Blob content is empty or whitespace.");
35+
return new MultipleOutput { CertServiceDocument = null, ArchivedContent = null};
36+
}
37+
38+
if (!ValidateJsonContent(content))
39+
{
40+
return new MultipleOutput { CertServiceDocument = null, ArchivedContent = null};
41+
}
2442

25-
if (string.IsNullOrEmpty(keyFromEnvironment) || string.IsNullOrEmpty(endpointFromEnvironment) || string.IsNullOrEmpty(embeddingsDeployment))
43+
var mappedServiceData = JsonConvert.DeserializeObject<MappedService>(content);
44+
if (mappedServiceData == null)
2645
{
27-
_logger.LogError("Environment variables for Azure OpenAI API are not set properly.");
28-
throw new InvalidOperationException("Required environment variables are missing.");
46+
_logger.LogError("Failed to deserialize content to MappedService.");
47+
return new MultipleOutput { CertServiceDocument = null, ArchivedContent = null};
2948
}
3049

31-
// Initialize Azure OpenAI client
32-
AzureOpenAIClient azureClient = new(
33-
new Uri(endpointFromEnvironment),
34-
new AzureKeyCredential(keyFromEnvironment));
50+
string contextSentence = GenerateContextSentence(mappedServiceData);
51+
float[] contentVector = await GenerateEmbeddingsAsync(contextSentence);
3552

36-
_embeddingClient = azureClient.GetEmbeddingClient(embeddingsDeployment);
53+
var certServiceDocument = CreateCertServiceDocument(mappedServiceData, contextSentence, contentVector);
3754

55+
_logger.LogInformation("Document created successfully.");
56+
_logger.LogInformation($"Archiving blob: {name}");
57+
58+
return new MultipleOutput
59+
{
60+
CertServiceDocument = certServiceDocument,
61+
ArchivedContent = content
62+
63+
};
3864
}
3965

40-
[Function(nameof(ProcessFile))]
41-
[CosmosDBOutput("%CosmosDb%", "%CosmosContainerOut%", Connection = "CosmosDBConnection")]
42-
public async Task<CertServiceDocument> Run(
43-
[BlobTrigger("certdata/{name}", Connection = "AzureWebJobsStorage")] Stream stream, string name)
66+
67+
private async Task<string> ReadBlobContentAsync(Stream stream)
4468
{
45-
string content;
4669
try
4770
{
48-
using var blobStreamReader = new StreamReader(stream);
49-
content = await blobStreamReader.ReadToEndAsync();
71+
using var reader = new StreamReader(stream);
72+
return await reader.ReadToEndAsync();
5073
}
5174
catch (IOException ex)
5275
{
53-
_logger.LogError($"Error reading blob content: {ex.Message}");
54-
return null;
55-
}
56-
57-
_logger.LogInformation($"C# Blob trigger function Processed blob\n Name: {name}");
58-
59-
if (string.IsNullOrWhiteSpace(content))
60-
{
61-
_logger.LogError("Blob content is empty or whitespace.");
76+
_logger.LogError(ex, "Error reading blob content");
6277
return null;
6378
}
79+
}
6480

81+
private bool ValidateJsonContent(string content)
82+
{
6583
try
6684
{
67-
ValidateJsonContent(content);
85+
var generator = new JSchemaGenerator();
86+
JSchema schema = generator.Generate(typeof(MappedService));
87+
88+
JToken jsonContent = JToken.Parse(content);
89+
bool isValid = jsonContent.IsValid(schema, out IList<string> messages);
90+
91+
if (!isValid)
92+
{
93+
foreach (var message in messages)
94+
{
95+
_logger.LogError($"Schema validation error: {message}");
96+
}
97+
}
98+
else
99+
{
100+
_logger.LogInformation("JSON content is valid against the schema.");
101+
}
102+
103+
return isValid;
68104
}
69-
catch (JsonReaderException ex)
105+
catch (JsonException ex)
70106
{
71-
_logger.LogError($"JSON parsing error: {ex.Message}");
107+
_logger.LogError(ex, "JSON parsing error during validation");
108+
return false;
72109
}
73110
catch (Exception ex)
74111
{
75-
_logger.LogError($"An unexpected error occurred: {ex.Message}");
112+
_logger.LogError(ex, "Unexpected error during JSON validation");
113+
return false;
76114
}
115+
}
77116

78-
var mappedServiceData = JsonConvert.DeserializeObject<MappedService>(content);
79-
80-
string contextSentence =
81-
$"The {mappedServiceData.CertificationCode} {mappedServiceData.CertificationName} certification includes the skill of {mappedServiceData.SkillName}. Within this skill, there is a focus on the topic of {mappedServiceData.TopicName}, particularly through the use of the service {mappedServiceData.ServiceName}.";
82-
83-
List<float> contentVector = await GenerateEmbeddings(contextSentence);
84-
CertServiceDocument certServiceDocument = new CertServiceDocument();
85-
certServiceDocument.id = Guid.NewGuid().ToString();
86-
certServiceDocument.CertificationServiceKey = $"{mappedServiceData.CertificationCode}-{mappedServiceData.ServiceName}";
87-
certServiceDocument.CertificationCode = mappedServiceData.CertificationCode;
88-
certServiceDocument.CertificationName = mappedServiceData.CertificationName;
89-
certServiceDocument.SkillName = mappedServiceData.SkillName;
90-
certServiceDocument.TopicName = mappedServiceData.TopicName;
91-
certServiceDocument.ServiceName = mappedServiceData.ServiceName;
92-
certServiceDocument.ContextSentence = contextSentence;
93-
certServiceDocument.ContextVector = contentVector.ToArray();
94-
95-
_logger.LogInformation("Document created successfully.");
96-
97-
return certServiceDocument;
117+
private string GenerateContextSentence(MappedService data) =>
118+
$"The {data.CertificationCode} {data.CertificationName} certification includes the skill of {data.SkillName}. Within this skill, there is a focus on the topic of {data.TopicName}, particularly through the use of the service {data.ServiceName}.";
98119

99-
}
100-
private async Task<List<float>> GenerateEmbeddings(string content)
120+
private async Task<float[]> GenerateEmbeddingsAsync(string content)
101121
{
102122
try
103123
{
104124
_logger.LogInformation("Generating embedding...");
105125
var embeddingResult = await _embeddingClient.GenerateEmbeddingAsync(content).ConfigureAwait(false);
106-
List<float> embeddingVector = embeddingResult.Value.Vector.ToArray().ToList();
107126
_logger.LogInformation("Embedding created successfully.");
108-
return embeddingVector;
127+
return embeddingResult.Value.Vector.ToArray();
128+
109129
}
110130
catch (RequestFailedException ex)
111131
{
112-
_logger.LogError($"Azure OpenAI API request failed: {ex.Message}");
113-
throw; // Re-throw the exception to ensure the caller is aware of the failure
132+
_logger.LogError(ex, "Azure OpenAI API request failed");
133+
throw;
114134
}
115135
catch (Exception ex)
116136
{
117-
_logger.LogError($"Error generating embedding: {ex.Message}");
118-
throw; // Re-throw the exception to ensure the caller is aware of the failure
137+
_logger.LogError(ex, "Error generating embedding");
138+
throw;
119139
}
120140
}
121-
private void ValidateJsonContent(string content)
122-
{
123-
var generator = new JSchemaGenerator();
124-
JSchema schema = generator.Generate(typeof(MappedService));
125-
126-
JToken jsonContent = JToken.Parse(content);
127-
IList<string> messages;
128-
bool valid = jsonContent.IsValid(schema, out messages);
129141

130-
if (!valid)
142+
private CertServiceDocument CreateCertServiceDocument(MappedService data, string contextSentence, float[] contentVector) =>
143+
new CertServiceDocument
131144
{
132-
foreach (var message in messages)
133-
{
134-
_logger.LogError($"Schema validation error: {message}");
135-
}
136-
}
137-
else
138-
{
139-
_logger.LogInformation("JSON content is valid against the schema.");
140-
}
141-
}
145+
id = Guid.NewGuid().ToString(),
146+
CertificationServiceKey = $"{data.CertificationCode}-{data.ServiceName}",
147+
CertificationCode = data.CertificationCode,
148+
CertificationName = data.CertificationName,
149+
SkillName = data.SkillName,
150+
TopicName = data.TopicName,
151+
ServiceName = data.ServiceName,
152+
ContextSentence = contextSentence,
153+
ContextVector = contentVector
154+
};
142155
}
143-
}
156+
}

azure-project-generator/Program.cs

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,37 @@
1+
using Azure;
2+
using Azure.AI.OpenAI;
13
using Microsoft.Azure.Functions.Worker;
4+
using Microsoft.Extensions.Configuration;
25
using Microsoft.Extensions.DependencyInjection;
36
using Microsoft.Extensions.Hosting;
47

58
var host = new HostBuilder()
69
.ConfigureFunctionsWorkerDefaults()
7-
.ConfigureServices(services =>
10+
.ConfigureServices((context, services) =>
811
{
912
services.AddApplicationInsightsTelemetryWorkerService();
1013
services.ConfigureFunctionsApplicationInsights();
14+
15+
// Get configuration
16+
var config = context.Configuration;
17+
18+
// Initialize Azure OpenAI client
19+
string keyFromEnvironment = config["AZURE_OPENAI_API_KEY"];
20+
string endpointFromEnvironment = config["AZURE_OPENAI_API_ENDPOINT"];
21+
string embeddingsDeployment = config["EMBEDDINGS_DEPLOYMENT"];
22+
23+
if (string.IsNullOrEmpty(keyFromEnvironment) || string.IsNullOrEmpty(endpointFromEnvironment) || string.IsNullOrEmpty(embeddingsDeployment))
24+
{
25+
throw new InvalidOperationException("Required Azure OpenAI configuration is missing.");
26+
}
27+
28+
AzureOpenAIClient azureClient = new(
29+
new Uri(endpointFromEnvironment),
30+
new AzureKeyCredential(keyFromEnvironment));
31+
32+
// Register EmbeddingClient as a singleton
33+
services.AddSingleton(azureClient.GetEmbeddingClient(embeddingsDeployment));
1134
})
1235
.Build();
1336

14-
host.Run();
37+
host.Run();

0 commit comments

Comments
 (0)