// Source: microsoft/Foundry-Local — samples/cs/model-management-example/Program.cs (commit d58217da1698cc983fcf4b29390125702bd3b6df)
using Microsoft.AI.Foundry.Local;
using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels;
using System.Diagnostics;

// No cancellation source is wired up in this sample, so use the token that is never signalled.
var ct = CancellationToken.None;

// Settings handed to Foundry Local when the manager is created.
var config = new Configuration
{
    AppName = "foundry_local_samples",
    LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information
};


// Create the singleton manager, then take a reference to it.
await FoundryLocalManager.CreateAsync(config, Utils.GetAppLogger());
var mgr = FoundryLocalManager.Instance;


// Start downloading/registering all execution providers and await the work
// through the sample's RunWithSpinner helper.
var registerEpsTask = mgr.DownloadAndRegisterEpsAsync();
await Utils.RunWithSpinner("Registering execution providers", registerEpsTask);


// Model catalog operations
// This section demonstrates the various model catalog operations.
// Get the model catalog object
var catalog = await mgr.GetCatalogAsync();

// List available models: print the alias and id of every variant of every model.
Console.WriteLine("Available models for your hardware:");
var models = await catalog.ListModelsAsync();
foreach (var availableModel in models)
{
    foreach (var variant in availableModel.Variants)
    {
        // Id is a single string, so interpolate it directly (same as the cached-model listing below).
        Console.WriteLine($"  - Alias: {variant.Alias} (Id: {variant.Id})");
    }
}

// List cached models (i.e. downloaded models) from the catalog
var cachedModels = await catalog.GetCachedModelsAsync();
Console.WriteLine("\nCached models:");
foreach (var cachedModel in cachedModels)
{
    Console.WriteLine($"- {cachedModel.Alias} ({cachedModel.Id})");
}


// Get a model using an alias from the catalog.
// A null result is turned into a specific exception type instead of the bare System.Exception;
// InvalidOperationException still derives from Exception, so broad catch blocks keep working.
var model = await catalog.GetModelAsync("qwen2.5-0.5b")
    ?? throw new InvalidOperationException("Model not found");

// Models in Model.Variants are ordered by priority, with the highest priority first.
// The first downloaded model is selected by default.
// The highest priority is selected if no models have been downloaded.
// If the selected variant is not the highest priority, it means that Foundry Local
// has found a locally cached variant for you to improve performance (remove need to download).
Console.WriteLine($"\nThe default selected model variant is: {model.Id}");
if (model.Id != model.Variants.First().Id)
{
    // Debug.Assert is compiled out of non-DEBUG builds, so this sanity check
    // (including the awaited call inside it) only runs in Debug builds.
    Debug.Assert(await model.IsCachedAsync());
    Console.WriteLine("The model variant was selected due to being locally cached.");
}


// OPTIONAL: `model` can be used directly with its currently selected variant.
//           You can explicitly select (`model.SelectVariant`) or use a specific variant from `model.Variants`
//           if you want more control over the device and/or execution provider used.
//
// Choices:
//   - Use a model variant directly from the catalog if you know the variant Id
//     - `var modelVariant = await catalog.GetModelVariantAsync("qwen2.5-0.5b-instruct-generic-gpu:3")`
//
//   - Get the model variant from IModel.Variants
//     - `var modelVariant = model.Variants.First(v => v.Id == "qwen2.5-0.5b-instruct-generic-cpu:4")`
//     - `var modelVariant = model.Variants.First(v => v.Info.Runtime?.DeviceType == DeviceType.GPU)`
//       - optional: update selected variant in `model` using `model.SelectVariant(modelVariant);` if you wish to use
//                   `model` in your code.

// For this example we explicitly select the CPU variant, and call SelectVariant so all the following example code
// uses the `model` instance. It would be equally valid to use `modelVariant` directly.
Console.WriteLine("Selecting CPU variant of model");

// FirstOrDefault + explicit throw: if no variant reports a CPU runtime, fail with a message that
// names the problem instead of LINQ's generic "Sequence contains no matching element".
// The exception type (InvalidOperationException) is the same one First() would have thrown.
var modelVariant = model.Variants.FirstOrDefault(v => v.Info.Runtime?.DeviceType == DeviceType.CPU)
    ?? throw new InvalidOperationException($"Model '{model.Id}' has no CPU variant.");
model.SelectVariant(modelVariant);


// Fetch the model, rewriting a single console line with the reported percentage
// (per the SDK sample, the download is skipped when the model is already cached).
await model.DownloadAsync(percent =>
{
    Console.Write($"\rDownloading model: {percent:F2}%");

    // Terminate the progress line once the reported value reaches 100.
    if (percent >= 100f)
    {
        Console.WriteLine();
    }
});

// Bring the model into memory.
await model.LoadAsync();


// Ask the catalog which models are currently loaded and print each one.
var loadedModels = await catalog.GetLoadedModelsAsync();
Console.WriteLine("\nLoaded models:");
foreach (var loaded in loadedModels)
{
    Console.WriteLine($"- {loaded.Alias} ({loaded.Id})");
}
Console.WriteLine();


// Get a chat client for the selected model
var chatClient = await model.GetChatClientAsync();

// Create the conversation: a single user message
List<ChatMessage> messages = new()
{
    new ChatMessage { Role = "user", Content = "Why is the sky blue?" }
};

// You can adjust settings on the chat client
chatClient.Settings.Temperature = 0.7f;
chatClient.Settings.MaxTokens = 512;

Console.WriteLine("Chat completion response:");
var streamingResponse = chatClient.CompleteChatStreamingAsync(messages, ct);
await foreach (var chunk in streamingResponse)
{
    // Do not index Choices[0] blindly: a chunk without any choice (or without content)
    // would otherwise throw in the middle of the stream.
    // NOTE(review): whether Foundry Local ever emits such chunks is not visible here;
    // the guard is harmless if it never does.
    var content = chunk.Choices?.FirstOrDefault()?.Message?.Content;
    if (string.IsNullOrEmpty(content))
    {
        continue;
    }

    // Write each fragment immediately so the response appears as it is generated.
    Console.Write(content);
    Console.Out.Flush();
}
Console.WriteLine();
Console.WriteLine();

// Clean-up: unload the model now that the chat is finished.
Console.WriteLine($"Unloading model {model.Id}...");
await model.UnloadAsync();
Console.WriteLine("Model unloaded.");

// Query the catalog again to show what is still loaded after the unload.
loadedModels = await catalog.GetLoadedModelsAsync();
Console.WriteLine("\nLoaded models after unload (will be empty):");
foreach (var stillLoaded in loadedModels)
{
    Console.WriteLine($"- {stillLoaded.Alias} ({stillLoaded.Id})");
}
Console.WriteLine();
Console.WriteLine("Sample complete.");