Skip to content

Commit f67382e

Browse files
authored
Add Llm.Runner project (#29)
1 parent 3886671 commit f67382e

File tree

10 files changed

+194
-5
lines changed

10 files changed

+194
-5
lines changed

Llm.sln

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ci", "ci", "{655261D2-06EE-
4141
EndProject
4242
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Llm.Benchmarks", "src\Llm.Benchmarks\Llm.Benchmarks.csproj", "{690F1276-E1B8-434C-AF45-B8C5BBB66B2A}"
4343
EndProject
44+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Llm.Runner", "src\Llm.Runner\Llm.Runner.csproj", "{5C43627A-0CC5-4222-B759-7469DAFB642C}"
45+
EndProject
4446
Global
4547
GlobalSection(SolutionConfigurationPlatforms) = preSolution
4648
Debug|Any CPU = Debug|Any CPU
@@ -59,6 +61,10 @@ Global
5961
{690F1276-E1B8-434C-AF45-B8C5BBB66B2A}.Debug|Any CPU.Build.0 = Debug|Any CPU
6062
{690F1276-E1B8-434C-AF45-B8C5BBB66B2A}.Release|Any CPU.ActiveCfg = Release|Any CPU
6163
{690F1276-E1B8-434C-AF45-B8C5BBB66B2A}.Release|Any CPU.Build.0 = Release|Any CPU
64+
{5C43627A-0CC5-4222-B759-7469DAFB642C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
65+
{5C43627A-0CC5-4222-B759-7469DAFB642C}.Debug|Any CPU.Build.0 = Debug|Any CPU
66+
{5C43627A-0CC5-4222-B759-7469DAFB642C}.Release|Any CPU.ActiveCfg = Release|Any CPU
67+
{5C43627A-0CC5-4222-B759-7469DAFB642C}.Release|Any CPU.Build.0 = Release|Any CPU
6268
EndGlobalSection
6369
GlobalSection(SolutionProperties) = preSolution
6470
HideSolutionNode = FALSE
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Name;Mean [ms]
2+
Llm_nietras;3540
3+
Llm;11928
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
| Name| Mean [ms]|
2+
|---------------:|---------------:|
3+
| Llm_nietras| 3540|
4+
| Llm| 11928|
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Name;Mean [ms]
2+
Llm_nietras;827
3+
Llm;4422

run.ps1

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
dotnet run -c Release -f net8.0 --project src\Llm\Llm.csproj
1+
dotnet run -c Release -f net8.0 --project src\Llm.Runner\Llm.Runner.csproj

src/Llm.Runner/Llm.Runner.csproj

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<RootNamespace>nietras.LargeLanguageModel.Benchmarks</RootNamespace>
5+
<OutputType>Exe</OutputType>
6+
</PropertyGroup>
7+
8+
<PropertyGroup>
9+
<PlatformTarget>AnyCPU</PlatformTarget>
10+
<DebugType>pdbonly</DebugType>
11+
<DebugSymbols>true</DebugSymbols>
12+
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
13+
<IsPackable>false</IsPackable>
14+
</PropertyGroup>
15+
16+
<ItemGroup>
17+
<ProjectReference Include="..\Llm\Llm.csproj" />
18+
</ItemGroup>
19+
20+
<ItemGroup>
21+
<PackageReference Include="BenchmarkDotNet" Version="0.14.0" />
22+
<PackageReference Include="BenchmarkDotNet.Diagnostics.Windows" Version="0.14.0" />
23+
<PackageReference Include="Sep" Version="0.5.3" />
24+
</ItemGroup>
25+
26+
</Project>

src/Llm.Runner/Program.cs

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
// Type 'Program' can be sealed because it has no subtypes in its containing assembly and is not externally visible
2+
#pragma warning disable CA1852
3+
using System;
4+
using System.Collections.Generic;
5+
using System.Diagnostics;
6+
using System.IO;
7+
using System.Linq;
8+
using System.Net.Http;
9+
using System.Reflection;
10+
using System.Runtime.CompilerServices;
11+
using BenchmarkDotNet.Environments;
12+
using nietras.LargeLanguageModel;
13+
using nietras.SeparatedValues;
14+
[assembly: System.Runtime.InteropServices.ComVisible(false)]
15+
16+
// Entry point (top-level statements): pick an Llm implementation by name, make sure the
// GPT-2 data files exist, run Gpt2.VerifyTrain, then record the mean step time in a
// per-processor benchmark board (CSV + Markdown).

// Console + Trace logger used until the file-backed logger below is available.
Action<string> log = t => { Console.WriteLine(t); Trace.WriteLine(t); };
17+
18+
var location = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
19+
// Data directory is three levels above the directory of the executing assembly.
// NOTE(review): `location!` suppresses the nullable warning; Assembly.Location can be
// empty in some hosting modes (e.g. single-file publish) - confirm this is never the case here.
var dataDirectory = Path.Combine(location!, "../../../");
20+
21+
// NOTE(review): `args.Length` is dereferenced here without a null check, so the
// null-conditional `args?.Length` two statements below cannot protect anything.
log($"{Environment.Version} args: {args.Length}");
22+
23+
// First command line argument selects the Llm; otherwise the factory default is used.
var name = args?.Length > 0 ? args[0] : LlmFactory.DefaultName;
24+
log($"Llm '{name}'");
25+
// NOTE(review): looks like a dictionary indexer of factory delegates - presumably an
// unknown name throws here rather than producing a friendly error; verify against LlmFactory.
var llm = LlmFactory.NameToCreate[name]();
26+
27+
// Download the model and tokenizer files if they don't exist
// (uses `log`, since the file-backed `newLog` is only created afterwards).
28+
DownloadBinaryFilesIfNotExists(Gpt2.FileNames, Gpt2.RemoteUrl, dataDirectory, log);
29+
30+
// Log to file too for reference: "<name>.log" inside the data directory.
31+
var logFilePath = Path.Combine(dataDirectory, $"{name}.log");
32+
using var logWriter = new StreamWriter(logFilePath);
33+
// Tee: everything sent to newLog goes to Console/Trace and to the log file.
Action<string> newLog = t => { log(t); logWriter.WriteLine(t); };
34+
35+
const int steps = 10;
36+
// VerifyTrain returns the mean step time in milliseconds (see the Gpt2.VerifyTrain.cs
// and TimeLLm.cs hunks of this commit: total time divided by the reported step count).
var meanStep_ms = Gpt2.VerifyTrain(dataDirectory, llm, steps, newLog);
37+
// The board files are named after the measured method.
var boardName = nameof(Gpt2.VerifyTrain);
38+
//Gpt2.Infer(dataDirectory, llm, newLog);
39+
//Gpt2.Train(dataDirectory, llm);
40+
41+
var processorNameInDirectory = GetProcessorName();
42+
// NOTE(review): logged via `log`, not `newLog`, so the processor name does not end up
// in the log file - confirm whether that is intended.
log(processorNameInDirectory);
43+
44+
// Benchmarks live two levels above the directory of this source file, as recorded
// by the compiler ([CallerFilePath]), in one sub-directory per processor.
var sourceDirectory = GetSourceDirectory();
45+
var benchmarksDirectory = $"{sourceDirectory}/../../benchmarks/";
46+
var directory = $"{benchmarksDirectory}{processorNameInDirectory}";
47+
// NOTE(review): Directory.CreateDirectory is already a no-op for an existing
// directory, so the Exists check is redundant (harmless).
if (!Directory.Exists(directory)) { Directory.CreateDirectory(directory); }
48+
49+
var filePathBoard = Path.Combine(directory, $"{boardName}-Board.csv");
50+
var filePathBoardMarkdown = Path.Combine(directory, $"{boardName}-Board.md");
51+
52+
// Insert/replace this run in the CSV board and get all rows back sorted by mean time.
var (colNames, sortedBoard) = UpdateBoardCsv(name, meanStep_ms, filePathBoard);
53+
54+
// Mirror the sorted board as a Markdown table next to the CSV.
WriteBoardMarkdown(colNames, sortedBoard, filePathBoardMarkdown);
55+
56+
/// <summary>
/// Writes the board as a Markdown table to <paramref name="filePathBoardMarkdown"/>.
/// </summary>
/// <param name="colNames">Column headers; one table column is written per entry.</param>
/// <param name="sortedCols">Rows (one string per column), written in the order given.</param>
/// <param name="filePathBoardMarkdown">Path of the Markdown file to write.</param>
/// <remarks>
/// Every cell is left-padded to a width of 16 characters. Longer values are not
/// truncated, so they only break the visual alignment of the raw text.
/// NOTE(review): cell values are written as-is; a value containing '|' would
/// break the table - presumably names never contain it, verify.
/// </remarks>
static void WriteBoardMarkdown(string[] colNames, IReadOnlyList<string[]> sortedCols,
57+
string filePathBoardMarkdown)
58+
{
59+
using var writer = new StreamWriter(filePathBoardMarkdown);
60+
// Header row.
writer.WriteLine($"|{string.Join("|", colNames.Select(c => c.PadLeft(16)))}|");
61+
// Separator row: ":" left-padded with '-' gives "---------------:" per column,
// i.e. a right-aligned column in Markdown table syntax.
writer.WriteLine($"|{string.Join("|", colNames.Select(_ => ":").Select(c => c.PadLeft(16, '-')))}|");
62+
// One table row per board entry.
foreach (var cols in sortedCols)
63+
{
64+
writer.WriteLine($"|{string.Join("|", cols.Select(c => c.PadLeft(16)))}|");
65+
}
66+
}
67+
68+
/// <summary>
/// Downloads each file in <paramref name="fileNames"/> into
/// <paramref name="dataDirectory"/> unless a file with that name already exists there.
/// </summary>
/// <param name="fileNames">File names, relative to <paramref name="dataDirectory"/>.</param>
/// <param name="toUrl">Maps a file name to the URL it is downloaded from.</param>
/// <param name="dataDirectory">Directory the files are stored in.</param>
/// <param name="log">Optional logger; receives one message per download started.</param>
/// <remarks>
/// The download is synchronous: the method blocks until every missing file is copied.
/// NOTE(review): existence is the only check. FileMode.Create creates the file before
/// the copy starts, so a download that fails midway presumably leaves a partial file
/// that is treated as present on the next run - verify / consider a temp file + move.
/// </remarks>
static void DownloadBinaryFilesIfNotExists(
69+
IReadOnlyList<string> fileNames, Func<string, string> toUrl,
70+
string dataDirectory, Action<string>? log)
71+
{
72+
foreach (var fileName in fileNames)
73+
{
74+
var filePath = Path.Combine(dataDirectory, fileName);
75+
// Normalize (dataDirectory may contain "../" segments) so the logged path is absolute.
filePath = Path.GetFullPath(filePath);
76+
if (!File.Exists(filePath))
77+
{
78+
var url = toUrl(fileName);
79+
log?.Invoke($"Downloading '{url}' to '{filePath}'");
80+
// NOTE(review): a new HttpClient is created per missing file; .NET guidance is to
// reuse a single instance.
using var client = new HttpClient();
81+
// Download the file
// NOTE(review): `.Result` blocks on the task and wraps failures in
// AggregateException; `source` has no `using`, so the response stream is not
// explicitly disposed here.
82+
var source = client.GetStreamAsync(url).Result;
83+
using var destination = new FileStream(filePath, FileMode.Create);
84+
source.CopyTo(destination);
85+
}
86+
}
87+
}
88+
89+
/// <summary>
/// Returns the directory of the source file that calls this method.
/// </summary>
/// <param name="filePath">
/// Not meant to be passed explicitly; [CallerFilePath] makes the compiler substitute
/// the path of the calling source file as it was at compile time.
/// </param>
/// <remarks>
/// NOTE(review): because the path is fixed at compile time, it points at the build
/// machine's source tree - presumably the runner is always started from a local
/// checkout (e.g. via `dotnet run`); verify. The `!` only suppresses the nullable
/// warning of Path.GetDirectoryName.
/// </remarks>
static string GetSourceDirectory([CallerFilePath] string filePath = "") =>
90+
Path.GetDirectoryName(filePath)!;
91+
92+
/// <summary>
/// Inserts or replaces the entry for <paramref name="name"/> in the CSV board at
/// <paramref name="filePathBoard"/> and rewrites the file sorted by mean time, ascending.
/// </summary>
/// <param name="name">Key of the entry; an existing row with the same name is replaced.</param>
/// <param name="mean_ms">Mean time in milliseconds; stored with zero decimals ("F0").</param>
/// <param name="filePathBoard">Path of the CSV board; created if it does not exist.</param>
/// <returns>The column names and all rows (as strings) in the order they were written.</returns>
static (string[] colNames, string[][] Cols) UpdateBoardCsv(
93+
string name, double mean_ms, string filePathBoard)
94+
{
95+
const string colNameName = "Name";
96+
const string colNameMean = "Mean [ms]";
97+
98+
string[] colNames = [colNameName, colNameMean]; //, "StdDev [ms]", "Allocated [KB]"];
99+
100+
// New entry: the unrounded mean is kept for sorting, the rounded text is what gets written.
// NOTE(review): ToString("F0") uses the current culture - presumably harmless without
// decimals, but an explicit invariant culture would make the file machine-independent.
var value = (mean_ms, (string[])[name, mean_ms.ToString("F0")]);
101+
102+
// Existing rows are read completely (ReadNameToCols returns a materialized dictionary)
// before the same file is opened for writing further down.
var nameToCols = File.Exists(filePathBoard)
103+
? ReadNameToCols(filePathBoard, colNameName, colNameMean, colNames)
104+
: [];
// Indexer assignment: adds the entry or overwrites a previous result for the same name.
105+
nameToCols[name] = value;
106+
107+
using var writerBoard = Sep.Writer().ToFile(filePathBoard);
108+
// Ascending by mean, so the fastest entry comes first. Previously stored rows are
// ordered by their parsed (already rounded) value.
var sorted = nameToCols.Values.OrderBy(v => v.Mean).ToArray();
109+
foreach (var (_, cols) in sorted)
110+
{
111+
// Each row is completed when `writeRow` is disposed at the end of the iteration.
using var writeRow = writerBoard.NewRow();
112+
writeRow[colNames].Set(cols);
113+
}
114+
return (colNames, sorted.Select(v => v.Cols).ToArray());
115+
}
116+
117+
/// <summary>
/// Reads an existing board file into a dictionary keyed by the value of the name column.
/// </summary>
/// <param name="filePath">Path of the board file to read.</param>
/// <param name="colNameName">Name of the column holding the entry name (dictionary key).</param>
/// <param name="colNameMean">Name of the column parsed as the <c>double</c> mean.</param>
/// <param name="colNames">Columns whose text is kept verbatim as the row's string array.</param>
/// <returns>Entry name mapped to the parsed mean and the row's column strings.</returns>
/// <remarks>
/// NOTE(review): ToDictionary throws ArgumentException if the file contains the same
/// name twice - presumably impossible for files written by UpdateBoardCsv, but a
/// hand-edited board could trigger it.
/// </remarks>
static Dictionary<string, (double Mean, string[] Cols)> ReadNameToCols(
118+
string filePath, string colNameName, string colNameMean, string[] colNames)
119+
{
120+
using var reader = Sep
121+
.Reader(o => o with { Unescape = true, DisableFastFloat = true })
122+
.FromFile(filePath);
123+
// ToDictionary enumerates all rows eagerly, so nothing is read lazily after the
// reader is disposed on return.
return reader.Enumerate(r => (Name: r[colNameName].ToString(),
124+
Mean: r[colNameMean].Parse<double>(), Cols: r[colNames].ToStringsArray()))
125+
.ToDictionary(t => t.Name, t => (t.Mean, t.Cols));
126+
}
127+
128+
/// <summary>
/// Builds a directory-name-friendly processor name from BenchmarkDotNet's CPU info.
/// </summary>
/// <returns>
/// The prettified processor brand string with " Processor", " CPU" and " Graphics"
/// removed, slashes and backslashes removed, spaces replaced by '.', and everything
/// from the last ".w." onwards cut off.
/// </returns>
/// <remarks>
/// NOTE(review): only '/', '\' and ' ' are sanitized; other characters that are invalid
/// in directory names are presumably never present in brand strings - verify.
/// </remarks>
static string GetProcessorName()
129+
{
130+
var cpuInfo = HostEnvironmentInfo.GetCurrent().CpuInfo.Value;
131+
var processorName = ProcessorBrandStringHelper.Prettify(cpuInfo);
132+
var processorNameInDirectory = processorName
133+
.Replace(" Processor", "").Replace(" CPU", "")
134+
.Replace(" Graphics", "")
135+
.Replace("/", "").Replace("\\", "")
136+
.Replace(" ", ".");
137+
// Remove iGPU info: a " w/ ..." suffix has become ".w...." after the replacements
// above (slash dropped, spaces turned into dots), so cut at the last ".w.".
// NOTE(review): LastIndexOf(string) without a StringComparison is culture-sensitive.
138+
var indexOfWith = processorNameInDirectory.LastIndexOf(".w.");
139+
// "> 0" (not ">= 0"): a match at index 0 is left alone, which avoids an empty name.
if (indexOfWith > 0)
140+
{ processorNameInDirectory = processorNameInDirectory.Substring(0, indexOfWith); }
141+
return processorNameInDirectory;
142+
}

src/Llm/Gpt2.VerifyTrain.cs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ public static ExpectedOutputTensors Create(int batchSize, int tokenCount, int vo
3030
public Tensor<float> ExpectedLogits { get; } = New([B, T, V], s);
3131
}
3232

33-
public static unsafe void VerifyTrain(string dataDirectory, ILlm llmToUse, int steps, Action<string>? log)
33+
public static unsafe double VerifyTrain(string dataDirectory, ILlm llmToUse, int steps, Action<string>? log)
3434
{
3535
// build the GPT-2 model from a checkpoint
3636
using var model = ModelFromCheckpoint(dataDirectory + ModelBinaryFileName);
@@ -105,11 +105,13 @@ public static unsafe void VerifyTrain(string dataDirectory, ILlm llmToUse, int s
105105
}
106106
log?.Invoke($"All okay: {allOk}");
107107

108-
var timeReport = llm.CreateReport(steps - JitAndWarmupCount);
108+
var (timeReport, meanStep_ms) = llm.CreateReport(steps - JitAndWarmupCount);
109109

110110
log?.Invoke(timeReport);
111111

112112
if (!allOk) { throw new ArithmeticException($"{llmToUse.GetType().Name} failed {nameof(Gpt2)} train test run, see output for details."); }
113+
114+
return meanStep_ms;
113115
}
114116
}
115117

src/Llm/Llm.csproj

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@
4848
<AssemblyAttribute Include="System.Runtime.CompilerServices.InternalsVisibleTo">
4949
<_Parameter1>$(MSBuildProjectName).Benchmarks</_Parameter1>
5050
</AssemblyAttribute>
51+
<AssemblyAttribute Include="System.Runtime.CompilerServices.InternalsVisibleTo">
52+
<_Parameter1>$(MSBuildProjectName).Runner</_Parameter1>
53+
</AssemblyAttribute>
5154
</ItemGroup>
5255

5356
</Project>

src/Llm/TimeLLm.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ unsafe static void Clear<T>(T* ptr, nint size) where T : unmanaged
155155
NativeMemory.Clear(ptr, (nuint)(size * sizeof(T)));
156156
}
157157

158-
internal string CreateReport(int steps)
158+
internal (string Report, double meanStep_ms) CreateReport(int steps)
159159
{
160160
var keyToStats = _keyToTimes.ToDictionary(p => p.Key, p => ComputeStats(p.Value));
161161
var totalSum_ms = keyToStats.Values.Sum(s => s.Sum_ms);
@@ -183,7 +183,7 @@ internal string CreateReport(int steps)
183183
{
184184
sb.AppendLine($"{method,-27} {sum_ms / totalSum_ms,4:P0} sum: {sum_ms,6:F0} [ms] per step: {sum_ms / steps,6:F0} [ms]");
185185
}
186-
return sb.ToString();
186+
return (sb.ToString(), totalSum_ms / steps);
187187
}
188188

189189
static TimeStats ComputeStats(List<long> times)

0 commit comments

Comments
 (0)