Skip to content

Commit 823fc17

Browse files
authored
Misc Changes (dotnet#7264)
* Add o1 model support * Replace usage of tuples with Range in EncodedToken and remove the TorchSharp Range/Index implementation * Rename SentencePieceBpeTokenizer to SentencePieceTokenizer to allow adding more models to it in the future * Make Tokenizer.Decode return a non-nullable string * Make the BPE tokenizer support added tokens * Add the net9 package source to the NuGet.config file * Rename TiktokenPreTokenizer to RegexPreTokenizer
1 parent e794342 commit 823fc17

File tree

51 files changed

+433
-565
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

51 files changed

+433
-565
lines changed

NuGet.config

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
<add key="mlnet-assets" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/machinelearning-assets/nuget/v3/index.json" />
1616
<add key="dotnet-libraries-transport" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-libraries-transport/nuget/v3/index.json" />
1717
<add key="dotnet8" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet8/nuget/v3/index.json" />
18+
<add key="dotnet9" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet9/nuget/v3/index.json" />
1819
</packageSources>
1920
<packageSourceMapping>
2021
<packageSource key="dotnet-public">
@@ -47,6 +48,9 @@
4748
<packageSource key="dotnet8">
4849
<package pattern="*" />
4950
</packageSource>
51+
<packageSource key="dotnet9">
52+
<package pattern="*" />
53+
</packageSource>
5054
</packageSourceMapping>
5155
<disabledPackageSources>
5256
<clear />

docs/samples/Microsoft.ML.AutoML.Samples/Microsoft.ML.AutoML.Samples.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
<!-- Remove once we have resolved the TorchSharp issue. -->
1010
<ResolveAssemblyWarnOrErrorOnTargetArchitectureMismatch>None</ResolveAssemblyWarnOrErrorOnTargetArchitectureMismatch>
11+
<SuppressTfmSupportBuildWarnings>true</SuppressTfmSupportBuildWarnings>
1112
</PropertyGroup>
1213

1314
<ItemGroup>

eng/Versions.props

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
<GoogleProtobufVersion>3.27.1</GoogleProtobufVersion>
4242
<LightGBMVersion>3.3.5</LightGBMVersion>
4343
<MicrosoftBclHashCodeVersion>1.1.1</MicrosoftBclHashCodeVersion>
44+
<MicrosoftBclMemoryVersion>9.0.0-rc.1.24431.7</MicrosoftBclMemoryVersion>
4445
<MicrosoftCodeAnalysisAnalyzersVersion>3.3.4</MicrosoftCodeAnalysisAnalyzersVersion>
4546
<MicrosoftCodeAnalysisCSharpVersion>4.9.2</MicrosoftCodeAnalysisCSharpVersion>
4647
<MicrosoftDotNetInteractiveVersion>1.0.0-beta.24375.2</MicrosoftDotNetInteractiveVersion>

src/Microsoft.ML.AutoML.Interactive/Microsoft.ML.AutoML.Interactive.csproj

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@
44
<TargetFramework>net6.0</TargetFramework>
55
<IsPackable>false</IsPackable>
66
<NoWarn>$(NoWarn)</NoWarn>
7-
7+
88
<!-- Remove once we have resolved the TorchSharp issue. -->
99
<ResolveAssemblyWarnOrErrorOnTargetArchitectureMismatch>None</ResolveAssemblyWarnOrErrorOnTargetArchitectureMismatch>
10+
<SuppressTfmSupportBuildWarnings>true</SuppressTfmSupportBuildWarnings>
1011
</PropertyGroup>
1112

1213
<ItemGroup>

src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77
<LangVersion>preview</LangVersion>
88
</PropertyGroup>
99

10+
<PropertyGroup Condition="'$(TargetFramework)' == 'net6.0'">
11+
<SuppressTfmSupportBuildWarnings>true</SuppressTfmSupportBuildWarnings>
12+
</PropertyGroup>
13+
1014
<ItemGroup>
1115
<PackageReference Include="AutoGen.Core" Version="$(AutoGenVersion)" />
1216
<PackageReference Include="Microsoft.SemanticKernel.Abstractions" Version="$(SemanticKernelVersion)" />

src/Microsoft.ML.GenAI.Core/Pipeline/CausalLMPipeline.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ public virtual IEnumerable<string> GenerateStreaming(
255255

256256
return tokens
257257
// Skip the first _ token automatically added by tokenizer
258-
.Where(t => t.Offset != (0, 0))
258+
.Where(t => !t.Offset.Equals(new Range(0, 0)))
259259
.Select(t => t.Id)
260260
.ToArray();
261261
}));
@@ -268,13 +268,13 @@ public virtual IEnumerable<string> GenerateStreaming(
268268
var tokenIds = token[0].to_type(ScalarType.Int32).data<int>().ToArray();
269269
var duplicateTokenString = this.Tokenizer switch
270270
{
271-
SentencePieceBpeTokenizer bpeTokenizer => bpeTokenizer.Decode(tokenIds.Concat(tokenIds), considerSpecialTokens: true) ?? throw new InvalidOperationException("Failed to decode token ids"),
271+
SentencePieceTokenizer bpeTokenizer => bpeTokenizer.Decode(tokenIds.Concat(tokenIds), considerSpecialTokens: true) ?? throw new InvalidOperationException("Failed to decode token ids"),
272272
_ => this.Tokenizer.Decode(tokenIds.Concat(tokenIds)) ?? throw new InvalidOperationException("Failed to decode token ids"),
273273
};
274274

275275
var tokenString = this.Tokenizer switch
276276
{
277-
SentencePieceBpeTokenizer bpeTokenizer => bpeTokenizer.Decode(tokenIds, considerSpecialTokens: true) ?? throw new InvalidOperationException("Failed to decode token ids"),
277+
SentencePieceTokenizer bpeTokenizer => bpeTokenizer.Decode(tokenIds, considerSpecialTokens: true) ?? throw new InvalidOperationException("Failed to decode token ids"),
278278
_ => this.Tokenizer.Decode(tokenIds) ?? throw new InvalidOperationException("Failed to decode token ids"),
279279
};
280280

src/Microsoft.ML.GenAI.LLaMA/LlamaTokenizerHelper.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ public static TiktokenTokenizer FromPretrained(
4949
string modelFile = "tokenizer.model")
5050
{
5151
var modelFilePath = Path.Join(modelWeightFolder, modelFile);
52-
var preTokenizer = new TiktokenPreTokenizer(new Regex(_re), _specialTokens);
52+
var preTokenizer = new RegexPreTokenizer(new Regex(_re), _specialTokens);
5353
return TiktokenTokenizer.Create(File.OpenRead(modelFilePath), preTokenizer, normalizer: null, specialTokens: _specialTokens);
5454
}
5555
}

src/Microsoft.ML.GenAI.LLaMA/Microsoft.ML.GenAI.LLaMA.csproj

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77
<IsPackable>true</IsPackable>
88
</PropertyGroup>
99

10+
<PropertyGroup Condition="'$(TargetFramework)' == 'net6.0'">
11+
<SuppressTfmSupportBuildWarnings>true</SuppressTfmSupportBuildWarnings>
12+
</PropertyGroup>
13+
1014
<ItemGroup>
1115
<PackageReference Include="TorchSharp.PyBridge" Version="$(TorchSharpPyBridgeVersion)" />
1216
<PackageReference Include="TorchSharp" Version="$(TorchSharpVersion)" />

src/Microsoft.ML.GenAI.Mistral/Microsoft.ML.GenAI.Mistral.csproj

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77
<IsPackable>true</IsPackable>
88
</PropertyGroup>
99

10+
<PropertyGroup Condition="'$(TargetFramework)' == 'net6.0'">
11+
<SuppressTfmSupportBuildWarnings>true</SuppressTfmSupportBuildWarnings>
12+
</PropertyGroup>
13+
1014
<ItemGroup>
1115
<PackageReference Include="TorchSharp.PyBridge" Version="$(TorchSharpPyBridgeVersion)" />
1216
<PackageReference Include="TorchSharp" Version="$(TorchSharpVersion)" />

src/Microsoft.ML.GenAI.Phi/Microsoft.ML.GenAI.Phi.csproj

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77
<IsPackable>true</IsPackable>
88
</PropertyGroup>
99

10+
<PropertyGroup Condition="'$(TargetFramework)' == 'net6.0'">
11+
<SuppressTfmSupportBuildWarnings>true</SuppressTfmSupportBuildWarnings>
12+
</PropertyGroup>
13+
1014
<ItemGroup>
1115
<PackageReference Include="TorchSharp.PyBridge" Version="$(TorchSharpPyBridgeVersion)" />
1216
<PackageReference Include="TorchSharp" Version="$(TorchSharpVersion)" />
@@ -23,5 +27,5 @@
2327
<ItemGroup>
2428
<EmbeddedResource Include="Resource\Config\*.json" />
2529
</ItemGroup>
26-
30+
2731
</Project>

0 commit comments

Comments
 (0)