Skip to content

Commit 71e1280

Browse files
feiyun0112 and feiyun0112 authored
Rename Casual to Causal (#7484)
Co-authored-by: feiyun0112 <[email protected]>
1 parent 03a691c commit 71e1280

23 files changed

+80
-80
lines changed

docs/gen-ai/CausalLMPipeline.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@ public abstract class CausalLMPipeline
2020
bool echo = false); // echo the input token ids in the output token ids
2121
}
2222

23-
public CasualLMPipeline<TTokenizer, TCausalLM> : CausalLMPipeline
23+
public CausalLMPipeline<TTokenizer, TCausalLM> : CausalLMPipeline
2424
where TTokenizer : ITokenizer
2525
where TCausalLM : nn.Module<CausalLanguageModelInput, CausalLanguageModelOutput>
2626
{
27-
public CausalLMPipeline<LLama2Tokenizer, Phi3ForCasualLM> Create(LLama2Tokenizer tokenizer, Phi3ForCasualLM model);
27+
public CausalLMPipeline<LLama2Tokenizer, Phi3ForCausalLM> Create(LLama2Tokenizer tokenizer, Phi3ForCausalLM model);
2828

2929
}
3030
```
@@ -105,7 +105,7 @@ The extension `Generate` method provides an even easier way to generate text with
105105

106106
```C#
107107
public static string Generate(
108-
this CasualLMPipeline pipeline,
108+
this CausalLMPipeline pipeline,
109109
string prompt,
110110
int maxLen = 128,
111111
float temperature = 0.7f,

docs/gen-ai/Usage.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ This document shows how to use the causal language model API for text generation
77
```C#
88
var pathToPhi3 = "path/to/phi3";
99
var tokenizer = LLama2Tokenizer.FromPretrained(pathToPhi3);
10-
var phi3CausalModel = Phi3ForCasualLM.FromPretrained(pathToPhi3);
10+
var phi3CausalModel = Phi3ForCausalLM.FromPretrained(pathToPhi3);
1111

12-
CausalLMPipeline<LLama2Tokenizer, Phi3ForCasualLM> pipeline = new CausalLMPipeline(tokenizer, phi3CausalModel);
12+
CausalLMPipeline<LLama2Tokenizer, Phi3ForCausalLM> pipeline = new CausalLMPipeline(tokenizer, phi3CausalModel);
1313

1414
var prompt = "<|user|>Once upon a time<|end|><assistant>";
1515
var output = pipeline.Generate(
@@ -24,16 +24,16 @@ In most cases, developers would like to consume the model in a uniform way. In
2424
```C#
2525
var pathToPhi3 = "path/to/phi3";
2626
var tokenizer = LLama2Tokenizer.FromPretrained(pathToPhi3);
27-
var phi3CausalModel = Phi3ForCasualLM.FromPretrained(pathToPhi3);
28-
CausalLMPipeline<LLama2Tokenizer, Phi3ForCasualLM> pipeline = new CausalLMPipeline(tokenizer, phi3CausalModel);
27+
var phi3CausalModel = Phi3ForCausalLM.FromPretrained(pathToPhi3);
28+
CausalLMPipeline<LLama2Tokenizer, Phi3ForCausalLM> pipeline = new CausalLMPipeline(tokenizer, phi3CausalModel);
2929
var kernel = Kernel.CreateBuilder()
3030
// the types of the tokenizer and the model are explicitly specified
3131
// here for clarity, but the compiler can infer them
3232
// The typed pipeline prevents developers from passing an arbitrary CausalLMPipeline
3333
// The reason we don't want to allow developers to pass an arbitrary CausalLMPipeline is that
3434
// - the model and the tokenizer must be compatible
3535
// - the chat template must be compatible with the model. e.g. In `AddPhi3AsChatCompletionService`, the chat template is fixed to "<|user|>{prompt}<|end|><assistant>"
36-
.AddPhi3AsChatCompletionService<LLama2Tokenizer, Phi3ForCasualLM>(pipeline)
36+
.AddPhi3AsChatCompletionService<LLama2Tokenizer, Phi3ForCausalLM>(pipeline)
3737
.Build();
3838
```
3939

@@ -42,7 +42,7 @@ Similarly, developers would also like to consume the language model like an agent.
4242
```C#
4343
var pathToPhi3 = "path/to/phi3";
4444
var tokenizer = LLama2Tokenizer.FromPretrained(pathToPhi3);
45-
var phi3CausalModel = Phi3ForCasualLM.FromPretrained(pathToPhi3);
45+
var phi3CausalModel = Phi3ForCausalLM.FromPretrained(pathToPhi3);
4646
var pipeline = new CausalLMPipeline(tokenizer, phi3CausalModel);
4747
var agent = new Phi3MiniAgent(pipeline, name: "assistant");
4848

@@ -59,7 +59,7 @@ If the model is deployed as a service, developers can consume the model similar
5959
// server.cs
6060
var pathToPhi3 = "path/to/phi3";
6161
var tokenizer = LLama2Tokenizer.FromPretrained(pathToPhi3);
62-
var phi3CausalModel = Phi3ForCasualLM.FromPretrained(pathToPhi3);
62+
var phi3CausalModel = Phi3ForCausalLM.FromPretrained(pathToPhi3);
6363
var pipeline = new CausalLMPipeline(tokenizer, phi3CausalModel);
6464
var agent = new Phi3MiniAgent(pipeline, name: "assistant");
6565

docs/samples/Microsoft.ML.GenAI.Samples/Llama/SFT_Llama_3_2_1B.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ public static async Task Train(string weightFolder, string checkPointName = "mod
2525
using var loggerFactory = LoggerFactory.Create(builder => builder.AddConsole());
2626

2727
// create logger
28-
var logger = loggerFactory.CreateLogger<CasualLMSupervisedFineTuningTrainer>();
28+
var logger = loggerFactory.CreateLogger<CausalLMSupervisedFineTuningTrainer>();
2929

3030
var device = "cuda";
3131

@@ -46,10 +46,10 @@ public static async Task Train(string weightFolder, string checkPointName = "mod
4646
var input = CreateDataset(dataset, pipeline.TypedTokenizer, Llama3_1ChatTemplateBuilder.Instance);
4747

4848
// create trainer
49-
var sftTrainer = new CasualLMSupervisedFineTuningTrainer(pipeline, logger: logger);
49+
var sftTrainer = new CausalLMSupervisedFineTuningTrainer(pipeline, logger: logger);
5050

5151
// Train the model
52-
var option = new CasualLMSupervisedFineTuningTrainer.Option
52+
var option = new CausalLMSupervisedFineTuningTrainer.Option
5353
{
5454
BatchSize = 1,
5555
Device = device,

docs/samples/Microsoft.ML.GenAI.Samples/MEAI/Phi3.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ public static async Task RunAsync(string weightFolder)
2727
torch.set_default_dtype(defaultType);
2828
var tokenizerPath = Path.Combine(weightFolder, "tokenizer.model");
2929
var tokenizer = Phi3TokenizerHelper.FromPretrained(tokenizerPath);
30-
var model = Phi3ForCasualLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true);
31-
var pipeline = new CausalLMPipeline<LlamaTokenizer, Phi3ForCasualLM>(tokenizer, model, device);
30+
var model = Phi3ForCausalLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true);
31+
var pipeline = new CausalLMPipeline<LlamaTokenizer, Phi3ForCausalLM>(tokenizer, model, device);
3232
var client = new Phi3CausalLMChatClient(pipeline);
3333

3434
var task = """

docs/samples/Microsoft.ML.GenAI.Samples/Phi3Mini/AutoGenSample.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ public static async Task RunAsync()
2929
var weightFolder = @"C:\Users\xiaoyuz\source\repos\Phi-3-mini-4k-instruct";
3030
var tokenizerPath = Path.Combine(weightFolder, "tokenizer.model");
3131
var tokenizer = Phi3TokenizerHelper.FromPretrained(tokenizerPath);
32-
var model = Phi3ForCasualLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true);
33-
var pipeline = new CausalLMPipeline<LlamaTokenizer, Phi3ForCasualLM>(tokenizer, model, device);
32+
var model = Phi3ForCausalLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true);
33+
var pipeline = new CausalLMPipeline<LlamaTokenizer, Phi3ForCausalLM>(tokenizer, model, device);
3434
var question = @"write a C# program to calculate the factorial of a number";
3535

3636
// agent

docs/samples/Microsoft.ML.GenAI.Samples/Phi3Mini/SemanticKernelSample.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ public static async Task RunChatCompletionSample()
2525
var weightFolder = @"C:\Users\xiaoyuz\source\repos\Phi-3-mini-4k-instruct";
2626
var tokenizerPath = Path.Combine(weightFolder, "tokenizer.model");
2727
var tokenizer = Phi3TokenizerHelper.FromPretrained(tokenizerPath);
28-
var model = Phi3ForCasualLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true);
29-
var pipeline = new CausalLMPipeline<LlamaTokenizer, Phi3ForCasualLM>(tokenizer, model, device);
28+
var model = Phi3ForCausalLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true);
29+
var pipeline = new CausalLMPipeline<LlamaTokenizer, Phi3ForCausalLM>(tokenizer, model, device);
3030

3131
var kernel = Kernel.CreateBuilder()
3232
.AddGenAIChatCompletion(pipeline)
@@ -56,8 +56,8 @@ public static async Task RunTextGenerationSample()
5656
var weightFolder = @"C:\Users\xiaoyuz\source\repos\Phi-3-mini-4k-instruct";
5757
var tokenizerPath = Path.Combine(weightFolder, "tokenizer.model");
5858
var tokenizer = Phi3TokenizerHelper.FromPretrained(tokenizerPath);
59-
var model = Phi3ForCasualLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true);
60-
var pipeline = new CausalLMPipeline<LlamaTokenizer, Phi3ForCasualLM>(tokenizer, model, device);
59+
var model = Phi3ForCausalLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true);
60+
var pipeline = new CausalLMPipeline<LlamaTokenizer, Phi3ForCausalLM>(tokenizer, model, device);
6161

6262
var kernel = Kernel.CreateBuilder()
6363
.AddGenAITextGeneration(pipeline)

src/Microsoft.ML.GenAI.Core/Trainer/CasualLMSupervisedFineTuningTrainer.cs renamed to src/Microsoft.ML.GenAI.Core/Trainer/CausalLMSupervisedFineTuningTrainer.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@
1313

1414
namespace Microsoft.ML.GenAI.Core.Trainer;
1515

16-
public class CasualLMSupervisedFineTuningTrainer
16+
public class CausalLMSupervisedFineTuningTrainer
1717
{
18-
private readonly ILogger<CasualLMSupervisedFineTuningTrainer>? _logger;
18+
private readonly ILogger<CausalLMSupervisedFineTuningTrainer>? _logger;
1919
private readonly ICausalLMPipeline _pipeline;
2020

21-
public CasualLMSupervisedFineTuningTrainer(ICausalLMPipeline pipeline, ILogger<CasualLMSupervisedFineTuningTrainer>? logger = null)
21+
public CausalLMSupervisedFineTuningTrainer(ICausalLMPipeline pipeline, ILogger<CausalLMSupervisedFineTuningTrainer>? logger = null)
2222
{
2323
_logger = logger;
2424
_pipeline = pipeline;

src/Microsoft.ML.GenAI.Core/Utility/AttentionMaskConverter.cs

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@ namespace Microsoft.ML.GenAI.Core;
1313

1414
public class AttentionMaskConverter
1515
{
16-
private readonly bool _isCasual;
16+
private readonly bool _isCausal;
1717
private readonly int? _slidingWindow;
1818

1919
public AttentionMaskConverter(bool isCausal, int? slidingWindow)
2020
{
21-
this._isCasual = isCausal;
21+
this._isCausal = isCausal;
2222
this._slidingWindow = slidingWindow;
2323
}
2424

@@ -42,42 +42,42 @@ public Tensor To4D(
4242

4343
// create causal mask
4444
// [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
45-
Tensor? casual4dMask = null;
46-
if ((inputShape[^1] > 1 || this._slidingWindow is not null) && this._isCasual)
45+
Tensor? causal4dMask = null;
46+
if ((inputShape[^1] > 1 || this._slidingWindow is not null) && this._isCausal)
4747
{
4848
if (keyValueLength is null)
4949
{
5050
throw new ArgumentException("key_value_length should be provided when attention_mask is causal");
5151
}
5252

5353
var pastKeyValuesLength = keyValueLength.Value - queryLength;
54-
casual4dMask = MakeCasualMask(inputShape, dType, attentionMask2d.device, pastKeyValuesLength, this._slidingWindow);
54+
causal4dMask = MakeCausalMask(inputShape, dType, attentionMask2d.device, pastKeyValuesLength, this._slidingWindow);
5555
}
5656
else if (this._slidingWindow is not null)
5757
{
5858
throw new NotImplementedException("Sliding window is not supported for non-causal masks");
5959
}
6060

6161
var expandedAttnMask = ExpandMask(attentionMask2d, dType, queryLength).to(attentionMask2d.device);
62-
if (casual4dMask is not null)
62+
if (causal4dMask is not null)
6363
{
6464
var min = torch.finfo(dType).min;
65-
expandedAttnMask = casual4dMask.masked_fill(expandedAttnMask.to(ScalarType.Bool), min);
65+
expandedAttnMask = causal4dMask.masked_fill(expandedAttnMask.to(ScalarType.Bool), min);
6666
}
6767

6868
return expandedAttnMask;
6969
}
7070

71-
public Tensor? ToCasual4D(
71+
public Tensor? ToCausal4D(
7272
int batchSize,
7373
int queryLength,
7474
int keyValueLength,
7575
ScalarType dType,
7676
Device device)
7777
{
78-
if (!_isCasual)
78+
if (!_isCausal)
7979
{
80-
throw new ArgumentException("This is not a casual mask");
80+
throw new ArgumentException("This is not a causal mask");
8181
}
8282

8383
long[] inputShape = [batchSize, queryLength];
@@ -88,13 +88,13 @@ public Tensor To4D(
8888
Tensor? causal4DMask = null;
8989
if (queryLength > 1 || this._slidingWindow is int)
9090
{
91-
causal4DMask = MakeCasualMask(inputShape, dType, device, pastKeyValueLength, this._slidingWindow);
91+
causal4DMask = MakeCausalMask(inputShape, dType, device, pastKeyValueLength, this._slidingWindow);
9292
}
9393

9494
return causal4DMask;
9595
}
9696

97-
public static Tensor MakeCasualMask(
97+
public static Tensor MakeCausalMask(
9898
long[] inputIdsShape,
9999
ScalarType dType,
100100
Device device,
@@ -158,7 +158,7 @@ public static Tensor MakeCasualMask(
158158
return converter.To4D(attentionMask, (int)inputShape[1], dType, keyValueLength);
159159
}
160160

161-
return converter.ToCasual4D(batchSize, queryLength, keyValueLength, dType, device);
161+
return converter.ToCausal4D(batchSize, queryLength, keyValueLength, dType, device);
162162
}
163163

164164
public static Tensor ExpandMask(

src/Microsoft.ML.GenAI.Phi/Extension/SemanticKernelExtension.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ public static class SemanticKernelExtension
1515
{
1616
public static IKernelBuilder AddGenAIChatCompletion(
1717
this IKernelBuilder builder,
18-
ICausalLMPipeline<Tokenizer, Phi3ForCasualLM> pipeline)
18+
ICausalLMPipeline<Tokenizer, Phi3ForCausalLM> pipeline)
1919
{
2020
builder.Services.AddSingleton<IChatCompletionService>(new Phi3CausalLMChatCompletionService(pipeline));
2121

@@ -24,7 +24,7 @@ public static IKernelBuilder AddGenAIChatCompletion(
2424

2525
public static IKernelBuilder AddGenAITextGeneration(
2626
this IKernelBuilder builder,
27-
ICausalLMPipeline<Tokenizer, Phi3ForCasualLM> pipeline)
27+
ICausalLMPipeline<Tokenizer, Phi3ForCausalLM> pipeline)
2828
{
2929
builder.Services.AddSingleton<ITextGenerationService>(new Phi3CausalLMTextGenerationService(pipeline));
3030

src/Microsoft.ML.GenAI.Phi/Module/Phi2Model.cs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ public override (Tensor, Tensor?, Tensor?) forward(
8080
// use 4d attention mask
8181
if (attentionMask is not null)
8282
{
83-
attentionMask = this.Prepare4DCasualAttentionMask(attentionMask, seqLen, pastKeyValueLength, inputEmbeddings.dtype);
83+
attentionMask = this.Prepare4DCausalAttentionMask(attentionMask, seqLen, pastKeyValueLength, inputEmbeddings.dtype);
8484
}
8585

8686
var hiddenStates = inputEmbeddings;
@@ -100,7 +100,7 @@ public override (Tensor, Tensor?, Tensor?) forward(
100100
return (hiddenStates, null, null);
101101
}
102102

103-
private Tensor Prepare4DCasualAttentionMask(
103+
private Tensor Prepare4DCausalAttentionMask(
104104
Tensor attentionMask,
105105
int queryLength,
106106
int pastKeyValueLength,
@@ -110,11 +110,11 @@ private Tensor Prepare4DCasualAttentionMask(
110110
var seqLen = attentionMask.shape[1];
111111
Contract.Assert(seqLen == queryLength, "seqLen must be equal to queryLength");
112112
var targetLength = queryLength + pastKeyValueLength;
113-
var casual4DMask = this.MakeCasualAttentionMask(batchSize, queryLength, pastKeyValueLength, attentionMask.device, dtype);
113+
var causal4DMask = this.MakeCausalAttentionMask(batchSize, queryLength, pastKeyValueLength, attentionMask.device, dtype);
114114
var expandedMask = this.ExpandMask(attentionMask, dtype, queryLength).to(attentionMask.device);
115115

116-
casual4DMask.masked_fill_(expandedMask.to_type(ScalarType.Bool), torch.finfo(dtype).min);
117-
return casual4DMask;
116+
causal4DMask.masked_fill_(expandedMask.to_type(ScalarType.Bool), torch.finfo(dtype).min);
117+
return causal4DMask;
118118
}
119119

120120
private Tensor ExpandMask(
@@ -132,7 +132,7 @@ private Tensor ExpandMask(
132132

133133
return invertedMask.masked_fill(invertedMask.to_type(ScalarType.Bool), torch.finfo(dtype).min);
134134
}
135-
private Tensor MakeCasualAttentionMask(
135+
private Tensor MakeCausalAttentionMask(
136136
int batchSize,
137137
int targetLen,
138138
int pastKeyValueLength,

0 commit comments

Comments
 (0)