
Commit 9875670

Azure OpenAI: typespec code generation updates for Chat API (#23206)

* Initial test commit
* Test update
* Bigger update (extensive local changes)
* Updates: full e2e with sdk-for-net
* Add suppression for non-Pascal 'OpenAI'
* PR feedback; thank you, Jose!
* PR feedback
* Minor update: remove optionality of 'prompt' for AOAI
* PR feedback: CompletionsFinishReason | null
* PR feedback on optionals

1 parent b73e2d3 commit 9875670

7 files changed, +475 -167 lines changed

specification/cognitiveservices/OpenAI.Inference/main.tsp

Lines changed: 3 additions & 1 deletion
@@ -8,6 +8,7 @@ using TypeSpec.Http;
  using TypeSpec.Rest;
  using TypeSpec.Versioning;

+ #suppress "@azure-tools/typespec-azure-core/casing-style" "OpenAI is a case-sensitive name"
  @useAuth(
    ApiKeyAuth<ApiKeyLocation.header, "api-key"> |
    OAuth2Auth<[{
@@ -37,9 +38,10 @@ namespace Azure.OpenAI;
  enum ServiceApiVersions {
    @useDependency(Azure.Core.Versions.v1_0_Preview_1)
    v2022_12_01: "2022-12-01",
+   @useDependency(Azure.Core.Versions.v1_0_Preview_1)
+   v2023_03_15_Preview: "2023-03-15-preview",
  }

-
  @doc("A specific deployment")
  @TypeSpec.Rest.resource("deployments")
  model Deployment {
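
Note on the hunks above: in TypeSpec, `#suppress` waives the named diagnostic only for the declaration that immediately follows it, and adding `v2023_03_15_Preview` to `ServiceApiVersions` is what lets other parts of the spec be gated on the new preview version through `TypeSpec.Versioning`. A minimal, hypothetical sketch of that gating (the decorated target below is illustrative and not part of this hunk):

// Hypothetical sketch: mark a declaration as first available in the 2023-03-15-preview version.
// @added comes from TypeSpec.Versioning, which main.tsp already brings in via `using TypeSpec.Versioning;`.
@added(ServiceApiVersions.v2023_03_15_Preview)
model ChatCompletionsOptions {
  // ...chat-specific request settings...
}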
Lines changed: 200 additions & 0 deletions
@@ -0,0 +1,200 @@
import "@typespec/rest";
import "@typespec/http";
import "./completions.common.tsp";

using TypeSpec.Rest;
using TypeSpec.Http;

@doc("A description of the intended purpose of a message within a chat completions interaction.")
enum ChatRole {
  @doc("The role that instructs or sets the behavior of the assistant.")
  @projectedName("json", "system")
  system,

  @doc("The role that provides responses to system-instructed, user-prompted input.")
  @projectedName("json", "assistant")
  assistant,

  @doc("The role that provides input for chat completions.")
  @projectedName("json", "user")
  user,
}

@doc("A single, role-attributed message within a chat completion interaction.")
model ChatMessage {
  @doc("The role associated with this message payload.")
  @projectedName("json", "role")
  role: ChatRole;

  @doc("The text associated with this message payload.")
  @projectedName("json", "content")
  content?: string;
}

@doc("""
The configuration information for a chat completions request.
Completions support a wide variety of tasks and generate text that continues from or "completes"
provided prompt data.
""")
model ChatCompletionsOptions {
  @doc("""
  The collection of context messages associated with this chat completions request.
  Typical usage begins with a chat message for the System role that provides instructions for
  the behavior of the assistant, followed by alternating messages between the User and
  Assistant roles.
  """)
  @projectedName("json", "messages")
  messages: ChatMessage[];

  @doc("The maximum number of tokens to generate.")
  @projectedName("json", "max_tokens")
  maxTokens?: int32;

  @doc("""
  The sampling temperature to use that controls the apparent creativity of generated completions.
  Higher values will make output more random while lower values will make results more focused
  and deterministic.
  It is not recommended to modify temperature and top_p for the same completions request as the
  interaction of these two settings is difficult to predict.
  """)
  @projectedName("json", "temperature")
  temperature?: float32;

  @doc("""
  An alternative to sampling with temperature called nucleus sampling. This value causes the
  model to consider the results of tokens with the provided probability mass. As an example, a
  value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
  considered.
  It is not recommended to modify temperature and top_p for the same completions request as the
  interaction of these two settings is difficult to predict.
  """)
  @projectedName("json", "top_p")
  @projectedName("csharp", "NucleusSamplingFactor")
  topP?: float32;

  @doc("""
  A map between GPT token IDs and bias scores that influences the probability of specific tokens
  appearing in a completions response. Token IDs are computed via external tokenizer tools, while
  bias scores reside in the range of -100 to 100 with minimum and maximum values corresponding to
  a full ban or exclusive selection of a token, respectively. The exact behavior of a given bias
  score varies by model.
  """)
  @projectedName("json", "logit_bias")
  @projectedName("csharp", "InternalStringKeyedTokenSelectionBiases")
  logitBias?: Record<int32>;

  @doc("""
  An identifier for the caller or end user of the operation. This may be used for tracking
  or rate-limiting purposes.
  """)
  @projectedName("json", "user")
  user?: string;

  @doc("""
  The number of chat completions choices that should be generated for a chat completions
  response.
  Because this setting can generate many completions, it may quickly consume your token quota.
  Use carefully and ensure reasonable settings for max_tokens and stop.
  """)
  @projectedName("json", "n")
  @projectedName("csharp", "ChoiceCount")
  n?: int32;

  @doc("""
  A collection of textual sequences that will end completions generation.
  """)
  @projectedName("json", "stop")
  @projectedName("csharp", "StopSequences")
  stop?: string[];

  @doc("""
  A value that influences the probability of generated tokens appearing based on their existing
  presence in generated text.
  Positive values will make tokens less likely to appear when they already exist and increase the
  model's likelihood to output new topics.
  """)
  @projectedName("json", "presence_penalty")
  presencePenalty?: float32;

  @doc("""
  A value that influences the probability of generated tokens appearing based on their cumulative
  frequency in generated text.
  Positive values will make tokens less likely to appear as their frequency increases and
  decrease the likelihood of the model repeating the same statements verbatim.
  """)
  @projectedName("json", "frequency_penalty")
  frequencyPenalty?: float32;

  @doc("""
  A value indicating whether chat completions should be streamed for this request.
  """)
  @projectedName("json", "stream")
  @projectedName("csharp", "InternalShouldStreamResponse")
  stream?: boolean;

  @doc("""
  The model name to provide as part of this completions request.
  Not applicable to Azure OpenAI, where deployment information should be included in the Azure
  resource URI that's connected to.
  """)
  @projectedName("json", "model")
  @projectedName("csharp", "InternalNonAzureModelName")
  "model"?: string;
};

@doc("""
The representation of a single prompt completion as part of an overall chat completions request.
Generally, `n` choices are generated per provided prompt with a default value of 1.
Token limits and other settings may limit the number of choices generated.
""")
model ChatChoice {
  @doc("The chat message for a given chat completions prompt.")
  @projectedName("json", "message")
  message?: ChatMessage;

  @doc("The ordered index associated with this chat completions choice.")
  @projectedName("json", "index")
  index: int32;

  @doc("The reason that this chat completions choice completed its generation.")
  @projectedName("json", "finish_reason")
  finishReason: CompletionsFinishReason | null;

  @doc("The delta message content for a streaming response.")
  @projectedName("json", "delta")
  @projectedName("csharp", "InternalStreamingDeltaMessage")
  delta?: ChatMessage;
}

@doc("""
Representation of the response data from a chat completions request.
Completions support a wide variety of tasks and generate text that continues from or "completes"
provided prompt data.
""")
model ChatCompletions {
  @doc("A unique identifier associated with this chat completions response.")
  @projectedName("json", "id")
  id: string;

  @doc("""
  The first timestamp associated with generation activity for this completions response,
  represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.
  """)
  @projectedName("json", "created")
  @projectedName("csharp", "InternalCreatedSecondsAfterUnixEpoch")
  created: int32;

  @doc("""
  The collection of completions choices associated with this completions response.
  Generally, `n` choices are generated per provided prompt with a default value of 1.
  Token limits and other settings may limit the number of choices generated.
  """)
  @projectedName("json", "choices")
  choices: ChatChoice[];

  @doc("""
  Usage information for tokens processed and generated as part of this completions operation.
  """)
  @projectedName("json", "usage")
  usage: CompletionsUsage;
}
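
The models above define only the request and response payload shapes; the operation that binds them to a route is declared elsewhere in the spec. A hedged sketch of what such an operation could look like using the `TypeSpec.Http` decorators imported at the top of this file (the route, operation name, and parameter names are assumptions, not part of this commit):

// Illustrative only: a POST operation taking ChatCompletionsOptions and returning ChatCompletions.
// The path and identifiers are assumed for the sketch; the real operation is defined in another file.
@post
@route("/deployments/{deploymentId}/chat/completions")
op getChatCompletions(
  @path deploymentId: string,
  @body body: ChatCompletionsOptions,
): ChatCompletions;

Because `stream` is a property of the same options model, one operation shape covers both streaming and non-streaming calls; per its doc string, the `delta` property on `ChatChoice` carries message content only for streamed responses.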
Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
@doc("""
Representation of the token counts processed for a completions request.
Counts consider all tokens across prompts, choices, choice alternates, best_of generations, and
other consumers.
""")
model CompletionsUsage {
  @doc("The number of tokens generated across all completions emissions.")
  @projectedName("json", "completion_tokens")
  completionTokens: int32,
  @doc("The number of tokens in the provided prompts for the completions request.")
  @projectedName("json", "prompt_tokens")
  promptTokens: int32,
  @doc("The total number of tokens processed for the completions request and response.")
  @projectedName("json", "total_tokens")
  totalTokens: int32
}

@doc("""
Representation of the manner in which a completions response concluded.
""")
enum CompletionsFinishReason {
  @doc("Completions ended normally and reached its end of token generation.")
  @projectedName("json", "stop")
  stopped,

  @doc("Completions exhausted available token limits before generation could complete.")
  @projectedName("json", "length")
  tokenLimitReached,

  @doc("""
  Completions generated a response that was identified as potentially sensitive per content
  moderation policies.
  """)
  @projectedName("json", "content_filter")
  contentFiltered
}
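
Placing these types in a shared file lets both the existing completions spec and the new chat spec pick them up with a plain `import`, while the `@projectedName("json", ...)` decorators map the descriptive member names to their shorter wire names (for example, `tokenLimitReached` serializes as `"length"`). A small illustrative sketch of that reuse (the importing model below is hypothetical):

// Hypothetical consumer of the shared types (not part of this commit).
import "./completions.common.tsp";

model ExampleResponse {
  // Emits "stop", "length", or "content_filter" (or null) on the wire, per the enum's projected names.
  @projectedName("json", "finish_reason")
  finishReason: CompletionsFinishReason | null;

  // Emits an object with "completion_tokens", "prompt_tokens", and "total_tokens" counts.
  @projectedName("json", "usage")
  usage: CompletionsUsage;
}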
