Merge pull request #282 from openai/dev/zhang/fix

zhang-openai · web-flow · commit 0f5de60a3d2b · 2024-06-17T16:19:58.000-07:00
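Fixes documentation error: uses `service_tier` consistently in place of the `service_level` request parameter and the `scale_tier` response field.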
diff --git a/openapi.yaml b/openapi.yaml
@@ -7206,17 +7206,6 @@ components:
                         If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.
 
                         Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend.
-                service_level:
-                    description: |
-                        Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service:
-                          - If set to 'auto', the system will utilize scale tier credits until they are exhausted.
-                          - If set to 'default', the request will be processed in the shared cluster.
-
-                          When this parameter is set, the response body will include the `service_tier` utilized.
-                    type: string
-                    enum: ["auto", "default"]
-                    nullable: true
-                    default: null
                 stop:
                     description: &completions_stop_description >
                         Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
@@ -7936,6 +7925,17 @@ components:
                         Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend.
                     x-oaiMeta:
                         beta: true
+                service_tier:
+                    description: |
+                        Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service:
+                          - If set to 'auto', the system will utilize scale tier credits until they are exhausted.
+                          - If set to 'default', the request will be processed in the shared cluster.
+
+                          When this parameter is set, the response body will include the `service_tier` utilized.
+                    type: string
+                    enum: ["auto", "default"]
+                    nullable: true
+                    default: null
                 stop:
                     description: |
                         Up to 4 sequences where the API will stop generating further tokens.
@@ -8077,7 +8077,7 @@ components:
                 model:
                     type: string
                     description: The model used for the chat completion.
-                scale_tier:
+                service_tier:
                     description: The service tier used for processing the request. This field is only included if the `service_tier` parameter is specified in the request.
                     type: string
                     enum: ["scale", "default"]
@@ -8259,6 +8259,12 @@ components:
                 model:
                     type: string
                     description: The model to generate the completion.
+                service_tier:
+                    type: string
+                    description: The service tier used for processing the request. This field is only included if the `service_tier` parameter is specified in the request.
+                    enum: ["scale", "default"]
+                    nullable: true
+                    example: "scale"
                 system_fingerprint:
                     type: string
                     description: |