// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import Foundation

/// A struct defining model parameters to be used when sending generative AI
/// requests to the backend model.
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
public struct LiveGenerationConfig: Sendable {
  /// Controls the degree of randomness in token selection.
  let temperature: Float?

  /// Controls diversity of generated text.
  let topP: Float?

  /// Limits the number of highest probability words considered.
  let topK: Int?

  /// The number of response variations to return.
  let candidateCount: Int?

  /// Maximum number of tokens that can be generated in the response.
  let maxOutputTokens: Int?

  /// Controls the likelihood of repeating the same words or phrases already generated in the text.
  let presencePenalty: Float?

  /// Controls the likelihood of repeating words, with the penalty increasing for each repetition.
  let frequencyPenalty: Float?

  /// Supported modalities of the response.
  let responseModalities: [ResponseModality]?

  /// Creates a new `LiveGenerationConfig` value.
  ///
  /// See the
  /// [Configure model parameters](https://firebase.google.com/docs/vertex-ai/model-parameters)
  /// guide and the
  /// [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
  /// for more details.
  ///
  /// - Parameters:
  ///   - temperature: Controls the randomness of the language model's output. Higher values (for
  ///     example, 1.0) make the text more random and creative, while lower values (for example,
  ///     0.1) make it more focused and deterministic.
  ///
  ///     > Note: A temperature of 0 means that the highest probability tokens are always selected.
  ///     > In this case, responses for a given prompt are mostly deterministic, but a small amount
  ///     > of variation is still possible.
  ///
  ///     > Important: The range of supported temperature values depends on the model; see the
  ///     > [documentation](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#temperature)
  ///     > for more details.
  ///   - topP: Controls diversity of generated text. Higher values (e.g., 0.9) produce more diverse
  ///     text, while lower values (e.g., 0.5) make the output more focused.
  ///
  ///     The supported range is 0.0 to 1.0.
  ///
  ///     > Important: The default `topP` value depends on the model; see the
  ///     > [documentation](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#top-p)
  ///     > for more details.
  ///   - topK: Limits the number of highest probability words the model considers when generating
  ///     text. For example, a topK of 40 means only the 40 most likely words are considered for the
  ///     next token. A higher value increases diversity, while a lower value makes the output more
  ///     deterministic.
  ///
  ///     The supported range is 1 to 40.
  ///
  ///     > Important: Support for `topK` and the default value depends on the model; see the
  ///     > [documentation](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#top-k)
  ///     > for more details.
  ///   - candidateCount: The number of response variations to return; defaults to 1 if not set.
  ///     Support for multiple candidates depends on the model; see the
  ///     [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
  ///     for more details.
  ///   - maxOutputTokens: Maximum number of tokens that can be generated in the response.
  ///     See the configure model parameters [documentation](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#max-output-tokens)
  ///     for more details.
  ///   - presencePenalty: Controls the likelihood of repeating the same words or phrases already
  ///     generated in the text. Higher values increase the penalty of repetition, resulting in more
  ///     diverse output.
  ///
  ///     > Note: While both `presencePenalty` and `frequencyPenalty` discourage repetition,
  ///     > `presencePenalty` applies the same penalty regardless of how many times the word/phrase
  ///     > has already appeared, whereas `frequencyPenalty` increases the penalty for *each*
  ///     > repetition of a word/phrase.
  ///
  ///     > Important: The range of supported `presencePenalty` values depends on the model; see the
  ///     > [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
  ///     > for more details.
  ///   - frequencyPenalty: Controls the likelihood of repeating words or phrases, with the penalty
  ///     increasing for each repetition. Higher values increase the penalty of repetition,
  ///     resulting in more diverse output.
  ///
  ///     > Note: While both `frequencyPenalty` and `presencePenalty` discourage repetition,
  ///     > `frequencyPenalty` increases the penalty for *each* repetition of a word/phrase, whereas
  ///     > `presencePenalty` applies the same penalty regardless of how many times the word/phrase
  ///     > has already appeared.
  ///
  ///     > Important: The range of supported `frequencyPenalty` values depends on the model; see
  ///     > the
  ///     > [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
  ///     > for more details.
  ///   - responseModalities: The data types (modalities) that may be returned in model responses.
  ///
  ///     See the [multimodal
  ///     responses](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal-response-generation)
  ///     documentation for more details.
  ///
  ///     > Warning: Specifying response modalities is a **Public Preview** feature, which means
  ///     > that it is not subject to any SLA or deprecation policy and could change in
  ///     > backwards-incompatible ways.
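  ///
  /// For example, a minimal sketch of building a config (the parameter values shown are
  /// illustrative choices, not defaults, and `.audio` assumes `ResponseModality` exposes an
  /// audio modality for live sessions):
  ///
  /// ```swift
  /// let config = LiveGenerationConfig(
  ///   temperature: 0.7,
  ///   topP: 0.9,
  ///   maxOutputTokens: 1024,
  ///   responseModalities: [.audio]
  /// )
  /// ```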
  public init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil,
              candidateCount: Int? = nil, maxOutputTokens: Int? = nil,
              presencePenalty: Float? = nil, frequencyPenalty: Float? = nil,
              responseModalities: [ResponseModality]? = nil) {
    // An explicit initializer is declared so that re-arranging the stored properties above
    // doesn't change the parameter order of the public API surface.
    self.temperature = temperature
    self.topP = topP
    self.topK = topK
    self.candidateCount = candidateCount
    self.maxOutputTokens = maxOutputTokens
    self.presencePenalty = presencePenalty
    self.frequencyPenalty = frequencyPenalty
    self.responseModalities = responseModalities
  }
}

// MARK: - Codable Conformances

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
extension LiveGenerationConfig: Encodable {
  enum CodingKeys: String, CodingKey {
    case temperature
    case topP
    case topK
    case candidateCount
    case maxOutputTokens
    case presencePenalty
    case frequencyPenalty
    case responseModalities
  }
}
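
// Usage sketch (illustrative only, not part of the SDK's API surface): because the `Encodable`
// conformance is synthesized from `CodingKeys`, optional properties are encoded with
// `encodeIfPresent`, so parameters left as `nil` are omitted from the serialized JSON rather
// than encoded as `null`. For example:
//
//     let config = LiveGenerationConfig(temperature: 0.5, topK: 40)
//     let encoder = JSONEncoder()
//     encoder.outputFormatting = .sortedKeys
//     let json = String(data: try encoder.encode(config), encoding: .utf8)
//     // json == #"{"temperature":0.5,"topK":40}"#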