Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ public class EmbeddingsModelDetails
@JsonProperty("params")
private EmbeddingsModelParams params;

// Timeout for the Embeddings request in seconds; mapped to the JSON property "timeout" and
// initialised to 600.
@JsonProperty("timeout")
private Integer timeout = 600;

// Maximum number of retries for the Embeddings request; mapped to the JSON property
// "max_retries" and initialised to 2.
@JsonProperty("max_retries")
private Integer maxRetries = 2;

@JsonAnySetter @JsonAnyGetter
private final Map<String, Object> cloudSdkCustomFields = new LinkedHashMap<>();

Expand Down Expand Up @@ -136,6 +142,75 @@ public void setParams(@Nullable final EmbeddingsModelParams params) {
this.params = params;
}

/**
 * Fluent counterpart of {@link #setTimeout(Integer)}: stores the given timeout on this {@link
 * EmbeddingsModelDetails} and hands the same instance back so calls can be chained.
 *
 * <p>The value is stored as given; no range check is performed by this method.
 *
 * @param timeout Timeout for the Embeddings request in seconds. This parameter is currently
 *     ignored for Vertex AI models. Minimum: 1 Maximum: 600
 * @return This very {@link EmbeddingsModelDetails} instance
 */
@Nonnull
public EmbeddingsModelDetails timeout(@Nullable final Integer timeout) {
  this.timeout = timeout;
  return this;
}

/**
 * Timeout for the Embeddings request in seconds. This parameter is currently ignored for Vertex
 * AI models. minimum: 1 maximum: 600
 *
 * <p>The field starts out as 600, but both {@link #setTimeout(Integer)} and {@link
 * #timeout(Integer)} accept {@code null}, so the returned value may be {@code null}.
 *
 * @return timeout The timeout of this {@link EmbeddingsModelDetails} instance, or {@code null} if
 *     it has been cleared.
 */
@Nullable
public Integer getTimeout() {
  return timeout;
}

/**
 * Replace the timeout stored on this {@link EmbeddingsModelDetails} instance.
 *
 * <p>The value is stored as given; no range check is performed by this method.
 *
 * @param timeout Timeout for the Embeddings request in seconds. This parameter is currently
 *     ignored for Vertex AI models. Minimum: 1 Maximum: 600
 */
public void setTimeout(@Nullable final Integer timeout) {
  this.timeout = timeout;
}

/**
 * Fluent counterpart of {@link #setMaxRetries(Integer)}: stores the given retry limit on this
 * {@link EmbeddingsModelDetails} and hands the same instance back so calls can be chained.
 *
 * <p>The value is stored as given; no range check is performed by this method.
 *
 * @param maxRetries Maximum number of retries for the Embeddings request. This parameter is
 *     currently ignored for Vertex AI models. Minimum: 0 Maximum: 5
 * @return This very {@link EmbeddingsModelDetails} instance
 */
@Nonnull
public EmbeddingsModelDetails maxRetries(@Nullable final Integer maxRetries) {
  this.maxRetries = maxRetries;
  return this;
}

/**
 * Maximum number of retries for the Embeddings request. This parameter is currently ignored for
 * Vertex AI models. minimum: 0 maximum: 5
 *
 * <p>The field starts out as 2, but both {@link #setMaxRetries(Integer)} and {@link
 * #maxRetries(Integer)} accept {@code null}, so the returned value may be {@code null}.
 *
 * @return maxRetries The maxRetries of this {@link EmbeddingsModelDetails} instance, or {@code
 *     null} if it has been cleared.
 */
@Nullable
public Integer getMaxRetries() {
  return maxRetries;
}

/**
 * Replace the retry limit stored on this {@link EmbeddingsModelDetails} instance.
 *
 * <p>The value is stored as given; no range check is performed by this method.
 *
 * @param maxRetries Maximum number of retries for the Embeddings request. This parameter is
 *     currently ignored for Vertex AI models. Minimum: 0 Maximum: 5
 */
public void setMaxRetries(@Nullable final Integer maxRetries) {
  this.maxRetries = maxRetries;
}

/**
* Get the names of the unrecognizable properties of the {@link EmbeddingsModelDetails}.
*
Expand Down Expand Up @@ -178,6 +253,8 @@ public Map<String, Object> toMap() {
if (name != null) declaredFields.put("name", name);
if (version != null) declaredFields.put("version", version);
if (params != null) declaredFields.put("params", params);
if (timeout != null) declaredFields.put("timeout", timeout);
if (maxRetries != null) declaredFields.put("maxRetries", maxRetries);
return declaredFields;
}

Expand Down Expand Up @@ -205,12 +282,14 @@ public boolean equals(@Nullable final java.lang.Object o) {
return Objects.equals(this.cloudSdkCustomFields, embeddingsModelDetails.cloudSdkCustomFields)
&& Objects.equals(this.name, embeddingsModelDetails.name)
&& Objects.equals(this.version, embeddingsModelDetails.version)
&& Objects.equals(this.params, embeddingsModelDetails.params);
&& Objects.equals(this.params, embeddingsModelDetails.params)
&& Objects.equals(this.timeout, embeddingsModelDetails.timeout)
&& Objects.equals(this.maxRetries, embeddingsModelDetails.maxRetries);
}

/**
 * Hashes the declared fields (name, version, params, timeout, maxRetries) together with the
 * custom fields map.
 *
 * @return the combined hash of those values
 */
@Override
public int hashCode() {
  return Objects.hash(name, version, params, timeout, maxRetries, cloudSdkCustomFields);
}

@Override
Expand All @@ -221,6 +300,8 @@ public String toString() {
sb.append(" name: ").append(toIndentedString(name)).append("\n");
sb.append(" version: ").append(toIndentedString(version)).append("\n");
sb.append(" params: ").append(toIndentedString(params)).append("\n");
sb.append(" timeout: ").append(toIndentedString(timeout)).append("\n");
sb.append(" maxRetries: ").append(toIndentedString(maxRetries)).append("\n");
cloudSdkCustomFields.forEach(
(k, v) ->
sb.append(" ").append(k).append(": ").append(toIndentedString(v)).append("\n"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ public class LLMModelDetails
@JsonProperty("params")
private Map<String, Object> params = new HashMap<>();

// Timeout for the LLM request in seconds; mapped to the JSON property "timeout" and initialised
// to 600.
@JsonProperty("timeout")
private Integer timeout = 600;

// Maximum number of retries for the LLM request; mapped to the JSON property "max_retries" and
// initialised to 2.
@JsonProperty("max_retries")
private Integer maxRetries = 2;

@JsonAnySetter @JsonAnyGetter
private final Map<String, Object> cloudSdkCustomFields = new LinkedHashMap<>();

Expand Down Expand Up @@ -159,6 +165,74 @@ public void setParams(@Nullable final Map<String, Object> params) {
this.params = params;
}

/**
 * Fluent counterpart of {@link #setTimeout(Integer)}: stores the given timeout on this {@link
 * LLMModelDetails} and hands the same instance back so calls can be chained.
 *
 * <p>The value is stored as given; no range check is performed by this method.
 *
 * @param timeout Timeout for the LLM request in seconds. This parameter is currently ignored for
 *     Vertex AI models. Minimum: 1 Maximum: 600
 * @return This very {@link LLMModelDetails} instance
 */
@Nonnull
public LLMModelDetails timeout(@Nullable final Integer timeout) {
  this.timeout = timeout;
  return this;
}

/**
 * Timeout for the LLM request in seconds. This parameter is currently ignored for Vertex AI
 * models. minimum: 1 maximum: 600
 *
 * <p>The field starts out as 600, but both {@link #setTimeout(Integer)} and {@link
 * #timeout(Integer)} accept {@code null}, so the returned value may be {@code null}.
 *
 * @return timeout The timeout of this {@link LLMModelDetails} instance, or {@code null} if it has
 *     been cleared.
 */
@Nullable
public Integer getTimeout() {
  return timeout;
}

/**
 * Replace the timeout stored on this {@link LLMModelDetails} instance.
 *
 * <p>The value is stored as given; no range check is performed by this method.
 *
 * @param timeout Timeout for the LLM request in seconds. This parameter is currently ignored for
 *     Vertex AI models. Minimum: 1 Maximum: 600
 */
public void setTimeout(@Nullable final Integer timeout) {
  this.timeout = timeout;
}

/**
 * Fluent counterpart of {@link #setMaxRetries(Integer)}: stores the given retry limit on this
 * {@link LLMModelDetails} and hands the same instance back so calls can be chained.
 *
 * <p>The value is stored as given; no range check is performed by this method.
 *
 * @param maxRetries Maximum number of retries for the LLM request. This parameter is currently
 *     ignored for Vertex AI models. Minimum: 0 Maximum: 5
 * @return This very {@link LLMModelDetails} instance
 */
@Nonnull
public LLMModelDetails maxRetries(@Nullable final Integer maxRetries) {
  this.maxRetries = maxRetries;
  return this;
}

/**
 * Maximum number of retries for the LLM request. This parameter is currently ignored for Vertex
 * AI models. minimum: 0 maximum: 5
 *
 * <p>The field starts out as 2, but both {@link #setMaxRetries(Integer)} and {@link
 * #maxRetries(Integer)} accept {@code null}, so the returned value may be {@code null}.
 *
 * @return maxRetries The maxRetries of this {@link LLMModelDetails} instance, or {@code null} if
 *     it has been cleared.
 */
@Nullable
public Integer getMaxRetries() {
  return maxRetries;
}

/**
 * Replace the retry limit stored on this {@link LLMModelDetails} instance.
 *
 * <p>The value is stored as given; no range check is performed by this method.
 *
 * @param maxRetries Maximum number of retries for the LLM request. This parameter is currently
 *     ignored for Vertex AI models. Minimum: 0 Maximum: 5
 */
public void setMaxRetries(@Nullable final Integer maxRetries) {
  this.maxRetries = maxRetries;
}

/**
* Get the names of the unrecognizable properties of the {@link LLMModelDetails}.
*
Expand Down Expand Up @@ -200,6 +274,8 @@ public Map<String, Object> toMap() {
if (name != null) declaredFields.put("name", name);
if (version != null) declaredFields.put("version", version);
if (params != null) declaredFields.put("params", params);
if (timeout != null) declaredFields.put("timeout", timeout);
if (maxRetries != null) declaredFields.put("maxRetries", maxRetries);
return declaredFields;
}

Expand Down Expand Up @@ -227,12 +303,14 @@ public boolean equals(@Nullable final java.lang.Object o) {
return Objects.equals(this.cloudSdkCustomFields, llMModelDetails.cloudSdkCustomFields)
&& Objects.equals(this.name, llMModelDetails.name)
&& Objects.equals(this.version, llMModelDetails.version)
&& Objects.equals(this.params, llMModelDetails.params);
&& Objects.equals(this.params, llMModelDetails.params)
&& Objects.equals(this.timeout, llMModelDetails.timeout)
&& Objects.equals(this.maxRetries, llMModelDetails.maxRetries);
}

/**
 * Hashes the declared fields (name, version, params, timeout, maxRetries) together with the
 * custom fields map.
 *
 * @return the combined hash of those values
 */
@Override
public int hashCode() {
  return Objects.hash(name, version, params, timeout, maxRetries, cloudSdkCustomFields);
}

@Override
Expand All @@ -243,6 +321,8 @@ public String toString() {
sb.append(" name: ").append(toIndentedString(name)).append("\n");
sb.append(" version: ").append(toIndentedString(version)).append("\n");
sb.append(" params: ").append(toIndentedString(params)).append("\n");
sb.append(" timeout: ").append(toIndentedString(timeout)).append("\n");
sb.append(" maxRetries: ").append(toIndentedString(maxRetries)).append("\n");
cloudSdkCustomFields.forEach(
(k, v) ->
sb.append(" ").append(k).append(": ").append(toIndentedString(v)).append("\n"));
Expand Down
26 changes: 25 additions & 1 deletion orchestration/src/main/resources/spec/orchestration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,19 @@ components:
type: string
default: "latest"
params:
$ref: "#/components/schemas/EmbeddingsModelParams"
$ref: "#/components/schemas/EmbeddingsModelParams"
timeout:
description: Timeout for the Embeddings request in seconds. This parameter is currently ignored for Vertex AI models.
type: integer
default: 600
minimum: 1
maximum: 600
max_retries:
description: Maximum number of retries for the Embeddings request. This parameter is currently ignored for Vertex AI models.
type: integer
default: 2
minimum: 0
maximum: 5
EmbeddingsModelParams:
type: object
description: Additional parameters for generating input's embeddings. Default values are used for mandatory parameters.
Expand Down Expand Up @@ -828,6 +840,18 @@ components:
n: 2
stream_options:
include_usage: true
timeout:
description: Timeout for the LLM request in seconds. This parameter is currently ignored for Vertex AI models.
type: integer
default: 600
minimum: 1
maximum: 600
max_retries:
description: Maximum number of retries for the LLM request. This parameter is currently ignored for Vertex AI models.
type: integer
default: 2
minimum: 0
maximum: 5

# --- Templating Module with User Defined Template ---
# response_format api definition taken from: https://github.com/openai/openai-openapi/blob/e0cb2d721753e13e69e918465795d6e9f87ab15a/openapi.yaml#L12286
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1400,6 +1400,8 @@ void testEmbeddingCallWithMasking() {
"model": {
"name": "text-embedding-3-large",
"version": "latest",
"timeout" : 600,
"max_retries" : 2,
"params": {
"encoding_format": "float",
"dimensions": 5,
Expand Down
6 changes: 4 additions & 2 deletions orchestration/src/test/resources/chatMemory.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
"model": {
"name": "gpt-4o",
"params": {},
"version": "latest"
"version": "latest",
"timeout" : 600,
"max_retries" : 2
},
"prompt": {
"template": [
Expand All @@ -31,4 +33,4 @@
},
"placeholder_values": {},
"messages_history": []
}
}
6 changes: 4 additions & 2 deletions orchestration/src/test/resources/filteringLooseRequest.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
"top_p": 1,
"n": 1
},
"version": "latest"
"version": "latest",
"timeout" : 600,
"max_retries" : 2
},
"prompt": {
"template": [
Expand Down Expand Up @@ -63,4 +65,4 @@
},
"placeholder_values": {},
"messages_history": []
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
"top_p": 1,
"n": 1
},
"version": "latest"
"version": "latest",
"timeout" : 600,
"max_retries" : 2
},
"prompt": {
"template": [
Expand Down Expand Up @@ -53,4 +55,4 @@
"userMessage": "What is a fuzzy search?"
},
"messages_history": []
}
}
6 changes: 4 additions & 2 deletions orchestration/src/test/resources/groundingRequest.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
"top_p": 1,
"n": 1
},
"version": "latest"
"version": "latest",
"timeout" : 600,
"max_retries" : 2
},
"prompt": {
"template": [
Expand Down Expand Up @@ -94,4 +96,4 @@
"query": "String used for similarity search in database"
},
"messages_history": []
}
}
6 changes: 4 additions & 2 deletions orchestration/src/test/resources/jsonObjectRequest.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
"model": {
"name": "gpt-4o-mini",
"params": {},
"version": "latest"
"version": "latest",
"timeout" : 600,
"max_retries" : 2
},
"prompt": {
"template": [
Expand All @@ -29,4 +31,4 @@
},
"placeholder_values": {},
"messages_history": []
}
}
Loading
Loading