2929
3030import static org .elasticsearch .xpack .inference .services .ServiceFields .MAX_INPUT_TOKENS ;
3131import static org .elasticsearch .xpack .inference .services .ServiceFields .MODEL_ID ;
32+ import static org .elasticsearch .xpack .inference .services .ServiceFields .URL ;
3233import static org .elasticsearch .xpack .inference .services .ServiceUtils .createUri ;
3334import static org .elasticsearch .xpack .inference .services .ServiceUtils .extractOptionalPositiveInteger ;
34- import static org .elasticsearch .xpack .inference .services .ServiceUtils .extractRequiredString ;
35+ import static org .elasticsearch .xpack .inference .services .ServiceUtils .extractOptionalString ;
3536import static org .elasticsearch .xpack .inference .services .huggingface .HuggingFaceServiceSettings .extractUri ;
3637
3738/**
@@ -47,11 +48,9 @@ public class HuggingFaceChatCompletionServiceSettings extends FilteredXContentOb
4748 HuggingFaceRateLimitServiceSettings {
4849
4950 public static final String NAME = "hugging_face_completion_service_settings" ;
50- public static final String URL = "url" ;
5151 // At the time of writing HuggingFace hasn't posted the default rate limit for inference endpoints, so the value here is only a guess
5252 // 3000 requests per minute
5353 private static final RateLimitSettings DEFAULT_RATE_LIMIT_SETTINGS = new RateLimitSettings (3000 );
54- private static final int DEFAULT_TOKEN_LIMIT = 512 ;
5554
5655 /**
5756 * Creates a new instance of {@link HuggingFaceChatCompletionServiceSettings} from a map of settings.
@@ -62,7 +61,7 @@ public class HuggingFaceChatCompletionServiceSettings extends FilteredXContentOb
6261 public static HuggingFaceChatCompletionServiceSettings fromMap (Map <String , Object > map , ConfigurationParseContext context ) {
6362 ValidationException validationException = new ValidationException ();
6463
65- String modelId = extractRequiredString (map , MODEL_ID , ModelConfigurations .SERVICE_SETTINGS , validationException );
64+ String modelId = extractOptionalString (map , MODEL_ID , ModelConfigurations .SERVICE_SETTINGS , validationException );
6665
6766 var uri = extractUri (map , URL , validationException );
6867
@@ -93,7 +92,7 @@ public static HuggingFaceChatCompletionServiceSettings fromMap(Map<String, Objec
9392 private final RateLimitSettings rateLimitSettings ;
9493
9594 public HuggingFaceChatCompletionServiceSettings (
96- String modelId ,
95+ @ Nullable String modelId ,
9796 String url ,
9897 @ Nullable Integer maxInputTokens ,
9998 @ Nullable RateLimitSettings rateLimitSettings
@@ -102,14 +101,14 @@ public HuggingFaceChatCompletionServiceSettings(
102101 }
103102
104103 public HuggingFaceChatCompletionServiceSettings (
105- String modelId ,
104+ @ Nullable String modelId ,
106105 URI uri ,
107106 @ Nullable Integer maxInputTokens ,
108107 @ Nullable RateLimitSettings rateLimitSettings
109108 ) {
110109 this .modelId = modelId ;
111110 this .uri = uri ;
112- this .maxInputTokens = Objects . requireNonNullElse ( maxInputTokens , DEFAULT_TOKEN_LIMIT ) ;
111+ this .maxInputTokens = maxInputTokens ;
113112 this .rateLimitSettings = Objects .requireNonNullElse (rateLimitSettings , DEFAULT_RATE_LIMIT_SETTINGS );
114113 }
115114
@@ -119,15 +118,14 @@ public HuggingFaceChatCompletionServiceSettings(
119118 * @throws IOException if an I/O error occurs
120119 */
121120 public HuggingFaceChatCompletionServiceSettings (StreamInput in ) throws IOException {
122- this .modelId = in .readString ();
121+ this .modelId = in .readOptionalString ();
123122 this .uri = createUri (in .readString ());
123+ this .maxInputTokens = in .readOptionalVInt ();
124124
125125 if (in .getTransportVersion ().onOrAfter (TransportVersions .V_8_15_0 )) {
126126 this .rateLimitSettings = new RateLimitSettings (in );
127- this .maxInputTokens = in .readOptionalVInt ();
128127 } else {
129128 this .rateLimitSettings = DEFAULT_RATE_LIMIT_SETTINGS ;
130- this .maxInputTokens = DEFAULT_TOKEN_LIMIT ;
131129 }
132130 }
133131
@@ -141,7 +139,7 @@ public URI uri() {
141139 return uri ;
142140 }
143141
144- public int maxInputTokens () {
142+ public Integer maxInputTokens () {
145143 return maxInputTokens ;
146144 }
147145
@@ -161,10 +159,13 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
161159
162160 @ Override
163161 protected XContentBuilder toXContentFragmentOfExposedFields (XContentBuilder builder , Params params ) throws IOException {
164- builder .field (MODEL_ID , modelId );
165-
162+ if (modelId != null ) {
163+ builder .field (MODEL_ID , modelId );
164+ }
166165 builder .field (URL , uri .toString ());
167- builder .field (MAX_INPUT_TOKENS , maxInputTokens );
166+ if (maxInputTokens != null ) {
167+ builder .field (MAX_INPUT_TOKENS , maxInputTokens );
168+ }
168169 rateLimitSettings .toXContent (builder , params );
169170
170171 return builder ;
@@ -177,13 +178,13 @@ public String getWriteableName() {
177178
178179 @ Override
179180 public TransportVersion getMinimalSupportedVersion () {
180- return TransportVersions .V_8_12_0 ;
181+ return TransportVersions .V_8_14_0 ;
181182 }
182183
183184 @ Override
184185 public void writeTo (StreamOutput out ) throws IOException {
185- out .writeString (modelId );
186- out .writeOptionalString (uri != null ? uri .toString () : null );
186+ out .writeOptionalString (modelId );
187+ out .writeString (uri .toString ());
187188 out .writeOptionalVInt (maxInputTokens );
188189
189190 if (out .getTransportVersion ().onOrAfter (TransportVersions .V_8_15_0 )) {
0 commit comments