@@ -1299,64 +1299,6 @@ private InferenceEventsAssertion testUnifiedStream(int responseCode, String resp
12991299 }
13001300 }
13011301
1302- public void testUnifiedCompletionInfer_WithGoogleVertexAiModel () throws IOException {
1303- var elasticInferenceServiceURL = getUrl (webServer );
1304- var senderFactory = HttpRequestSenderTests .createSenderFactory (threadPool , clientManager );
1305-
1306- try (var service = createService (senderFactory , elasticInferenceServiceURL )) {
1307- // Mock a successful streaming response
1308- String responseJson = """
1309- data: {"id":"1","object":"completion","created":1677858242,"model":"my-model-id",
1310- "choices":[{"finish_reason":null,"index":0,"delta":{"role":"assistant","content":"Hello"}}]}
1311-
1312- data: {"id":"2","object":"completion","created":1677858242,"model":"my-model-id",
1313- "choices":[{"finish_reason":"stop","index":0,"delta":{"content":" world!"}}]}
1314-
1315- data: [DONE]
1316-
1317- """ ;
1318-
1319- webServer .enqueue (new MockResponse ().setResponseCode (200 ).setBody (responseJson ));
1320-
1321- // Create chat completion model
1322- var model = new ElasticInferenceServiceCompletionModel (
1323- "id" ,
1324- TaskType .CHAT_COMPLETION ,
1325- "elastic" ,
1326- new ElasticInferenceServiceCompletionServiceSettings ("gemini-2.0-flash-001" , new RateLimitSettings (100 )),
1327- EmptyTaskSettings .INSTANCE ,
1328- EmptySecretSettings .INSTANCE ,
1329- ElasticInferenceServiceComponents .of (elasticInferenceServiceURL )
1330- );
1331-
1332- var request = UnifiedCompletionRequest .of (
1333- List .of (new UnifiedCompletionRequest .Message (new UnifiedCompletionRequest .ContentString ("Hello" ), "user" , null , null ))
1334- );
1335-
1336- PlainActionFuture <InferenceServiceResults > listener = new PlainActionFuture <>();
1337-
1338- try {
1339- service .unifiedCompletionInfer (model , request , InferenceAction .Request .DEFAULT_TIMEOUT , listener );
1340- listener .actionGet (TIMEOUT );
1341-
1342- // Verify the request was sent
1343- assertThat (webServer .requests (), hasSize (1 ));
1344- var httpRequest = webServer .requests ().getFirst ();
1345-
1346- // Check that the Gemini API was called.
1347- assertThat (
1348- httpRequest .getBody ().toString (),
1349- equalTo (
1350- "{\" messages\" :[{\" content\" :\" Hello\" ,\" role\" :\" user\" }],\" n\" :1,\" stream\" :true,\" stream_options\" :{\" include_usage\" :true},\" model\" :\" gemini-2.0-flash-001\" }"
1351- )
1352- );
1353- } finally {
1354- // Clean up the thread context
1355- threadPool .getThreadContext ().stashContext ();
1356- }
1357- }
1358- }
1359-
13601302 private void ensureAuthorizationCallFinished (ElasticInferenceService service ) {
13611303 service .onNodeStarted ();
13621304 service .waitForFirstAuthorizationToComplete (TIMEOUT );
0 commit comments