Skip to content

Commit baa09b1

Browse files
committed
fix ToolIntegrationWithLLMTest model undeploy race condition
Previously the test class attempted to delete a model without fully knowing if the model was undeployed in time. This change adds a waiting for 5 retries each 1 second to check the status of the model and only when undeployed will it proceed to delete the model. When the number of retries are exceeded it throws a error indicating a deeper problem. Manual testing was done to check that the model is undeployed by searching for the specific model via the checkForModelUndeployedStatus method. Signed-off-by: Brian Flores <[email protected]>
1 parent f888c27 commit baa09b1

File tree

2 files changed

+17
-8
lines changed

2 files changed

+17
-8
lines changed

plugin/src/test/java/org/opensearch/ml/rest/RestMLGuardrailsIT.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ public void testPredictRemoteModelFailedWithModelGuardrail() throws IOException,
281281
Map responseMap = parseResponseToMap(response);
282282
String guardrailConnectorId = (String) responseMap.get("connector_id");
283283

284-
//Create the model ID
284+
// Create the model ID
285285
response = registerRemoteModel("guardrail model group", "openAI-GPT-3.5 completions", guardrailConnectorId);
286286
responseMap = parseResponseToMap(response);
287287
String guardrailModelId = (String) responseMap.get("model_id");

plugin/src/test/java/org/opensearch/ml/tools/ToolIntegrationWithLLMTest.java

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
import java.util.List;
1010
import java.util.Locale;
1111
import java.util.Map;
12-
import java.util.Set;
1312
import java.util.UUID;
1413
import java.util.concurrent.TimeUnit;
1514
import java.util.function.Predicate;
@@ -32,7 +31,7 @@
3231
@Log4j2
3332
public abstract class ToolIntegrationWithLLMTest extends RestBaseAgentToolsIT {
3433

35-
private static final int MAX_TASK_RESULT_QUERY_TIME_IN_SECOND = 30;
34+
private static final int MAX_RETRIES = 5;
3635
private static final int DEFAULT_TASK_RESULT_QUERY_INTERVAL_IN_MILLISECOND = 1000;
3736

3837
protected HttpServer server;
@@ -72,16 +71,17 @@ public void stopMockLLM() {
7271
@After
7372
public void deleteModel() throws IOException {
7473
undeployModel(modelId);
74+
checkForModelUndeployedStatus(modelId);
7575
deleteModel(client(), modelId, null);
7676
}
7777

7878
@SneakyThrows
79-
private void waitModelUndeployed(String modelId) {
79+
private void checkForModelUndeployedStatus(String modelId) {
8080
Predicate<Response> condition = response -> {
8181
try {
8282
Map<String, Object> responseInMap = parseResponseToMap(response);
8383
MLModelState state = MLModelState.from(responseInMap.get(MLModel.MODEL_STATE_FIELD).toString());
84-
return Set.of(MLModelState.UNDEPLOYED, MLModelState.DEPLOY_FAILED).contains(state);
84+
return MLModelState.UNDEPLOYED.equals(state);
8585
} catch (Exception e) {
8686
return false;
8787
}
@@ -91,16 +91,25 @@ private void waitModelUndeployed(String modelId) {
9191

9292
@SneakyThrows
9393
protected Response waitResponseMeetingCondition(String method, String endpoint, String jsonEntity, Predicate<Response> condition) {
94-
for (int i = 0; i < MAX_TASK_RESULT_QUERY_TIME_IN_SECOND; i++) {
94+
for (int attempt = 1; attempt <= MAX_RETRIES; attempt++) {
9595
Response response = TestHelper.makeRequest(client(), method, endpoint, null, jsonEntity, null);
9696
assertEquals(RestStatus.OK, RestStatus.fromCode(response.getStatusLine().getStatusCode()));
9797
if (condition.test(response)) {
9898
return response;
9999
}
100-
logger.info("The {}-th response: {}", i, response.toString());
100+
logger.info("The {}-th attempt on {}:{} . response: {}", attempt, method, endpoint, response.toString());
101101
Thread.sleep(DEFAULT_TASK_RESULT_QUERY_INTERVAL_IN_MILLISECOND);
102102
}
103-
fail("The response failed to meet condition after " + MAX_TASK_RESULT_QUERY_TIME_IN_SECOND + " seconds.");
103+
fail(
104+
String
105+
.format(
106+
Locale.ROOT,
107+
"The response failed to meet condition after %d attempts. Attempted to perform %s : %s",
108+
MAX_RETRIES,
109+
method,
110+
endpoint
111+
)
112+
);
104113
return null;
105114
}
106115

0 commit comments

Comments
 (0)