Skip to content

Commit 759fff0

Browse files
Added resource group isolation to DeploymentCache (#103)
* Added resource group to DeploymentCache * PMD * Matthias' suggestions --------- Co-authored-by: Matthias Kuhr <[email protected]>
1 parent a54ca59 commit 759fff0

File tree

5 files changed

+130
-113
lines changed

5 files changed

+130
-113
lines changed

core/src/main/java/com/sap/ai/sdk/core/AiCoreService.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,4 +210,15 @@ protected ApiClient getApiClient(@Nonnull final Destination destination) {
210210

211211
return new ApiClient(rt).setBasePath(destination.asHttp().getUri().toString());
212212
}
213+
214+
/**
215+
* Remove all entries from the cache then load all deployments into the cache.
216+
*
217+
* <p><b>Call this whenever a deployment is deleted.</b>
218+
*
219+
* @param resourceGroup the resource group of the deleted deployment, usually "default".
220+
*/
221+
public void reloadCachedDeployments(@Nonnull final String resourceGroup) {
222+
DEPLOYMENT_CACHE.resetCache(client(), resourceGroup);
223+
}
213224
}

core/src/main/java/com/sap/ai/sdk/core/DeploymentCache.java

Lines changed: 20 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import java.util.NoSuchElementException;
1010
import java.util.Optional;
1111
import java.util.Set;
12+
import java.util.concurrent.ConcurrentHashMap;
1213
import javax.annotation.Nonnull;
1314
import lombok.extern.slf4j.Slf4j;
1415

@@ -20,7 +21,7 @@
2021
class DeploymentCache {
2122

2223
/** Cache for deployment ids. The key is the model name and the value is the deployment id. */
23-
protected final Set<AiDeployment> CACHE = new HashSet<>();
24+
private final Map<String, Set<AiDeployment>> CACHE = new ConcurrentHashMap<>();
2425

2526
/**
2627
* Remove all entries from the cache then load all deployments into the cache.
@@ -30,32 +31,12 @@ class DeploymentCache {
3031
* @param client the API client to query deployments.
3132
* @param resourceGroup the resource group, usually "default".
3233
*/
33-
public void resetCache(@Nonnull final ApiClient client, @Nonnull final String resourceGroup) {
34-
clearCache();
35-
loadCache(client, resourceGroup);
36-
}
37-
38-
/**
39-
* Remove all entries from the cache.
40-
*
41-
* <p><b>Call {@link #resetCache} whenever a deployment is deleted.</b>
42-
*/
43-
protected void clearCache() {
44-
CACHE.clear();
45-
}
46-
47-
/**
48-
* Load all deployments into the cache
49-
*
50-
* <p><b>Call {@link #resetCache} whenever a deployment is deleted.</b>
51-
*
52-
* @param client the API client to query deployments.
53-
* @param resourceGroup the resource group, usually "default".
54-
*/
55-
protected void loadCache(@Nonnull final ApiClient client, @Nonnull final String resourceGroup) {
34+
void resetCache(@Nonnull final ApiClient client, @Nonnull final String resourceGroup) {
35+
CACHE.remove(resourceGroup);
5636
try {
57-
final var deployments = new DeploymentApi(client).query(resourceGroup).getResources();
58-
CACHE.addAll(deployments);
37+
final var deployments =
38+
new HashSet<>(new DeploymentApi(client).query(resourceGroup).getResources());
39+
CACHE.put(resourceGroup, deployments);
5940
} catch (final OpenApiRequestException e) {
6041
log.error("Failed to load deployments into cache", e);
6142
}
@@ -72,25 +53,26 @@ protected void loadCache(@Nonnull final ApiClient client, @Nonnull final String
7253
* @throws NoSuchElementException if no running deployment is found for the model.
7354
*/
7455
@Nonnull
75-
public String getDeploymentIdByModel(
56+
String getDeploymentIdByModel(
7657
@Nonnull final ApiClient client,
7758
@Nonnull final String resourceGroup,
7859
@Nonnull final String modelName)
7960
throws NoSuchElementException {
80-
return getDeploymentIdByModel(modelName)
61+
return getDeploymentIdByModel(resourceGroup, modelName)
8162
.orElseGet(
8263
() -> {
8364
resetCache(client, resourceGroup);
84-
return getDeploymentIdByModel(modelName)
65+
return getDeploymentIdByModel(resourceGroup, modelName)
8566
.orElseThrow(
8667
() ->
8768
new NoSuchElementException(
8869
"No running deployment found for model: " + modelName));
8970
});
9071
}
9172

92-
private Optional<String> getDeploymentIdByModel(@Nonnull final String modelName) {
93-
return CACHE.stream()
73+
private Optional<String> getDeploymentIdByModel(
74+
@Nonnull final String resourceGroup, @Nonnull final String modelName) {
75+
return CACHE.getOrDefault(resourceGroup, new HashSet<>()).stream()
9476
.filter(deployment -> isDeploymentOfModel(modelName, deployment))
9577
.findFirst()
9678
.map(AiDeployment::getId);
@@ -107,25 +89,26 @@ private Optional<String> getDeploymentIdByModel(@Nonnull final String modelName)
10789
* @throws NoSuchElementException if no running deployment is found for the scenario.
10890
*/
10991
@Nonnull
110-
public String getDeploymentIdByScenario(
92+
String getDeploymentIdByScenario(
11193
@Nonnull final ApiClient client,
11294
@Nonnull final String resourceGroup,
11395
@Nonnull final String scenarioId)
11496
throws NoSuchElementException {
115-
return getDeploymentIdByScenario(scenarioId)
97+
return getDeploymentIdByScenario(resourceGroup, scenarioId)
11698
.orElseGet(
11799
() -> {
118100
resetCache(client, resourceGroup);
119-
return getDeploymentIdByScenario(scenarioId)
101+
return getDeploymentIdByScenario(resourceGroup, scenarioId)
120102
.orElseThrow(
121103
() ->
122104
new NoSuchElementException(
123105
"No running deployment found for scenario: " + scenarioId));
124106
});
125107
}
126108

127-
private Optional<String> getDeploymentIdByScenario(@Nonnull final String scenarioId) {
128-
return CACHE.stream()
109+
private Optional<String> getDeploymentIdByScenario(
110+
@Nonnull final String resourceGroup, @Nonnull final String scenarioId) {
111+
return CACHE.getOrDefault(resourceGroup, new HashSet<>()).stream()
129112
.filter(deployment -> scenarioId.equals(deployment.getScenarioId()))
130113
.findFirst()
131114
.map(AiDeployment::getId);
@@ -138,7 +121,7 @@ private Optional<String> getDeploymentIdByScenario(@Nonnull final String scenari
138121
* @param deployment The deployment.
139122
* @return true if the deployment is of the model.
140123
*/
141-
protected static boolean isDeploymentOfModel(
124+
private static boolean isDeploymentOfModel(
142125
@Nonnull final String modelName, @Nonnull final AiDeployment deployment) {
143126
final var deploymentDetails = deployment.getDetails();
144127
// The AI Core specification doesn't mention that this is nullable, but it can be.

core/src/test/java/com/sap/ai/sdk/core/CacheTest.java

Lines changed: 62 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@
55
import static com.github.tomakehurst.wiremock.client.WireMock.get;
66
import static com.github.tomakehurst.wiremock.client.WireMock.getRequestedFor;
77
import static com.github.tomakehurst.wiremock.client.WireMock.urlPathEqualTo;
8+
import static org.assertj.core.api.Assertions.assertThatThrownBy;
89

910
import com.sap.ai.sdk.core.client.WireMockTestServer;
10-
import org.apache.hc.core5.http.HttpStatus;
11+
import com.sap.cloud.sdk.cloudplatform.connectivity.DefaultHttpDestination;
12+
import java.util.NoSuchElementException;
1113
import org.junit.jupiter.api.BeforeEach;
1214
import org.junit.jupiter.api.Test;
1315

@@ -20,67 +22,24 @@ void setupCache() {
2022
wireMockServer.resetRequests();
2123
}
2224

23-
private static void stubGPT4() {
25+
private static void stubGPT4(String resourceGroup) {
2426
wireMockServer.stubFor(
2527
get(urlPathEqualTo("/v2/lm/deployments"))
26-
.withHeader("AI-Resource-Group", equalTo("default"))
28+
.withHeader("AI-Resource-Group", equalTo(resourceGroup))
2729
.willReturn(
2830
aResponse()
29-
.withStatus(HttpStatus.SC_OK)
30-
.withHeader("content-type", "application/json")
31-
.withBody(
32-
"""
33-
{
34-
"count": 1,
35-
"resources": [
36-
{
37-
"configurationId": "7652a231-ba9b-4fcc-b473-2c355cb21b61",
38-
"configurationName": "gpt-4-32k",
39-
"createdAt": "2024-04-17T15:19:53Z",
40-
"deploymentUrl": "https://api.ai.intprod-eu12.eu-central-1.aws.ml.hana.ondemand.com/v2/inference/deployments/d19b998f347341aa",
41-
"details": {
42-
"resources": {
43-
"backend_details": {
44-
"model": {
45-
"name": "gpt-4-32k",
46-
"version": "latest"
47-
}
48-
}
49-
},
50-
"scaling": {
51-
"backend_details": {}
52-
}
53-
},
54-
"id": "d19b998f347341aa",
55-
"lastOperation": "CREATE",
56-
"latestRunningConfigurationId": "7652a231-ba9b-4fcc-b473-2c355cb21b61",
57-
"modifiedAt": "2024-05-07T13:05:45Z",
58-
"scenarioId": "foundation-models",
59-
"startTime": "2024-04-17T15:21:15Z",
60-
"status": "RUNNING",
61-
"submissionTime": "2024-04-17T15:20:11Z",
62-
"targetStatus": "RUNNING"
63-
}
64-
]
65-
}
66-
""")));
31+
.withBodyFile("GPT4DeploymentResponse.json")
32+
.withHeader("Content-Type", "application/json")));
6733
}
6834

69-
private static void stubEmpty() {
35+
private static void stubEmpty(String resourceGroup) {
7036
wireMockServer.stubFor(
7137
get(urlPathEqualTo("/v2/lm/deployments"))
72-
.withHeader("AI-Resource-Group", equalTo("default"))
38+
.withHeader("AI-Resource-Group", equalTo(resourceGroup))
7339
.willReturn(
7440
aResponse()
75-
.withStatus(HttpStatus.SC_OK)
76-
.withHeader("content-type", "application/json")
77-
.withBody(
78-
"""
79-
{
80-
"count": 0,
81-
"resources": []
82-
}
83-
""")));
41+
.withBodyFile("emptyDeploymentResponse.json")
42+
.withHeader("content-type", "application/json")));
8443
}
8544

8645
/**
@@ -94,31 +53,16 @@ private static void stubEmpty() {
9453
*/
9554
@Test
9655
void newDeployment() {
97-
stubGPT4();
98-
cacheUnderTest.loadCache(client, "default");
56+
String resourceGroup = "default";
57+
stubGPT4(resourceGroup);
9958

100-
cacheUnderTest.getDeploymentIdByModel(client, "default", "gpt-4-32k");
59+
cacheUnderTest.getDeploymentIdByModel(client, resourceGroup, "gpt-4-32k");
10160
wireMockServer.verify(1, getRequestedFor(urlPathEqualTo("/v2/lm/deployments")));
10261

103-
cacheUnderTest.getDeploymentIdByModel(client, "default", "gpt-4-32k");
62+
cacheUnderTest.getDeploymentIdByModel(client, resourceGroup, "gpt-4-32k");
10463
wireMockServer.verify(1, getRequestedFor(urlPathEqualTo("/v2/lm/deployments")));
10564
}
10665

107-
@Test
108-
void clearCache() {
109-
stubGPT4();
110-
cacheUnderTest.loadCache(client, "default");
111-
112-
cacheUnderTest.getDeploymentIdByModel(client, "default", "gpt-4-32k");
113-
wireMockServer.verify(1, getRequestedFor(urlPathEqualTo("/v2/lm/deployments")));
114-
115-
cacheUnderTest.clearCache();
116-
117-
cacheUnderTest.getDeploymentIdByModel(client, "default", "gpt-4-32k");
118-
// the deployment is not in the cache anymore, so we need to query it again
119-
wireMockServer.verify(2, getRequestedFor(urlPathEqualTo("/v2/lm/deployments")));
120-
}
121-
12266
/**
12367
* The user creates a deployment after starting with an empty cache.
12468
*
@@ -130,15 +74,57 @@ void clearCache() {
13074
*/
13175
@Test
13276
void newDeploymentAfterReset() {
133-
stubEmpty();
134-
cacheUnderTest.loadCache(client, "default");
135-
stubGPT4();
77+
String resourceGroup = "default";
78+
stubEmpty(resourceGroup);
79+
cacheUnderTest.resetCache(client, resourceGroup);
80+
stubGPT4(resourceGroup);
13681

137-
cacheUnderTest.getDeploymentIdByModel(client, "default", "gpt-4-32k");
82+
cacheUnderTest.getDeploymentIdByModel(client, resourceGroup, "gpt-4-32k");
13883
// 1 reset empty and 1 cache miss
13984
wireMockServer.verify(2, getRequestedFor(urlPathEqualTo("/v2/lm/deployments")));
14085

141-
cacheUnderTest.getDeploymentIdByModel(client, "default", "gpt-4-32k");
86+
cacheUnderTest.getDeploymentIdByModel(client, resourceGroup, "gpt-4-32k");
87+
wireMockServer.verify(2, getRequestedFor(urlPathEqualTo("/v2/lm/deployments")));
88+
}
89+
90+
@Test
91+
void resourceGroupIsolation() {
92+
String resourceGroupA = "A";
93+
String resourceGroupB = "B";
94+
stubGPT4(resourceGroupA);
95+
stubGPT4(resourceGroupB);
96+
97+
cacheUnderTest.getDeploymentIdByModel(client, resourceGroupA, "gpt-4-32k");
98+
wireMockServer.verify(
99+
1,
100+
getRequestedFor(urlPathEqualTo("/v2/lm/deployments"))
101+
.withHeader("AI-Resource-Group", equalTo(resourceGroupA)));
102+
wireMockServer.verify(
103+
0,
104+
getRequestedFor(urlPathEqualTo("/v2/lm/deployments"))
105+
.withHeader("AI-Resource-Group", equalTo(resourceGroupB)));
106+
}
107+
108+
@Test
109+
void exceptionDeploymentNotFound() {
110+
String resourceGroup = "default";
111+
stubEmpty(resourceGroup);
112+
113+
assertThatThrownBy(
114+
() -> cacheUnderTest.getDeploymentIdByModel(client, resourceGroup, "gpt-4-32k"))
115+
.isExactlyInstanceOf(NoSuchElementException.class)
116+
.hasMessageContaining("No running deployment found for model: gpt-4-32k");
117+
}
118+
119+
@Test
120+
void resetCache() {
121+
String resourceGroup = "default";
122+
stubGPT4(resourceGroup);
123+
cacheUnderTest.resetCache(client, resourceGroup);
124+
wireMockServer.verify(1, getRequestedFor(urlPathEqualTo("/v2/lm/deployments")));
125+
126+
final var destination = DefaultHttpDestination.builder(wireMockServer.baseUrl()).build();
127+
new AiCoreService().withDestination(destination).reloadCachedDeployments(resourceGroup);
142128
wireMockServer.verify(2, getRequestedFor(urlPathEqualTo("/v2/lm/deployments")));
143129
}
144130
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
{
2+
"count": 1,
3+
"resources": [
4+
{
5+
"configurationId": "7652a231-ba9b-4fcc-b473-2c355cb21b61",
6+
"configurationName": "gpt-4-32k",
7+
"createdAt": "2024-04-17T15:19:53Z",
8+
"deploymentUrl": "https://api.ai.intprod-eu12.eu-central-1.aws.ml.hana.ondemand.com/v2/inference/deployments/d19b998f347341aa",
9+
"details": {
10+
"resources": {
11+
"backend_details": {
12+
"model": {
13+
"name": "gpt-4-32k",
14+
"version": "latest"
15+
}
16+
}
17+
},
18+
"scaling": {
19+
"backend_details": {}
20+
}
21+
},
22+
"id": "d19b998f347341aa",
23+
"lastOperation": "CREATE",
24+
"latestRunningConfigurationId": "7652a231-ba9b-4fcc-b473-2c355cb21b61",
25+
"modifiedAt": "2024-05-07T13:05:45Z",
26+
"scenarioId": "foundation-models",
27+
"startTime": "2024-04-17T15:21:15Z",
28+
"status": "RUNNING",
29+
"submissionTime": "2024-04-17T15:20:11Z",
30+
"targetStatus": "RUNNING"
31+
}
32+
]
33+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"count": 0,
3+
"resources": []
4+
}

0 commit comments

Comments
 (0)