Skip to content

Commit 274b59f

Browse files
authored
test model over-commit (#7021)
1 parent 229012b commit 274b59f

File tree

4 files changed

+111
-5
lines changed

4 files changed

+111
-5
lines changed
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
@ModelDeployment @Functional @Models @CustomModelSpec @OverCommit
2+
Feature: Explicit Model deployment
3+
I deploy 3 iris models, each requiring 334MB. The server memory capacity is 1GB,
4+
with a 10% allowance for over-commit. The third model should be evicted to disk. Send
5+
inference requests to all models, expecting them all to pass, as the agent will
6+
automatically load the evicted model on-the-fly when a request is received
7+
8+
Scenario: Deploy 3 identical models and send inference
9+
Given I deploy model spec with timeout "10s":
10+
"""
11+
apiVersion: mlops.seldon.io/v1alpha1
12+
kind: Model
13+
metadata:
14+
name: "overcommit-1"
15+
spec:
16+
replicas: 1
17+
requirements:
18+
- sklearn
19+
- mlserver
20+
memory: 334000000
21+
storageUri: gs://seldon-models/scv2/samples/mlserver_1.3.5/iris-sklearn
22+
"""
23+
When the model "overcommit-1" should eventually become Ready with timeout "20s"
24+
Given I deploy model spec with timeout "10s":
25+
"""
26+
apiVersion: mlops.seldon.io/v1alpha1
27+
kind: Model
28+
metadata:
29+
name: "overcommit-2"
30+
spec:
31+
replicas: 1
32+
requirements:
33+
- sklearn
34+
- mlserver
35+
memory: 334000000
36+
storageUri: gs://seldon-models/scv2/samples/mlserver_1.3.5/iris-sklearn
37+
"""
38+
When the model "overcommit-2" should eventually become Ready with timeout "20s"
39+
Given I deploy model spec with timeout "10s":
40+
"""
41+
apiVersion: mlops.seldon.io/v1alpha1
42+
kind: Model
43+
metadata:
44+
name: "overcommit-3"
45+
spec:
46+
replicas: 1
47+
requirements:
48+
- sklearn
49+
- mlserver
50+
memory: 334000000
51+
storageUri: gs://seldon-models/scv2/samples/mlserver_1.3.5/iris-sklearn
52+
"""
53+
When the model "overcommit-3" should eventually become Ready with timeout "20s"
54+
Then send HTTP inference request with timeout "20s" to model "overcommit-1" with payload:
55+
"""
56+
{
57+
"inputs": [
58+
{
59+
"name": "predict",
60+
"shape": [1, 4],
61+
"datatype": "FP32",
62+
"data": [[1, 2, 3, 4]]
63+
}
64+
]
65+
}
66+
"""
67+
And expect http response status code "200"
68+
Then send HTTP inference request with timeout "20s" to model "overcommit-2" with payload:
69+
"""
70+
{
71+
"inputs": [
72+
{
73+
"name": "predict",
74+
"shape": [1, 4],
75+
"datatype": "FP32",
76+
"data": [[1, 2, 3, 4]]
77+
}
78+
]
79+
}
80+
"""
81+
And expect http response status code "200"
82+
Then send HTTP inference request with timeout "20s" to model "overcommit-3" with payload:
83+
"""
84+
{
85+
"inputs": [
86+
{
87+
"name": "predict",
88+
"shape": [1, 4],
89+
"datatype": "FP32",
90+
"data": [[1, 2, 3, 4]]
91+
}
92+
]
93+
}
94+
"""
95+
And expect http response status code "200"

tests/integration/godog/features/model/server_setup.feature

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,14 @@ Feature: Server setup
1515
spec:
1616
replicas: 1
1717
serverConfig: mlserver
18+
podSpec:
19+
containers:
20+
- name: agent
21+
env:
22+
- name: SELDON_OVERCOMMIT_PERCENTAGE
23+
value: "10"
24+
- name: MEMORY_REQUEST
25+
value: "1073741824"
1826
"""
1927
When the server should eventually become Ready with timeout "30s"
2028
Then ensure only "1" pod(s) are deployed for server and they are Ready

tests/integration/godog/main_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ const cmdOptPrefix = "godog."
2424

2525
var opts = godog.Options{
2626
Output: colors.Colored(os.Stdout),
27-
Format: "progress", // can define default values
27+
Format: "pretty", // can define default values
2828
}
2929

3030
func init() {

tests/integration/godog/steps/server_steps.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -103,20 +103,23 @@ func (s *server) deployServerSpec(ctx context.Context, spec *godog.DocString) er
103103
serverSpec.Namespace = s.namespace
104104
s.currentServer = serverSpec
105105
s.applyScenarioLabel()
106+
107+
s.log.Debugf("Attempts to create server %s", serverSpec.Name)
108+
106109
if _, err := s.seldonK8sClient.MlopsV1alpha1().Servers(s.namespace).Create(ctx, s.currentServer, metav1.CreateOptions{}); err != nil {
107110
if k8serrors.IsAlreadyExists(err) {
108111
s.log.Debugf("server %s already exists, checking if equal", s.currentServer.Name)
109-
deployerServer, err := s.seldonK8sClient.MlopsV1alpha1().Servers(s.namespace).Get(ctx, s.currentServer.Name, metav1.GetOptions{})
112+
deployedServer, err := s.seldonK8sClient.MlopsV1alpha1().Servers(s.namespace).Get(ctx, s.currentServer.Name, metav1.GetOptions{})
110113
if err != nil {
111114
return fmt.Errorf("failed getting server: %w", err)
112115
}
113-
if equality.Semantic.DeepEqual(serverSpec.Spec, deployerServer.Spec) {
116+
if equality.Semantic.DeepEqual(serverSpec.Spec, deployedServer.Spec) {
114117
s.log.Debugf("server %s deployed spec equals desired spec", s.currentServer.Name)
115118
return nil
116119
}
117120
s.log.Debugf("server %s deployed spec needs updating to desired spec", s.currentServer.Name)
118-
deployerServer.Spec = s.currentServer.Spec
119-
if _, err := s.seldonK8sClient.MlopsV1alpha1().Servers(s.namespace).Update(ctx, deployerServer, metav1.UpdateOptions{}); err != nil {
121+
deployedServer.Spec = s.currentServer.Spec
122+
if _, err := s.seldonK8sClient.MlopsV1alpha1().Servers(s.namespace).Update(ctx, deployedServer, metav1.UpdateOptions{}); err != nil {
120123
return fmt.Errorf("failed updating server: %w", err)
121124
}
122125
return nil

0 commit comments

Comments
 (0)