From 2d861438ef134e78ad5408f611ca8a807d7a69a9 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Tue, 4 Apr 2023 21:11:29 -0600 Subject: [PATCH 1/3] blue-green modelmesh use case Signed-off-by: Michael Kalantar --- .../controllers/blue-green/bumpweights.sh | 4 + testdata/controllers/blue-green/cleanup.sh | 10 + .../controllers/blue-green/execintosleep.sh | 6 + testdata/controllers/blue-green/initialize.sh | 150 +++++++ testdata/controllers/blue-green/promote-v2.sh | 31 ++ testdata/controllers/blue-green/sleep.sh | 377 ++++++++++++++++++ testdata/controllers/blue-green/steps.sh | 41 ++ .../controllers/blue-green/v2-candidate.sh | 27 ++ testdata/controllers/config.yaml | 9 +- 9 files changed, 654 insertions(+), 1 deletion(-) create mode 100755 testdata/controllers/blue-green/bumpweights.sh create mode 100755 testdata/controllers/blue-green/cleanup.sh create mode 100755 testdata/controllers/blue-green/execintosleep.sh create mode 100755 testdata/controllers/blue-green/initialize.sh create mode 100755 testdata/controllers/blue-green/promote-v2.sh create mode 100755 testdata/controllers/blue-green/sleep.sh create mode 100644 testdata/controllers/blue-green/steps.sh create mode 100755 testdata/controllers/blue-green/v2-candidate.sh diff --git a/testdata/controllers/blue-green/bumpweights.sh b/testdata/controllers/blue-green/bumpweights.sh new file mode 100755 index 000000000..38ed5dc5a --- /dev/null +++ b/testdata/controllers/blue-green/bumpweights.sh @@ -0,0 +1,4 @@ +echo "kubectl annotate --overwrite isvc wisdom-primary-weight iter8.tools/weight='20'" +echo "kubectl annotate --overwrite isvc wisdom-candidate-weight iter8.tools/weight='80'" +kubectl annotate --overwrite cm wisdom-primary-weight iter8.tools/weight='20' +kubectl annotate --overwrite cm wisdom-candidate-weight iter8.tools/weight='80' diff --git a/testdata/controllers/blue-green/cleanup.sh b/testdata/controllers/blue-green/cleanup.sh new file mode 100755 index 000000000..b02d1d4b0 --- /dev/null +++ 
b/testdata/controllers/blue-green/cleanup.sh @@ -0,0 +1,10 @@ +#!/bin/sh +# Cleanup application +kubectl -n modelmesh-serving delete isvc/wisdom-primary cm/wisdom-primary-weight +kubectl -n modelmesh-serving delete isvc/wisdom-candidate cm/wisdom-candidate-weight +# Cleanup routemap(s) +kubectl delete cm/wisdom +# Cleanup networking +kubectl delete svc/wisdom gateway/wisdom-gateway virtualservice/wisdom +# Cleanup sleep utility +kubectl delete deploy/sleep cm/wisdom-input diff --git a/testdata/controllers/blue-green/execintosleep.sh b/testdata/controllers/blue-green/execintosleep.sh new file mode 100755 index 000000000..1391b7697 --- /dev/null +++ b/testdata/controllers/blue-green/execintosleep.sh @@ -0,0 +1,6 @@ +#!/bin/sh +# First, get $SLEEP_POD +SLEEP_POD=$(kubectl get pod --sort-by={metadata.creationTimestamp} -l app=sleep -o jsonpath={.items..metadata.name} | rev | cut -d' ' -f 1 | rev) +# Second, exec into it +kubectl exec --stdin --tty "${SLEEP_POD}" -c sleep -- /bin/sh +# Third, cd wisdom && source query.sh in order to query wisdom \ No newline at end of file diff --git a/testdata/controllers/blue-green/initialize.sh b/testdata/controllers/blue-green/initialize.sh new file mode 100755 index 000000000..ef55f3b2a --- /dev/null +++ b/testdata/controllers/blue-green/initialize.sh @@ -0,0 +1,150 @@ +cat < parameters = 4; + + // The tensor contents using a data-type format. This field must + // not be specified if "raw" tensor contents are being used for + // the inference request. + InferTensorContents contents = 5; + } + + // An output tensor requested for an inference request. + message InferRequestedOutputTensor + { + // The tensor name. + string name = 1; + + // Optional requested output tensor parameters. + map parameters = 2; + } + + // The name of the model to use for inferencing. + string model_name = 1; + + // The version of the model to use for inference. If not given the + // server will choose a version based on the model and internal policy. 
+ string model_version = 2; + + // Optional identifier for the request. If specified will be + // returned in the response. + string id = 3; + + // Optional inference parameters. + map parameters = 4; + + // The input tensors for the inference. + repeated InferInputTensor inputs = 5; + + // The requested output tensors for the inference. Optional, if not + // specified all outputs produced by the model will be returned. + repeated InferRequestedOutputTensor outputs = 6; + + // The data contained in an input tensor can be represented in "raw" + // bytes form or in the repeated type that matches the tensor's data + // type. To use the raw representation 'raw_input_contents' must be + // initialized with data for each tensor in the same order as + // 'inputs'. For each tensor, the size of this content must match + // what is expected by the tensor's shape and data type. The raw + // data must be the flattened, one-dimensional, row-major order of + // the tensor elements without any stride or padding between the + // elements. Note that the FP16 data type must be represented as raw + // content as there is no specific data type for a 16-bit float + // type. + // + // If this field is specified then InferInputTensor::contents must + // not be specified for any input tensor. + repeated bytes raw_input_contents = 7; + } + + message ModelInferResponse + { + // An output tensor returned for an inference request. + message InferOutputTensor + { + // The tensor name. + string name = 1; + + // The tensor data type. + string datatype = 2; + + // The tensor shape. + repeated int64 shape = 3; + + // Optional output tensor parameters. + map parameters = 4; + + // The tensor contents using a data-type format. This field must + // not be specified if "raw" tensor contents are being used for + // the inference response. + InferTensorContents contents = 5; + } + + // The name of the model used for inference. + string model_name = 1; + + // The version of the model used for inference. 
+ string model_version = 2; + + // The id of the inference request if one was specified. + string id = 3; + + // Optional inference response parameters. + map parameters = 4; + + // The output tensors holding inference results. + repeated InferOutputTensor outputs = 5; + + // The data contained in an output tensor can be represented in + // "raw" bytes form or in the repeated type that matches the + // tensor's data type. To use the raw representation 'raw_output_contents' + // must be initialized with data for each tensor in the same order as + // 'outputs'. For each tensor, the size of this content must match + // what is expected by the tensor's shape and data type. The raw + // data must be the flattened, one-dimensional, row-major order of + // the tensor elements without any stride or padding between the + // elements. Note that the FP16 data type must be represented as raw + // content as there is no specific data type for a 16-bit float + // type. + // + // If this field is specified then InferOutputTensor::contents must + // not be specified for any output tensor. + repeated bytes raw_output_contents = 6; + } + + // An inference parameter value. The Parameters message describes a + // “name”/”value” pair, where the “name” is the name of the parameter + // and the “value” is a boolean, integer, or string corresponding to + // the parameter. + message InferParameter + { + // The parameter value can be a string, an int64, a boolean + // or a message specific to a predefined parameter. + oneof parameter_choice + { + // A boolean parameter value. + bool bool_param = 1; + + // An int64 parameter value. + int64 int64_param = 2; + + // A string parameter value. + string string_param = 3; + } + } + + // The data contained in a tensor represented by the repeated type + // that matches the tensor's data type. Protobuf oneof is not used + // because oneofs cannot contain repeated fields. + message InferTensorContents + { + // Representation for BOOL data type. 
The size must match what is + // expected by the tensor's shape. The contents must be the flattened, + // one-dimensional, row-major order of the tensor elements. + repeated bool bool_contents = 1; + + // Representation for INT8, INT16, and INT32 data types. The size + // must match what is expected by the tensor's shape. The contents + // must be the flattened, one-dimensional, row-major order of the + // tensor elements. + repeated int32 int_contents = 2; + + // Representation for INT64 data types. The size must match what + // is expected by the tensor's shape. The contents must be the + // flattened, one-dimensional, row-major order of the tensor elements. + repeated int64 int64_contents = 3; + + // Representation for UINT8, UINT16, and UINT32 data types. The size + // must match what is expected by the tensor's shape. The contents + // must be the flattened, one-dimensional, row-major order of the + // tensor elements. + repeated uint32 uint_contents = 4; + + // Representation for UINT64 data types. The size must match what + // is expected by the tensor's shape. The contents must be the + // flattened, one-dimensional, row-major order of the tensor elements. + repeated uint64 uint64_contents = 5; + + // Representation for FP32 data type. The size must match what is + // expected by the tensor's shape. The contents must be the flattened, + // one-dimensional, row-major order of the tensor elements. + repeated float fp32_contents = 6; + + // Representation for FP64 data type. The size must match what is + // expected by the tensor's shape. The contents must be the flattened, + // one-dimensional, row-major order of the tensor elements. + repeated double fp64_contents = 7; + + // Representation for BYTES data type. The size must match what is + // expected by the tensor's shape. The contents must be the flattened, + // one-dimensional, row-major order of the tensor elements. 
+ repeated bytes bytes_contents = 8; + } + grpc_input.json: | + { + "inputs": [ + { + "name": "predict", + "shape": [1, 64], + "datatype": "FP32", + "contents": { + "fp32_contents": [0.0, 0.0, 1.0, 11.0, 14.0, 15.0, 3.0, 0.0, 0.0, 1.0, 13.0, 16.0, 12.0, 16.0, 8.0, 0.0, 0.0, 8.0, 16.0, 4.0, 6.0, 16.0, 5.0, 0.0, 0.0, 5.0, 15.0, 11.0, 13.0, 14.0, 0.0, 0.0, 0.0, 0.0, 2.0, 12.0, 16.0, 13.0, 0.0, 0.0, 0.0, 0.0, 0.0, 13.0, 16.0, 16.0, 6.0, 0.0, 0.0, 0.0, 0.0, 16.0, 16.0, 16.0, 7.0, 0.0, 0.0, 0.0, 0.0, 11.0, 13.0, 12.0, 1.0, 0.0] + } + } + ] + } + http_input.json: | + { + "inputs": [ + { + "name": "predict", + "shape": [1, 64], + "datatype": "FP32", + "data": [0.0, 0.0, 1.0, 11.0, 14.0, 15.0, 3.0, 0.0, 0.0, 1.0, 13.0, 16.0, 12.0, 16.0, 8.0, 0.0, 0.0, 8.0, 16.0, 4.0, 6.0, 16.0, 5.0, 0.0, 0.0, 5.0, 15.0, 11.0, 13.0, 14.0, 0.0, 0.0, 0.0, 0.0, 2.0, 12.0, 16.0, 13.0, 0.0, 0.0, 0.0, 0.0, 0.0, 13.0, 16.0, 16.0, 6.0, 0.0, 0.0, 0.0, 0.0, 16.0, 16.0, 16.0, 7.0, 0.0, 0.0, 0.0, 0.0, 11.0, 13.0, 12.0, 1.0, 0.0] + } + ] + } + query.sh: | + cat grpc_input.json | grpcurl -plaintext -proto kserve.proto -d @ wisdom.modelmesh-serving:80 inference.GRPCInferenceService.ModelInfer +EOF diff --git a/testdata/controllers/blue-green/steps.sh b/testdata/controllers/blue-green/steps.sh new file mode 100644 index 000000000..cbef1c44f --- /dev/null +++ b/testdata/controllers/blue-green/steps.sh @@ -0,0 +1,41 @@ +# Start iter8 controller +POD_NAME=blue-leader-0 POD_NAMESPACE=modelmesh-serving CONFIG_FILE=testdata/controllers/config.yaml go run main.go controllers -l trace +# initialize primary v1 +./initialize.sh +# query +./sleep.sh +# in a new terminal +./execintosleep.sh +# inside the sleep pod +cd wisdom +source query.sh + +# Explore what the initialization step entailed ... templated virtual service +less initialize.sh + +# Explore status of virtual service ... +kubectl get virtualservice wisdom -o yaml + +# check back on status of query ... 
+ +# candidate v2 +./v2-candidate.sh + +source query.sh + +# Explore what candidate release entails... +less v2-candidate.sh + +# Explore status of virtual service ... +kubectl get virtualservice wisdom -o yaml + +# check back on status of warm up + +# bump up traffic for candidate v2 +./bumpweights.sh + +# Explore status of virtual service ... +kubectl get virtualservice wisdom -o yaml + +# promote v2 +./promote-v2.sh diff --git a/testdata/controllers/blue-green/v2-candidate.sh b/testdata/controllers/blue-green/v2-candidate.sh new file mode 100755 index 000000000..1141e8543 --- /dev/null +++ b/testdata/controllers/blue-green/v2-candidate.sh @@ -0,0 +1,27 @@ +cat < Date: Wed, 5 Apr 2023 12:11:28 -0600 Subject: [PATCH 2/3] canary use case on model mesh Signed-off-by: Michael Kalantar --- testdata/controllers/blue-green/sleep.sh | 11 - testdata/controllers/canary-mm/cleanup.sh | 9 + .../controllers/canary-mm/execintosleep.sh | 6 + testdata/controllers/canary-mm/initialize.sh | 140 +++++++ testdata/controllers/canary-mm/promote-v2.sh | 21 + testdata/controllers/canary-mm/sleep.sh | 368 ++++++++++++++++++ testdata/controllers/canary-mm/steps.sh | 23 ++ .../controllers/canary-mm/v2-candidate.sh | 20 + 8 files changed, 587 insertions(+), 11 deletions(-) create mode 100755 testdata/controllers/canary-mm/cleanup.sh create mode 100755 testdata/controllers/canary-mm/execintosleep.sh create mode 100755 testdata/controllers/canary-mm/initialize.sh create mode 100755 testdata/controllers/canary-mm/promote-v2.sh create mode 100755 testdata/controllers/canary-mm/sleep.sh create mode 100644 testdata/controllers/canary-mm/steps.sh create mode 100755 testdata/controllers/canary-mm/v2-candidate.sh diff --git a/testdata/controllers/blue-green/sleep.sh b/testdata/controllers/blue-green/sleep.sh index 678f1a406..956127ac4 100755 --- a/testdata/controllers/blue-green/sleep.sh +++ b/testdata/controllers/blue-green/sleep.sh @@ -361,17 +361,6 @@ data: } ] } - http_input.json: | - { - 
"inputs": [ - { - "name": "predict", - "shape": [1, 64], - "datatype": "FP32", - "data": [0.0, 0.0, 1.0, 11.0, 14.0, 15.0, 3.0, 0.0, 0.0, 1.0, 13.0, 16.0, 12.0, 16.0, 8.0, 0.0, 0.0, 8.0, 16.0, 4.0, 6.0, 16.0, 5.0, 0.0, 0.0, 5.0, 15.0, 11.0, 13.0, 14.0, 0.0, 0.0, 0.0, 0.0, 2.0, 12.0, 16.0, 13.0, 0.0, 0.0, 0.0, 0.0, 0.0, 13.0, 16.0, 16.0, 6.0, 0.0, 0.0, 0.0, 0.0, 16.0, 16.0, 16.0, 7.0, 0.0, 0.0, 0.0, 0.0, 11.0, 13.0, 12.0, 1.0, 0.0] - } - ] - } query.sh: | cat grpc_input.json | grpcurl -plaintext -proto kserve.proto -d @ wisdom.modelmesh-serving:80 inference.GRPCInferenceService.ModelInfer EOF diff --git a/testdata/controllers/canary-mm/cleanup.sh b/testdata/controllers/canary-mm/cleanup.sh new file mode 100755 index 000000000..00ab0a9b9 --- /dev/null +++ b/testdata/controllers/canary-mm/cleanup.sh @@ -0,0 +1,9 @@ +#!/bin/sh +# Cleanup application +kubectl -n modelmesh-serving delete isvc/wisdom-primary isvc/wisdom-candidate +# Cleanup routemap +kubectl delete cm/wisdom +# Cleanup networking +kubectl delete svc/wisdom vs/wisdom +# Cleanup sleep utility +kubectl delete deploy/sleep cm/wisdom-input diff --git a/testdata/controllers/canary-mm/execintosleep.sh b/testdata/controllers/canary-mm/execintosleep.sh new file mode 100755 index 000000000..1391b7697 --- /dev/null +++ b/testdata/controllers/canary-mm/execintosleep.sh @@ -0,0 +1,6 @@ +#!/bin/sh +# First, get $SLEEP_POD +SLEEP_POD=$(kubectl get pod --sort-by={metadata.creationTimestamp} -l app=sleep -o jsonpath={.items..metadata.name} | rev | cut -d' ' -f 1 | rev) +# Second, exec into it +kubectl exec --stdin --tty "${SLEEP_POD}" -c sleep -- /bin/sh +# Third, cd wisdom && source query.sh in order to query wisdom \ No newline at end of file diff --git a/testdata/controllers/canary-mm/initialize.sh b/testdata/controllers/canary-mm/initialize.sh new file mode 100755 index 000000000..78641fe2d --- /dev/null +++ b/testdata/controllers/canary-mm/initialize.sh @@ -0,0 +1,140 @@ +cat < parameters = 4; + + // The tensor 
contents using a data-type format. This field must + // not be specified if "raw" tensor contents are being used for + // the inference request. + InferTensorContents contents = 5; + } + + // An output tensor requested for an inference request. + message InferRequestedOutputTensor + { + // The tensor name. + string name = 1; + + // Optional requested output tensor parameters. + map parameters = 2; + } + + // The name of the model to use for inferencing. + string model_name = 1; + + // The version of the model to use for inference. If not given the + // server will choose a version based on the model and internal policy. + string model_version = 2; + + // Optional identifier for the request. If specified will be + // returned in the response. + string id = 3; + + // Optional inference parameters. + map parameters = 4; + + // The input tensors for the inference. + repeated InferInputTensor inputs = 5; + + // The requested output tensors for the inference. Optional, if not + // specified all outputs produced by the model will be returned. + repeated InferRequestedOutputTensor outputs = 6; + + // The data contained in an input tensor can be represented in "raw" + // bytes form or in the repeated type that matches the tensor's data + // type. To use the raw representation 'raw_input_contents' must be + // initialized with data for each tensor in the same order as + // 'inputs'. For each tensor, the size of this content must match + // what is expected by the tensor's shape and data type. The raw + // data must be the flattened, one-dimensional, row-major order of + // the tensor elements without any stride or padding between the + // elements. Note that the FP16 data type must be represented as raw + // content as there is no specific data type for a 16-bit float + // type. + // + // If this field is specified then InferInputTensor::contents must + // not be specified for any input tensor. 
+ repeated bytes raw_input_contents = 7; + } + + message ModelInferResponse + { + // An output tensor returned for an inference request. + message InferOutputTensor + { + // The tensor name. + string name = 1; + + // The tensor data type. + string datatype = 2; + + // The tensor shape. + repeated int64 shape = 3; + + // Optional output tensor parameters. + map parameters = 4; + + // The tensor contents using a data-type format. This field must + // not be specified if "raw" tensor contents are being used for + // the inference response. + InferTensorContents contents = 5; + } + + // The name of the model used for inference. + string model_name = 1; + + // The version of the model used for inference. + string model_version = 2; + + // The id of the inference request if one was specified. + string id = 3; + + // Optional inference response parameters. + map parameters = 4; + + // The output tensors holding inference results. + repeated InferOutputTensor outputs = 5; + + // The data contained in an output tensor can be represented in + // "raw" bytes form or in the repeated type that matches the + // tensor's data type. To use the raw representation 'raw_output_contents' + // must be initialized with data for each tensor in the same order as + // 'outputs'. For each tensor, the size of this content must match + // what is expected by the tensor's shape and data type. The raw + // data must be the flattened, one-dimensional, row-major order of + // the tensor elements without any stride or padding between the + // elements. Note that the FP16 data type must be represented as raw + // content as there is no specific data type for a 16-bit float + // type. + // + // If this field is specified then InferOutputTensor::contents must + // not be specified for any output tensor. + repeated bytes raw_output_contents = 6; + } + + // An inference parameter value. 
The Parameters message describes a + // “name”/”value” pair, where the “name” is the name of the parameter + // and the “value” is a boolean, integer, or string corresponding to + // the parameter. + message InferParameter + { + // The parameter value can be a string, an int64, a boolean + // or a message specific to a predefined parameter. + oneof parameter_choice + { + // A boolean parameter value. + bool bool_param = 1; + + // An int64 parameter value. + int64 int64_param = 2; + + // A string parameter value. + string string_param = 3; + } + } + + // The data contained in a tensor represented by the repeated type + // that matches the tensor's data type. Protobuf oneof is not used + // because oneofs cannot contain repeated fields. + message InferTensorContents + { + // Representation for BOOL data type. The size must match what is + // expected by the tensor's shape. The contents must be the flattened, + // one-dimensional, row-major order of the tensor elements. + repeated bool bool_contents = 1; + + // Representation for INT8, INT16, and INT32 data types. The size + // must match what is expected by the tensor's shape. The contents + // must be the flattened, one-dimensional, row-major order of the + // tensor elements. + repeated int32 int_contents = 2; + + // Representation for INT64 data types. The size must match what + // is expected by the tensor's shape. The contents must be the + // flattened, one-dimensional, row-major order of the tensor elements. + repeated int64 int64_contents = 3; + + // Representation for UINT8, UINT16, and UINT32 data types. The size + // must match what is expected by the tensor's shape. The contents + // must be the flattened, one-dimensional, row-major order of the + // tensor elements. + repeated uint32 uint_contents = 4; + + // Representation for UINT64 data types. The size must match what + // is expected by the tensor's shape. 
The contents must be the + // flattened, one-dimensional, row-major order of the tensor elements. + repeated uint64 uint64_contents = 5; + + // Representation for FP32 data type. The size must match what is + // expected by the tensor's shape. The contents must be the flattened, + // one-dimensional, row-major order of the tensor elements. + repeated float fp32_contents = 6; + + // Representation for FP64 data type. The size must match what is + // expected by the tensor's shape. The contents must be the flattened, + // one-dimensional, row-major order of the tensor elements. + repeated double fp64_contents = 7; + + // Representation for BYTES data type. The size must match what is + // expected by the tensor's shape. The contents must be the flattened, + // one-dimensional, row-major order of the tensor elements. + repeated bytes bytes_contents = 8; + } + grpc_input.json: | + { + "inputs": [ + { + "name": "predict", + "shape": [1, 64], + "datatype": "FP32", + "contents": { + "fp32_contents": [0.0, 0.0, 1.0, 11.0, 14.0, 15.0, 3.0, 0.0, 0.0, 1.0, 13.0, 16.0, 12.0, 16.0, 8.0, 0.0, 0.0, 8.0, 16.0, 4.0, 6.0, 16.0, 5.0, 0.0, 0.0, 5.0, 15.0, 11.0, 13.0, 14.0, 0.0, 0.0, 0.0, 0.0, 2.0, 12.0, 16.0, 13.0, 0.0, 0.0, 0.0, 0.0, 0.0, 13.0, 16.0, 16.0, 6.0, 0.0, 0.0, 0.0, 0.0, 16.0, 16.0, 16.0, 7.0, 0.0, 0.0, 0.0, 0.0, 11.0, 13.0, 12.0, 1.0, 0.0] + } + } + ] + } + query.sh: | + cat grpc_input.json | grpcurl -plaintext -proto kserve.proto -d @ wisdom.modelmesh-serving:80 inference.GRPCInferenceService.ModelInfer + test_query.sh: | + cat grpc_input.json | grpcurl -plaintext -proto kserve.proto -d @ -H 'traffic: test' wisdom.modelmesh-serving:80 inference.GRPCInferenceService.ModelInfer +EOF diff --git a/testdata/controllers/canary-mm/steps.sh b/testdata/controllers/canary-mm/steps.sh new file mode 100644 index 000000000..5be336e94 --- /dev/null +++ b/testdata/controllers/canary-mm/steps.sh @@ -0,0 +1,23 @@ +# initialize primary v1 +./initialize.sh +# query +./sleep.sh +# in a new 
terminal +./execintosleep.sh +# inside the sleep pod +cd wisdom +source query.sh + +# candidate v2 +./v2-candidate.sh + +# inside the sleep pod +source query.sh +source test_query.sh + +# promote v2 +./promote-v2.sh +kubectl delete isvc wisdom-candidate + +# clean up +./cleanup.sh \ No newline at end of file diff --git a/testdata/controllers/canary-mm/v2-candidate.sh b/testdata/controllers/canary-mm/v2-candidate.sh new file mode 100755 index 000000000..9b69704df --- /dev/null +++ b/testdata/controllers/canary-mm/v2-candidate.sh @@ -0,0 +1,20 @@ +cat < Date: Wed, 5 Apr 2023 17:28:20 -0600 Subject: [PATCH 3/3] add mirror use case Signed-off-by: Michael Kalantar --- .../bumpweights.sh | 0 .../{blue-green => blue-green-mm}/cleanup.sh | 0 .../execintosleep.sh | 0 .../initialize.sh | 0 .../promote-v2.sh | 0 .../{blue-green => blue-green-mm}/sleep.sh | 0 .../{blue-green => blue-green-mm}/steps.sh | 0 .../v2-candidate.sh | 0 testdata/controllers/mirror-mm/cleanup.sh | 9 + .../controllers/mirror-mm/execintosleep.sh | 6 + testdata/controllers/mirror-mm/initialize.sh | 133 +++++++ testdata/controllers/mirror-mm/promote-v2.sh | 21 + testdata/controllers/mirror-mm/sleep.sh | 366 ++++++++++++++++++ testdata/controllers/mirror-mm/steps.sh | 23 ++ .../controllers/mirror-mm/v2-candidate.sh | 20 + testdata/controllers/mirror-mm/vs.yaml | 32 ++ 16 files changed, 610 insertions(+) rename testdata/controllers/{blue-green => blue-green-mm}/bumpweights.sh (100%) rename testdata/controllers/{blue-green => blue-green-mm}/cleanup.sh (100%) rename testdata/controllers/{blue-green => blue-green-mm}/execintosleep.sh (100%) rename testdata/controllers/{blue-green => blue-green-mm}/initialize.sh (100%) rename testdata/controllers/{blue-green => blue-green-mm}/promote-v2.sh (100%) rename testdata/controllers/{blue-green => blue-green-mm}/sleep.sh (100%) rename testdata/controllers/{blue-green => blue-green-mm}/steps.sh (100%) rename testdata/controllers/{blue-green => 
blue-green-mm}/v2-candidate.sh (100%) create mode 100755 testdata/controllers/mirror-mm/cleanup.sh create mode 100755 testdata/controllers/mirror-mm/execintosleep.sh create mode 100755 testdata/controllers/mirror-mm/initialize.sh create mode 100755 testdata/controllers/mirror-mm/promote-v2.sh create mode 100755 testdata/controllers/mirror-mm/sleep.sh create mode 100644 testdata/controllers/mirror-mm/steps.sh create mode 100755 testdata/controllers/mirror-mm/v2-candidate.sh create mode 100755 testdata/controllers/mirror-mm/vs.yaml diff --git a/testdata/controllers/blue-green/bumpweights.sh b/testdata/controllers/blue-green-mm/bumpweights.sh similarity index 100% rename from testdata/controllers/blue-green/bumpweights.sh rename to testdata/controllers/blue-green-mm/bumpweights.sh diff --git a/testdata/controllers/blue-green/cleanup.sh b/testdata/controllers/blue-green-mm/cleanup.sh similarity index 100% rename from testdata/controllers/blue-green/cleanup.sh rename to testdata/controllers/blue-green-mm/cleanup.sh diff --git a/testdata/controllers/blue-green/execintosleep.sh b/testdata/controllers/blue-green-mm/execintosleep.sh similarity index 100% rename from testdata/controllers/blue-green/execintosleep.sh rename to testdata/controllers/blue-green-mm/execintosleep.sh diff --git a/testdata/controllers/blue-green/initialize.sh b/testdata/controllers/blue-green-mm/initialize.sh similarity index 100% rename from testdata/controllers/blue-green/initialize.sh rename to testdata/controllers/blue-green-mm/initialize.sh diff --git a/testdata/controllers/blue-green/promote-v2.sh b/testdata/controllers/blue-green-mm/promote-v2.sh similarity index 100% rename from testdata/controllers/blue-green/promote-v2.sh rename to testdata/controllers/blue-green-mm/promote-v2.sh diff --git a/testdata/controllers/blue-green/sleep.sh b/testdata/controllers/blue-green-mm/sleep.sh similarity index 100% rename from testdata/controllers/blue-green/sleep.sh rename to 
testdata/controllers/blue-green-mm/sleep.sh diff --git a/testdata/controllers/blue-green/steps.sh b/testdata/controllers/blue-green-mm/steps.sh similarity index 100% rename from testdata/controllers/blue-green/steps.sh rename to testdata/controllers/blue-green-mm/steps.sh diff --git a/testdata/controllers/blue-green/v2-candidate.sh b/testdata/controllers/blue-green-mm/v2-candidate.sh similarity index 100% rename from testdata/controllers/blue-green/v2-candidate.sh rename to testdata/controllers/blue-green-mm/v2-candidate.sh diff --git a/testdata/controllers/mirror-mm/cleanup.sh b/testdata/controllers/mirror-mm/cleanup.sh new file mode 100755 index 000000000..00ab0a9b9 --- /dev/null +++ b/testdata/controllers/mirror-mm/cleanup.sh @@ -0,0 +1,9 @@ +#!/bin/sh +# Cleanup application +kubectl -n modelmesh-serving delete isvc/wisdom-primary isvc/wisdom-candidate +# Cleanup routemap +kubectl delete cm/wisdom +# Cleanup networking +kubectl delete svc/wisdom vs/wisdom +# Cleanup sleep utility +kubectl delete deploy/sleep cm/wisdom-input diff --git a/testdata/controllers/mirror-mm/execintosleep.sh b/testdata/controllers/mirror-mm/execintosleep.sh new file mode 100755 index 000000000..1391b7697 --- /dev/null +++ b/testdata/controllers/mirror-mm/execintosleep.sh @@ -0,0 +1,6 @@ +#!/bin/sh +# First, get $SLEEP_POD +SLEEP_POD=$(kubectl get pod --sort-by={metadata.creationTimestamp} -l app=sleep -o jsonpath={.items..metadata.name} | rev | cut -d' ' -f 1 | rev) +# Second, exec into it +kubectl exec --stdin --tty "${SLEEP_POD}" -c sleep -- /bin/sh +# Third, cd wisdom && source query.sh in order to query wisdom \ No newline at end of file diff --git a/testdata/controllers/mirror-mm/initialize.sh b/testdata/controllers/mirror-mm/initialize.sh new file mode 100755 index 000000000..935a5ca83 --- /dev/null +++ b/testdata/controllers/mirror-mm/initialize.sh @@ -0,0 +1,133 @@ +cat < parameters = 4; + + // The tensor contents using a data-type format. 
This field must + // not be specified if "raw" tensor contents are being used for + // the inference request. + InferTensorContents contents = 5; + } + + // An output tensor requested for an inference request. + message InferRequestedOutputTensor + { + // The tensor name. + string name = 1; + + // Optional requested output tensor parameters. + map parameters = 2; + } + + // The name of the model to use for inferencing. + string model_name = 1; + + // The version of the model to use for inference. If not given the + // server will choose a version based on the model and internal policy. + string model_version = 2; + + // Optional identifier for the request. If specified will be + // returned in the response. + string id = 3; + + // Optional inference parameters. + map parameters = 4; + + // The input tensors for the inference. + repeated InferInputTensor inputs = 5; + + // The requested output tensors for the inference. Optional, if not + // specified all outputs produced by the model will be returned. + repeated InferRequestedOutputTensor outputs = 6; + + // The data contained in an input tensor can be represented in "raw" + // bytes form or in the repeated type that matches the tensor's data + // type. To use the raw representation 'raw_input_contents' must be + // initialized with data for each tensor in the same order as + // 'inputs'. For each tensor, the size of this content must match + // what is expected by the tensor's shape and data type. The raw + // data must be the flattened, one-dimensional, row-major order of + // the tensor elements without any stride or padding between the + // elements. Note that the FP16 data type must be represented as raw + // content as there is no specific data type for a 16-bit float + // type. + // + // If this field is specified then InferInputTensor::contents must + // not be specified for any input tensor. 
+ repeated bytes raw_input_contents = 7; + } + + message ModelInferResponse + { + // An output tensor returned for an inference request. + message InferOutputTensor + { + // The tensor name. + string name = 1; + + // The tensor data type. + string datatype = 2; + + // The tensor shape. + repeated int64 shape = 3; + + // Optional output tensor parameters. + map parameters = 4; + + // The tensor contents using a data-type format. This field must + // not be specified if "raw" tensor contents are being used for + // the inference response. + InferTensorContents contents = 5; + } + + // The name of the model used for inference. + string model_name = 1; + + // The version of the model used for inference. + string model_version = 2; + + // The id of the inference request if one was specified. + string id = 3; + + // Optional inference response parameters. + map parameters = 4; + + // The output tensors holding inference results. + repeated InferOutputTensor outputs = 5; + + // The data contained in an output tensor can be represented in + // "raw" bytes form or in the repeated type that matches the + // tensor's data type. To use the raw representation 'raw_output_contents' + // must be initialized with data for each tensor in the same order as + // 'outputs'. For each tensor, the size of this content must match + // what is expected by the tensor's shape and data type. The raw + // data must be the flattened, one-dimensional, row-major order of + // the tensor elements without any stride or padding between the + // elements. Note that the FP16 data type must be represented as raw + // content as there is no specific data type for a 16-bit float + // type. + // + // If this field is specified then InferOutputTensor::contents must + // not be specified for any output tensor. + repeated bytes raw_output_contents = 6; + } + + // An inference parameter value. 
The InferParameter message describes a
// "name"/"value" pair, where the "name" is the name of the parameter
// and the "value" is a boolean, integer, or string corresponding to
// the parameter.
message InferParameter
{
  // The parameter value can be a string, an int64, a boolean
  // or a message specific to a predefined parameter.
  oneof parameter_choice
  {
    // A boolean parameter value.
    bool bool_param = 1;

    // An int64 parameter value.
    int64 int64_param = 2;

    // A string parameter value.
    string string_param = 3;
  }
}

// The data contained in a tensor represented by the repeated type
// that matches the tensor's data type. Protobuf oneof is not used
// because oneofs cannot contain repeated fields.
message InferTensorContents
{
  // Representation for BOOL data type. The size must match what is
  // expected by the tensor's shape. The contents must be the flattened,
  // one-dimensional, row-major order of the tensor elements.
  repeated bool bool_contents = 1;

  // Representation for INT8, INT16, and INT32 data types. The size
  // must match what is expected by the tensor's shape. The contents
  // must be the flattened, one-dimensional, row-major order of the
  // tensor elements.
  repeated int32 int_contents = 2;

  // Representation for INT64 data types. The size must match what
  // is expected by the tensor's shape. The contents must be the
  // flattened, one-dimensional, row-major order of the tensor elements.
  repeated int64 int64_contents = 3;

  // Representation for UINT8, UINT16, and UINT32 data types. The size
  // must match what is expected by the tensor's shape. The contents
  // must be the flattened, one-dimensional, row-major order of the
  // tensor elements.
  repeated uint32 uint_contents = 4;

  // Representation for UINT64 data types. The size must match what
  // is expected by the tensor's shape.
The contents must be the + // flattened, one-dimensional, row-major order of the tensor elements. + repeated uint64 uint64_contents = 5; + + // Representation for FP32 data type. The size must match what is + // expected by the tensor's shape. The contents must be the flattened, + // one-dimensional, row-major order of the tensor elements. + repeated float fp32_contents = 6; + + // Representation for FP64 data type. The size must match what is + // expected by the tensor's shape. The contents must be the flattened, + // one-dimensional, row-major order of the tensor elements. + repeated double fp64_contents = 7; + + // Representation for BYTES data type. The size must match what is + // expected by the tensor's shape. The contents must be the flattened, + // one-dimensional, row-major order of the tensor elements. + repeated bytes bytes_contents = 8; + } + grpc_input.json: | + { + "inputs": [ + { + "name": "predict", + "shape": [1, 64], + "datatype": "FP32", + "contents": { + "fp32_contents": [0.0, 0.0, 1.0, 11.0, 14.0, 15.0, 3.0, 0.0, 0.0, 1.0, 13.0, 16.0, 12.0, 16.0, 8.0, 0.0, 0.0, 8.0, 16.0, 4.0, 6.0, 16.0, 5.0, 0.0, 0.0, 5.0, 15.0, 11.0, 13.0, 14.0, 0.0, 0.0, 0.0, 0.0, 2.0, 12.0, 16.0, 13.0, 0.0, 0.0, 0.0, 0.0, 0.0, 13.0, 16.0, 16.0, 6.0, 0.0, 0.0, 0.0, 0.0, 16.0, 16.0, 16.0, 7.0, 0.0, 0.0, 0.0, 0.0, 11.0, 13.0, 12.0, 1.0, 0.0] + } + } + ] + } + query.sh: | + cat grpc_input.json | grpcurl -plaintext -proto kserve.proto -d @ wisdom.modelmesh-serving:80 inference.GRPCInferenceService.ModelInfer +EOF diff --git a/testdata/controllers/mirror-mm/steps.sh b/testdata/controllers/mirror-mm/steps.sh new file mode 100644 index 000000000..a8e7057cf --- /dev/null +++ b/testdata/controllers/mirror-mm/steps.sh @@ -0,0 +1,23 @@ +# initialize primary v1 +./initialize.sh +# query +./sleep.sh +# in a new terminal +./execintosleep.sh +# inside the sleep pod +cd wisdom +source query.sh + +# candidate v2 +./v2-candidate.sh + +# inside the sleep pod; verify stickiness +source 
query.sh +source test_query.sh + +# promote v2 +./promote-v2.sh +kubectl delete isvc wisdom-candidate + +# clean up +./cleanup.sh \ No newline at end of file diff --git a/testdata/controllers/mirror-mm/v2-candidate.sh b/testdata/controllers/mirror-mm/v2-candidate.sh new file mode 100755 index 000000000..9b69704df --- /dev/null +++ b/testdata/controllers/mirror-mm/v2-candidate.sh @@ -0,0 +1,20 @@ +cat <