@@ -19,7 +19,7 @@ Feature: Explicit Model deployment
storageUri: gs://seldon-models/scv2/samples/mlserver_1.3.5/iris-sklearn
"""
When the model "alpha-1" should eventually become Ready with timeout "20s"
-Then send HTTP inference request with timeout "20s" to model "alpha-1" with payload:
+Then I send HTTP inference request with timeout "20s" to model "alpha-1" with payload:
"""
{
"inputs": [
@@ -52,7 +52,7 @@ Feature: Explicit Model deployment
}
] }
"""
-Then send gRPC inference request with timeout "20s" to model "alpha-1" with payload:
+Then I send gRPC inference request with timeout "20s" to model "alpha-1" with payload:
"""
{
"inputs": [
@@ -85,7 +85,7 @@ Feature: Explicit Model deployment
] }
"""
Then delete the model "alpha-1" with timeout "10s"
-Then send HTTP inference request with timeout "20s" to model "alpha-1" with payload:
+Then I send HTTP inference request with timeout "20s" to model "alpha-1" with payload:
"""
{
"inputs": [
@@ -99,7 +99,7 @@ Feature: Explicit Model deployment
}
"""
And expect http response status code "404"
-Then send gRPC inference request with timeout "20s" to model "alpha-1" with payload:
+Then I send gRPC inference request with timeout "20s" to model "alpha-1" with payload:
"""
{
"inputs": [
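Reviewer note: the step wording changes above ("send ..." to "I send ...") only pass if the godog step definitions are renamed in lockstep, since godog binds step text to handlers via regular expressions. A minimal sketch of what the matching registration might look like; the function and package names here are hypothetical and not part of this diff:

```go
package steps

import "github.com/cucumber/godog"

// Hypothetical registration sketch: godog matches each feature line against
// these patterns, so the "I send ..." phrasing must agree exactly.
func InitializeScenario(ctx *godog.ScenarioContext) {
	ctx.Step(`^I send HTTP inference request with timeout "([^"]*)" to model "([^"]*)" with payload:$`,
		sendHTTPInferenceToModel)
}

// Assumed handler shape: capture groups arrive as strings, the docstring last.
func sendHTTPInferenceToModel(timeout, model string, payload *godog.DocString) error {
	// ... issue the request against the deployed model and record the response ...
	return nil
}
```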
6 changes: 3 additions & 3 deletions tests/integration/godog/features/model/over_commit.feature
@@ -51,7 +51,7 @@ Feature: Explicit Model deployment
storageUri: gs://seldon-models/scv2/samples/mlserver_1.3.5/iris-sklearn
"""
When the model "overcommit-3" should eventually become Ready with timeout "20s"
-Then send HTTP inference request with timeout "20s" to model "overcommit-1" with payload:
+Then I send HTTP inference request with timeout "20s" to model "overcommit-1" with payload:
"""
{
"inputs": [
@@ -65,7 +65,7 @@ Feature: Explicit Model deployment
}
"""
And expect http response status code "200"
-Then send HTTP inference request with timeout "20s" to model "overcommit-2" with payload:
+Then I send HTTP inference request with timeout "20s" to model "overcommit-2" with payload:
"""
{
"inputs": [
@@ -79,7 +79,7 @@ Feature: Explicit Model deployment
}
"""
And expect http response status code "200"
-Then send HTTP inference request with timeout "20s" to model "overcommit-3" with payload:
+Then I send HTTP inference request with timeout "20s" to model "overcommit-3" with payload:
"""
{
"inputs": [
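Reviewer note: for readers unfamiliar with these steps, the HTTP inference calls above amount to a POST against the model's Open Inference Protocol endpoint. A rough sketch under stated assumptions: the mesh hostname below is a placeholder, and the real address resolution lives in the harness, not in this PR.

```go
package steps

import (
	"bytes"
	"fmt"
	"net/http"
	"time"
)

// Sketch of the call behind "I send HTTP inference request ... to model ...",
// assuming the standard Open Inference Protocol REST path. The host is a
// placeholder; the actual tests resolve the Seldon mesh address themselves.
func infer(model, payload string, timeout time.Duration) (*http.Response, error) {
	url := fmt.Sprintf("http://seldon-mesh/v2/models/%s/infer", model)
	req, err := http.NewRequest(http.MethodPost, url, bytes.NewBufferString(payload))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/json")
	return (&http.Client{Timeout: timeout}).Do(req)
}
```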
18 changes: 14 additions & 4 deletions tests/integration/godog/features/pipeline/conditional.feature
@@ -1,8 +1,10 @@
-@PipelineDeployment @Functional @Pipelines @Conditional
+@PipelineConditional @Functional @Pipelines @Conditional
Feature: Conditional pipeline with branching models
-This pipeline uses a conditional model to route data to either add10 or mul10.
+In order to support decision-based inference
+As a model user
+I need a conditional pipeline that directs inputs to one of multiple models based on a condition

-Scenario: Deploy tfsimple-conditional pipeline and wait for readiness
+Scenario: Deploy a conditional pipeline, run inference, and verify the output
Given I deploy model spec with timeout "30s":
"""
apiVersion: mlops.seldon.io/v1alpha1
@@ -43,7 +45,7 @@ Feature: Conditional pipeline with branching models
And the model "add10-nbsl" should eventually become Ready with timeout "20s"
And the model "mul10-nbsl" should eventually become Ready with timeout "20s"

-And I deploy pipeline spec with timeout "30s":
+When I deploy a pipeline spec with timeout "30s":
"""
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
@@ -69,3 +71,11 @@ Feature: Conditional pipeline with branching models
stepsJoin: any
"""
Then the pipeline "tfsimple-conditional-nbsl" should eventually become Ready with timeout "40s"
+Then I send gRPC inference request with timeout "20s" to pipeline "tfsimple-conditional-nbsl" with payload:
+"""
+{"model_name":"conditional-nbsl","inputs":[{"name":"CHOICE","contents":{"int_contents":[0]},"datatype":"INT32","shape":[1]},{"name":"INPUT0","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]},{"name":"INPUT1","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}
+"""
+And expect gRPC response body to contain JSON:
+"""
+{"outputs":[{"name":"OUTPUT","datatype":"FP32","shape":[4]}],"raw_output_contents":["AAAgQQAAoEEAAPBBAAAgQg=="]}
+"""
131 changes: 114 additions & 17 deletions tests/integration/godog/features/pipeline/input_chaining.feature
@@ -1,14 +1,16 @@
-@PipelineDeployment @Functional @Pipelines @ModelChainingFromInputs
+@PipelineModelChainingFromInputs @Functional @Pipelines @ModelChainingFromInputs
Feature: Pipeline model chaining using inputs and outputs
-This pipeline chains tfsimple1 into tfsimple2 using both inputs and outputs.
+In order to build multi-stage inference workflows
+As a model user
+I need a pipeline that chains models together by passing outputs from one stage into the next

-Scenario: Deploy tfsimples-input pipeline and wait for readiness
+Scenario: Deploy a model-chaining pipeline, run inference, and verify the output
Given I deploy model spec with timeout "30s":
"""
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
-name: chain-from-input-tfsimple1-yhjo
+name: tfsimple1-yhjo
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
@@ -21,33 +23,128 @@ Feature: Pipeline model chaining using inputs and outputs
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
-name: chain-from-input-tfsimple2-yhjo
+name: tfsimple2-yhjo
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
- tensorflow
memory: 100Ki
"""
Then the model "chain-from-input-tfsimple1-yhjo" should eventually become Ready with timeout "20s"
Then the model "chain-from-input-tfsimple2-yhjo" should eventually become Ready with timeout "20s"

And I deploy pipeline spec with timeout "30s":
Then the model "tfsimple1-yhjo" should eventually become Ready with timeout "20s"
Then the model "tfsimple2-yhjo" should eventually become Ready with timeout "20s"
And I deploy a pipeline spec with timeout "30s":
"""
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
-name: chain-from-input-tfsimples-input-yhjo
+name: chain-from-input-yhjo
spec:
steps:
-- name: chain-from-input-tfsimple1-yhjo
-- name: chain-from-input-tfsimple2-yhjo
+- name: tfsimple1-yhjo
+- name: tfsimple2-yhjo
inputs:
-- chain-from-input-tfsimple1-yhjo.inputs.INPUT0
-- chain-from-input-tfsimple1-yhjo.outputs.OUTPUT1
+- tfsimple1-yhjo.inputs.INPUT0
+- tfsimple1-yhjo.outputs.OUTPUT1
tensorMap:
-chain-from-input-tfsimple1-yhjo.outputs.OUTPUT1: INPUT1
+tfsimple1-yhjo.outputs.OUTPUT1: INPUT1
output:
steps:
-- chain-from-input-tfsimple2-yhjo
+- tfsimple2-yhjo
"""
Then the pipeline "chain-from-input-yhjo" should eventually become Ready with timeout "40s"
+When I send HTTP inference request with timeout "20s" to pipeline "chain-from-input-yhjo" with payload:
+"""
+{"inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}
+"""
+And expect http response status code "200"
+And expect http response body to contain JSON:
+"""
+{
+"model_name": "",
+"outputs": [
+{
+"data": [
+1,
+2,
+3,
+4,
+5,
+6,
+7,
+8,
+9,
+10,
+11,
+12,
+13,
+14,
+15,
+16
+],
+"name": "OUTPUT0",
+"shape": [
+1,
+16
+],
+"datatype": "INT32"
+},
+{
+"data": [
+1,
+2,
+3,
+4,
+5,
+6,
+7,
+8,
+9,
+10,
+11,
+12,
+13,
+14,
+15,
+16
+],
+"name": "OUTPUT1",
+"shape": [
+1,
+16
+],
+"datatype": "INT32"
+}
+]
+}
+"""
+Then I send gRPC inference request with timeout "20s" to pipeline "chain-from-input-yhjo" with payload:
+"""
+{"model_name":"simple","inputs":[{"name":"INPUT0","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]}]}
+"""
+And expect gRPC response body to contain JSON:
+"""
+{
+"outputs": [
+{
+"name": "OUTPUT0",
+"datatype": "INT32",
+"shape": [
+1,
+16
+]
+},
+{
+"name": "OUTPUT1",
+"datatype": "INT32",
+"shape": [
+1,
+16
+]
+}
+],
+"raw_output_contents": [
+"AQAAAAIAAAADAAAABAAAAAUAAAAGAAAABwAAAAgAAAAJAAAACgAAAAsAAAAMAAAADQAAAA4AAAAPAAAAEAAAAA==",
+"AQAAAAIAAAADAAAABAAAAAUAAAAGAAAABwAAAAgAAAAJAAAACgAAAAsAAAAMAAAADQAAAA4AAAAPAAAAEAAAAA=="
+]
+}
+"""
Then the pipeline "chain-from-input-tfsimples-input-yhjo" should eventually become Ready with timeout "40s"
74 changes: 70 additions & 4 deletions tests/integration/godog/features/pipeline/input_tensors.feature
@@ -1,8 +1,10 @@
-@PipelineDeployment @Functional @Pipelines @PipelineInputTensors
+@PipelineInputTensors @Functional @Pipelines @PipelineInputTensors
Feature: Pipeline using direct input tensors
-This pipeline directly routes pipeline input tensors INPUT0 and INPUT1 into separate models.
+In order to build pipelines that dispatch inputs to multiple models
+As a model user
+I need a pipeline that routes individual input tensors directly to different model stages

-Scenario: Deploy pipeline-inputs pipeline and wait for readiness
+Scenario: Deploy a pipeline that routes individual input tensors directly to different model stages, run inference, and verify the output
Given I deploy model spec with timeout "30s":
"""
apiVersion: mlops.seldon.io/v1alpha1
@@ -30,7 +32,7 @@ Feature: Pipeline using direct input tensors
Then the model "mul10-tw2x" should eventually become Ready with timeout "20s"
And the model "add10-tw2x" should eventually become Ready with timeout "20s"

-And I deploy pipeline spec with timeout "30s":
+And I deploy a pipeline spec with timeout "30s":
"""
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
@@ -54,3 +56,67 @@ Feature: Pipeline using direct input tensors
- add10-tw2x
"""
Then the pipeline "pipeline-inputs-tw2x" should eventually become Ready with timeout "20s"
+When I send gRPC inference request with timeout "20s" to pipeline "pipeline-inputs-tw2x" with payload:
+"""
+{
+"model_name": "pipeline",
+"inputs": [
+{
+"name": "INPUT0",
+"contents": {
+"fp32_contents": [
+1,
+2,
+3,
+4
+]
+},
+"datatype": "FP32",
+"shape": [
+4
+]
+},
+{
+"name": "INPUT1",
+"contents": {
+"fp32_contents": [
+1,
+2,
+3,
+4
+]
+},
+"datatype": "FP32",
+"shape": [
+4
+]
+}
+]
+}
+
+"""
+And expect gRPC response body to contain JSON:
+"""
+{
+"outputs": [
+{
+"name": "OUTPUT",
+"datatype": "FP32",
+"shape": [
+4
+]
+},
+{
+"name": "OUTPUT",
+"datatype": "FP32",
+"shape": [
+4
+]
+}
+],
+"raw_output_contents": [
+"AAAgQQAAoEEAAPBBAAAgQg==",
+"AAAwQQAAQEEAAFBBAABgQQ=="
+]
+}
+"""