Multi model deployment #208
Draft
TosinSeg wants to merge 74 commits into deepspeedai:main from TosinSeg:multi-model-deployment.
Changes from all 74 commits:
4eac006  Removing load balancing config (TosinSeg)
c68e999  Reformatting tests (TosinSeg)
5ce1a92  Fixed the formatting (TosinSeg)
fa10e19  Removed print statement (TosinSeg)
f9cbd74  Merging main (TosinSeg)
8970f4e  Removing unused import (TosinSeg)
517bea8  Fixing tests (TosinSeg)
58dd2b2  Fixing merge issue (TosinSeg)
bb0d551  Creating hostfile when one is not provided (TosinSeg)
e2bb9d5  Merge branch 'main' into Always_enable_load_balancing (TosinSeg)
3823534  Fixing import statements removed by merge (TosinSeg)
6f9b4ad  Removing load_balancing check (TosinSeg)
499b9ad  Removing redundant definitions (TosinSeg)
5419ef6  Removing hostfile from test (TosinSeg)
a70b6de  Removing hostfile from non-persistent test (TosinSeg)
eea658b  Initial changes (TosinSeg)
20f0878  Merge branch 'main' into multi-model-deployment (TosinSeg)
c21c31b  Maintaining current behavior (TosinSeg)
f525329  Reading from score file (TosinSeg)
3c0937f  Fixing syntax errors (TosinSeg)
156ac83  Fixing more syntax errors (TosinSeg)
38e270e  Fixing more syntax issues (TosinSeg)
4d4e0d8  Initial load balancing changes (TosinSeg)
01c8e59  Merge branch 'main' into multi-model-deployment (TosinSeg)
f801b36  More load balancing changes (TosinSeg)
fd4e2ed  Load balancing changes and syntax (TosinSeg)
0a3b7e5  Refactor client, and unpack request in load balancer (TosinSeg)
6523c04  First working queries (TosinSeg)
06b40f5  Fixing conversational and q&a args (TosinSeg)
96d0dcb  Updates to _allocate_processes and fixing example (TosinSeg)
ab41d24  Adding host map for allocating processes and formatting (TosinSeg)
8673a9a  Fixing terminate functionality (TosinSeg)
8d09b37  Refactored client (TosinSeg)
7a136d6  More refactoring and q/a example (TosinSeg)
2c6ec08  Reformatting to maintain previous syntax (TosinSeg)
0cb88a9  Removing print/debug statements (TosinSeg)
7c0ee12  Fixing non-persistent deployments (TosinSeg)
7a956d5  Refactoring load balancer launch (TosinSeg)
f8cfe28  Fixing restful gateway client (TosinSeg)
079807d  Fixing replica issue (TosinSeg)
ea1e47e  Fixing non-persistent client (TosinSeg)
98b6129  Adding trust_remote_code support (#203) (msinha251)
daab5e6  Refactoring (TosinSeg)
84073f9  Update mii/models/score/generate.py (TosinSeg)
3ee3410  Merge branch 'multi-model-deployment' of github.com:TosinSeg/DeepSpee…
b4edc2b  Refactoring Load Balancer and request_proto
6346194  Formatting
94b6699  Fixing the client
710c20b  Initial partial deployment commit
c2636b7  More partial deploy updates
189e75c  Partial deploy started
adee843  Fixing add-deploy API queries
a145be5  Support for empty deployment 'group'
082c05e  Support for empty deployment 'group'
3ce77d2  Partial termination
b40ecbd  Refactoring
72dd95c  Formatting
a4e3d56  Fixing bug for partial termination
4b5bb47  Removing comments
30d2b03  Including GPU index map in score file
c5d5996  Refactoring deployment
3ae1781  Refactoring and formatting
4b8f02f  Refactoring
c51ce37  Fixing README
43479db  Refactoring gRPC
e1b6d23  Fixing LB process not terminating
1675bd8  Adding multi_deployment and partial deploy/terminate unit tests
8684a61  Removing comments
56a7fce  Fixing spelling issues
fb70c3d  Update mii/client.py (TosinSeg)
e2cfe8a  Update mii/client.py (TosinSeg)
1312738  Removing AML from addDeploy
b0f0da4  Refactoring MIIConfig and DeploymentConfig
b78068e  Partial deploy/termination example
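The first new file is an example of the partial-deployment flow added in this PR: it attaches to an already-running "multi_models" deployment group, registers an extra bloom-560m text-generation deployment with add_models, queries it, and then removes just that deployment with delete_model.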
New file (+32 lines):

```python
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

import mii

deployments = []
results = []
name = 'bigscience/bloom-560m'
mii_configs1 = {"tensor_parallel": 1, "dtype": "fp16"}
deployments.append(
    mii.DeploymentConfig(task='text-generation',
                         model=name,
                         deployment_name=name + "_deployment5",
                         mii_configs=mii.config.MIIConfig(**mii_configs1)
                         ))

generator = mii.mii_query_handle("multi_models")
generator.add_models(deployments=deployments)

result = generator.query(
    {
        "query": ["DeepSpeed is",
                  "Seattle is"],
        "deployment_name": "bigscience/bloom-560m_deployment5"
    },
    do_sample=True,
    max_new_tokens=30,
)
print(result)
generator.delete_model("bigscience/bloom-560m_deployment5")
```
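The second new file is the main multi-model deployment example: four DeploymentConfig entries (text generation, text classification, conversational, and question answering) are each pinned to specific GPUs on the 'master' host through a GPU_index_map and deployed together under the deployment tag "multi_models".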
New file (+49 lines):

```python
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team
import mii

gpu_index_map1 = {'master': [0]}
gpu_index_map2 = {'master': [1]}
gpu_index_map3 = {'master': [0, 1]}

deployments = []

mii_configs1 = {"tensor_parallel": 2, "dtype": "fp16"}
mii_configs2 = {"tensor_parallel": 1}

name = "bigscience/bloom-560m"
deployments.append(
    mii.DeploymentConfig(task='text-generation',
                         model=name,
                         deployment_name=name + "_deployment",
                         GPU_index_map=gpu_index_map3,
                         tensor_parallel=2,
                         dtype="fp16"))

# gpt2
name = "microsoft/DialogRPT-human-vs-rand"
deployments.append(
    mii.DeploymentConfig(task='text-classification',
                         model=name,
                         deployment_name=name + "_deployment",
                         GPU_index_map=gpu_index_map2))

name = "microsoft/DialoGPT-large"
deployments.append(
    mii.DeploymentConfig(
        task='conversational',
        model=name,
        deployment_name=name + "_deployment",
        GPU_index_map=gpu_index_map1,
    ))

name = "deepset/roberta-large-squad2"
deployments.append(
    mii.DeploymentConfig(task="question-answering",
                         model=name,
                         deployment_name=name + "-qa-deployment",
                         GPU_index_map=gpu_index_map2))

mii.deploy(deployment_tag="multi_models", deployments=deployments)
```
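The third new file queries all four deployments through a single mii_query_handle("multi_models") handle; each request selects its target model with the "deployment_name" field, and the remaining keys follow the usual query schema for that task.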
New file (+50 lines):

```python
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

import mii

results = []
generator = mii.mii_query_handle("multi_models")
result = generator.query(
    {
        "query": ["DeepSpeed is",
                  "Seattle is"],
        "deployment_name": "bigscience/bloom-560m_deployment"
    },
    do_sample=True,
    max_new_tokens=30,
)
results.append(result)
print(result)

result = generator.query({
    'query':
    "DeepSpeed is the greatest",
    "deployment_name":
    "microsoft/DialogRPT-human-vs-rand_deployment"
})
results.append(result)
print(result)

result = generator.query({
    'text': "DeepSpeed is the greatest",
    'conversation_id': 3,
    'past_user_inputs': [],
    'generated_responses': [],
    "deployment_name": "microsoft/DialoGPT-large_deployment"
})
results.append(result)
print(result)

result = generator.query({
    'question':
    "What is the greatest?",
    'context':
    "DeepSpeed is the greatest",
    "deployment_name":
    "deepset/roberta-large-squad2" + "-qa-deployment"
})
results.append(result)
print(result)
```
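The last new file shuts down the whole deployment group by its tag.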
New file (+7 lines):

```python
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team
import mii

mii.terminate("multi_models")
```
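Taken together, the examples illustrate the difference between partial and full termination. The following is a minimal sketch, not part of the PR's diff, that combines only calls already shown above (mii_query_handle, delete_model, and terminate); the deployment name and tag are the ones used in the deploy example.

```python
import mii

# Attach to the running multi-model deployment group by its tag.
generator = mii.mii_query_handle("multi_models")

# Partial termination: remove only the question-answering deployment,
# leaving the other deployments under "multi_models" serving.
generator.delete_model("deepset/roberta-large-squad2-qa-deployment")

# Full termination: shut down everything remaining under the tag.
mii.terminate("multi_models")
```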