diff --git a/.github/workflows/nv-a6000-fastgen.yml b/.github/workflows/nv-a6000-fastgen.yml
index 0b9da000..cc6f9942 100644
--- a/.github/workflows/nv-a6000-fastgen.yml
+++ b/.github/workflows/nv-a6000-fastgen.yml
@@ -9,6 +9,7 @@ on:
       - 'mii/legacy/**'
       - 'tests/legacy/**'
       - '.github/workflows/nv-v100-legacy.yml'
+      - '.github/workflows/nv-a6000-sd.yml'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
diff --git a/.github/workflows/nv-a6000-sd.yml b/.github/workflows/nv-a6000-sd.yml
new file mode 100644
index 00000000..8d41f5bd
--- /dev/null
+++ b/.github/workflows/nv-a6000-sd.yml
@@ -0,0 +1,58 @@
+name: nv-a6000-sd
+
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: "0 0 * * *"
+  pull_request:
+    paths:
+      - 'mii/legacy/**'
+      - 'tests/legacy/**'
+      - '.github/workflows/nv-a6000-sd.yml'
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  unit-tests:
+    runs-on: [self-hosted, nvidia, a6000]
+    container:
+      image: nvcr.io/nvidia/pytorch:24.03-py3
+      ports:
+        - 80
+      options: --gpus all --shm-size "8G"
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Check container state
+        run: |
+          ldd --version
+          nvcc --version
+          nvidia-smi
+          python -c "import torch; print('torch:', torch.__version__, torch)"
+          python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
+      - name: Install transformers
+        run: |
+          git clone --depth=1 https://github.com/huggingface/transformers
+          cd transformers
+          git rev-parse --short HEAD
+          python -m pip install .
+      - name: Install deepspeed
+        run: |
+          git clone --depth=1 https://github.com/microsoft/DeepSpeed
+          cd DeepSpeed
+          python -m pip install .
+          ds_report
+      - name: Install MII
+        run: |
+          pip install .[dev]
+      - name: Python environment
+        run: |
+          python -m pip list
+      - name: Unit tests
+        run: |
+          unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
+          cd tests/legacy
+          python -m pytest --color=yes --durations=0 --verbose -rF -m "stable_diffusion" ./
diff --git a/mii/legacy/method_table.py b/mii/legacy/method_table.py
index 520e9a1c..85d21b79 100644
--- a/mii/legacy/method_table.py
+++ b/mii/legacy/method_table.py
@@ -2,6 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0
 
 # DeepSpeed Team
+import io
+
 from abc import ABC, abstractmethod
 from mii.legacy.constants import TaskType
 from mii.legacy.grpc_related.proto import legacymodelresponse_pb2 as modelresponse_pb2
@@ -274,11 +276,28 @@ def pack_request_to_proto(self, request_dict, **query_kwargs):
         negative_prompt = request_dict.get("negative_prompt", [""] * len(prompt))
         negative_prompt = negative_prompt if isinstance(negative_prompt, list) else [negative_prompt]
 
-        image = request_dict["image"] if isinstance(request_dict["image"],
-                                                    list) else [request_dict["image"]]
-        mask_image = request_dict["mask_image"] if isinstance(
+        image_list = request_dict["image"] if isinstance(
+            request_dict["image"],
+            list) else [request_dict["image"]]
+        mask_image_list = request_dict["mask_image"] if isinstance(
             request_dict["mask_image"],
             list) else [request_dict["mask_image"]]
+        image = []
+        for img in image_list:
+            if isinstance(img, bytes):
+                image.append(img)
+            else:
+                imgByteArr = io.BytesIO()
+                img.save(imgByteArr, format=img.format)
+                image.append(imgByteArr.getvalue())
+        mask_image = []
+        for img in mask_image_list:
+            if isinstance(img, bytes):
+                mask_image.append(img)
+            else:
+                imgByteArr = io.BytesIO()
+                img.save(imgByteArr, format=img.format)
+                mask_image.append(imgByteArr.getvalue())
 
         return modelresponse_pb2.InpaintingRequest(
             prompt=prompt,
diff --git a/mii/legacy/models/providers/diffusers.py b/mii/legacy/models/providers/diffusers.py
index 15973d0e..fca49470 100644
--- a/mii/legacy/models/providers/diffusers.py
+++ b/mii/legacy/models/providers/diffusers.py
@@ -4,11 +4,18 @@
 # DeepSpeed Team
 import os
 import torch
+from huggingface_hub import HfApi
 
 from .utils import attempt_load
 from mii.config import ModelConfig
 
 
+def _get_model_revs(model_name):
+    api = HfApi()
+    branches = api.list_repo_refs(model_name).branches
+    return [b.name for b in branches]
+
+
 def diffusers_provider(model_config: ModelConfig):
     from diffusers import DiffusionPipeline
 
@@ -17,7 +24,8 @@ def diffusers_provider(model_config: ModelConfig):
     kwargs = model_config.pipeline_kwargs
     if model_config.dtype == torch.half:
         kwargs["torch_dtype"] = torch.float16
-        kwargs["revision"] = "fp16"
+        if "fp16" in _get_model_revs(model_config.model):
+            kwargs["revision"] = "fp16"
 
     pipeline = attempt_load(DiffusionPipeline.from_pretrained,
                             model_config.model,
diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt
index 88d0d08e..1c72270b 100644
--- a/requirements/requirements-dev.txt
+++ b/requirements/requirements-dev.txt
@@ -1,4 +1,5 @@
 clang-format==18.1.3
+diffusers
 einops
 pre-commit>=2.20.0
 pytest
diff --git a/tests/legacy/conftest.py b/tests/legacy/conftest.py
index 9cb85cfd..239d8383 100644
--- a/tests/legacy/conftest.py
+++ b/tests/legacy/conftest.py
@@ -7,6 +7,8 @@ import os
 
 import mii.legacy as mii
 from types import SimpleNamespace
+from packaging import version as pkg_version
+import torch
 
 
 @pytest.fixture(scope="function", params=["fp16"])
@@ -84,13 +86,20 @@ def ds_config(request):
     return request.param
 
 
-@pytest.fixture(scope="function")
-def replace_with_kernel_inject(model_name):
-    if "clip-vit" in model_name:
+@pytest.fixture(scope="function", params=[None])
+def replace_with_kernel_inject(request, model_name):
+    if request.param is not None:
+        return request.param
+    if model_name == "openai/clip-vit-base-patch32":
         return False
     return True
 
 
+@pytest.fixture(scope="function", params=[False])
+def enable_cuda_graph(request):
+    return request.param
+
+
 @pytest.fixture(scope="function")
 def model_config(
     task_name: str,
@@ -104,6 +113,7 @@ def model_config(
     enable_zero: bool,
     ds_config: dict,
     replace_with_kernel_inject: bool,
+    enable_cuda_graph: bool,
 ):
     config = SimpleNamespace(
         skip_model_check=True,  # TODO: remove this once conversation task check is fixed
@@ -120,6 +130,7 @@ def model_config(
         enable_zero=enable_zero,
         ds_config=ds_config,
         replace_with_kernel_inject=replace_with_kernel_inject,
+        enable_cuda_graph=enable_cuda_graph,
     )
     return config.__dict__
 
@@ -145,8 +156,31 @@ def expected_failure(request):
     return request.param
 
 
+@pytest.fixture(scope="function", params=[None])
+def min_compute_capability(request):
+    return request.param
+
+
+@pytest.fixture(scope="function")
+def meets_compute_capability_reqs(min_compute_capability):
+    if min_compute_capability is None:
+        return
+    min_compute_ver = pkg_version.parse(str(min_compute_capability))
+    device_compute_ver = pkg_version.parse(".".join(
+        map(str,
+            torch.cuda.get_device_capability())))
+    if device_compute_ver < min_compute_ver:
+        pytest.skip(
+            f"Skipping test because device compute capability ({device_compute_ver}) is less than the minimum required ({min_compute_ver})."
+        )
+
+
 @pytest.fixture(scope="function")
-def deployment(deployment_name, mii_config, model_config, expected_failure):
+def deployment(deployment_name,
+               mii_config,
+               model_config,
+               expected_failure,
+               meets_compute_capability_reqs):
     if expected_failure is not None:
         with pytest.raises(expected_failure) as excinfo:
             mii.deploy(
diff --git a/tests/legacy/pytest.ini b/tests/legacy/pytest.ini
index 4c072427..2ba77e71 100644
--- a/tests/legacy/pytest.ini
+++ b/tests/legacy/pytest.ini
@@ -1,3 +1,3 @@
 [pytest]
 markers =
-    deepspeed:Run test for deepspeed CI
+    stable_diffusion:Run Stable Diffusion tests
diff --git a/tests/legacy/test_local_deployment.py b/tests/legacy/test_local_deployment.py
index 531036f6..48b05e9c 100644
--- a/tests/legacy/test_local_deployment.py
+++ b/tests/legacy/test_local_deployment.py
@@ -5,9 +5,86 @@
 import pytest
 import mii.legacy as mii
+import requests
+from PIL import Image
 
 
 @pytest.mark.parametrize(
     "task_name, model_name, query",
+    [(
+        "conversational",
+        "microsoft/DialoGPT-small",
+        {
+            "text": "DeepSpeed is the greatest",
+            "conversation_id": 3,
+            "past_user_inputs": [],
+            "generated_responses": [],
+        },
+    ),
+     (
+         "fill-mask",
+         "bert-base-uncased",
+         {
+             "query": "Hello I'm a [MASK] model."
+         },
+     ),
+     (
+         "question-answering",
+         "deepset/roberta-large-squad2",
+         {
+             "question": "What is the greatest?",
+             "context": "DeepSpeed is the greatest",
+         },
+     ),
+     (
+         "text-generation",
+         "bigscience/bloom-560m",
+         {
+             "query": ["DeepSpeed is the greatest",
+                       "Seattle is"]
+         },
+     ),
+     (
+         "token-classification",
+         "Jean-Baptiste/roberta-large-ner-english",
+         {
+             "query": "My name is jean-baptiste and I live in montreal."
+         },
+     ),
+     (
+         "text-classification",
+         "roberta-large-mnli",
+         {
+             "query": "DeepSpeed is the greatest"
+         },
+     ),
+     (
+         "zero-shot-image-classification",
+         "openai/clip-vit-base-patch32",
+         {
+             "image":
+             "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
+             "candidate_labels": ["animals",
+                                  "humans",
+                                  "landscape"]
+         },
+     ),
+     ("text-to-image-inpainting",
+      "stabilityai/stable-diffusion-2-inpainting",
+      {
+          "prompt":
+          "a black cat with glowing eyes",
+          "image":
+          Image.open(
+              requests.get(
+                  "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png",
+                  stream=True).raw),
+          "mask_image":
+          Image.open(
+              requests.get(
+                  "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint_mask.png",
+                  stream=True).raw),
+      })],
     [
         (
             "fill-mask",
@@ -73,7 +150,7 @@ def test_single_GPU(deployment, query):
 
 
 @pytest.mark.parametrize(
-    "task_name, model_name, query",
+    "task_name, model_name, query, tensor_parallel",
     [
         (
             "text-generation",
@@ -82,6 +159,7 @@ def test_single_GPU(deployment, query):
             "query": ["DeepSpeed is the greatest",
                       "Seattle is"]
         },
+            2,
         ),
     ],
 )
@@ -111,3 +189,25 @@ def test_session(deployment, query):
     result = generator.query(query)
     generator.destroy_session(session_name)
     assert result
+
+
+@pytest.mark.stable_diffusion
+@pytest.mark.parametrize(
+    "task_name, model_name, query",
+    [
+        (
+            "text-to-image",
+            "openskyml/midjourney-mini",
+            {
+                "prompt": "a dog on a rocket",
+                "negative_prompt": "planet earth",
+            },
+        ),
+    ],
+)
+@pytest.mark.parametrize("enable_cuda_graph", [True])
+@pytest.mark.parametrize("min_compute_capability", [8])
+def test_SD_kernel_inject(deployment, query):
+    generator = mii.mii_query_handle(deployment)
+    result = generator.query(query)
+    assert result
diff --git a/tests/legacy/test_non_persistent_deployment.py b/tests/legacy/test_non_persistent_deployment.py
index ed2b13fb..8ef8d6ec 100644
--- a/tests/legacy/test_non_persistent_deployment.py
+++ b/tests/legacy/test_non_persistent_deployment.py
@@ -26,13 +26,6 @@
             "context": "DeepSpeed is the greatest",
         },
     ),
-    (
-        "text-generation",
-        "distilgpt2",
-        {
-            "query": ["DeepSpeed is the greatest"]
-        },
-    ),
     (
         "text-generation",
         "bigscience/bloom-560m",