From 9c5e19aaf1ca420eaa17735314f5026a029d2160 Mon Sep 17 00:00:00 2001 From: Rahul Tuli Date: Thu, 23 Jan 2025 20:27:48 +0000 Subject: [PATCH 1/2] dummy Signed-off-by: Rahul Tuli --- .../README.md | 0 .../llama3_8b_2of4.py | 0 .../config.json | 46 +++++++++++++++++++ tests/e2e/vLLM/test_vllm.py | 27 +++++------ 4 files changed, 60 insertions(+), 13 deletions(-) rename examples/{sparse_2of4_quantization_fp8 => best_feature}/README.md (100%) rename examples/{sparse_2of4_quantization_fp8 => best_feature}/llama3_8b_2of4.py (100%) create mode 100644 nm-testing/llama2.c-stories42M-pruned2.4-compressed/config.json diff --git a/examples/sparse_2of4_quantization_fp8/README.md b/examples/best_feature/README.md similarity index 100% rename from examples/sparse_2of4_quantization_fp8/README.md rename to examples/best_feature/README.md diff --git a/examples/sparse_2of4_quantization_fp8/llama3_8b_2of4.py b/examples/best_feature/llama3_8b_2of4.py similarity index 100% rename from examples/sparse_2of4_quantization_fp8/llama3_8b_2of4.py rename to examples/best_feature/llama3_8b_2of4.py diff --git a/nm-testing/llama2.c-stories42M-pruned2.4-compressed/config.json b/nm-testing/llama2.c-stories42M-pruned2.4-compressed/config.json new file mode 100644 index 0000000000..316d12f2cd --- /dev/null +++ b/nm-testing/llama2.c-stories42M-pruned2.4-compressed/config.json @@ -0,0 +1,46 @@ +{ + "_name_or_path": "nm-testing/llama2.c-stories42M-pruned2.4", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 512, + "initializer_range": 0.02, + "intermediate_size": 1376, + "max_position_embeddings": 1024, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 8, + "num_hidden_layers": 8, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "quantization_config": { + "quant_method": "compressed-tensors", + "sparsity_config": { + "format": "sparse-24-bitmask", + "global_sparsity": 0.21780030857394755, + "ignore": [ + "lm_head" + ], + "registry_requires_subclass": false, + "sparsity_structure": "2:4", + "targets": [ + "Linear" + ] + }, + "version": "0.8.1" + }, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.48.1", + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 70a6a35e42..1539d71dd0 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -134,19 +134,19 @@ def test_vllm(self): logger.info("================= UPLOADING TO HUB ======================") - stub = f"{HF_MODEL_HUB_NAME}/{self.save_dir}-e2e" + # stub = f"{HF_MODEL_HUB_NAME}/{self.save_dir}-e2e" - self.api.create_repo( - repo_id=stub, - exist_ok=True, - repo_type="model", - private=False, - ) + # self.api.create_repo( + # repo_id=stub, + # exist_ok=True, + # repo_type="model", + # private=False, + # ) - self.api.upload_folder( - repo_id=stub, - folder_path=self.save_dir, - ) + # self.api.upload_folder( + # repo_id=stub, + # folder_path=self.save_dir, + # ) logger.info("================= RUNNING vLLM =========================") @@ -172,8 +172,9 @@ def test_vllm(self): self.tear_down() def tear_down(self): - if self.save_dir is not None: - shutil.rmtree(self.save_dir) + # if self.save_dir is not None: + # shutil.rmtree(self.save_dir) + pass def _check_session_contains_recipe(self) -> None: session = active_session() From 6c5f1d229498ae60400c8a175d9fc65cee5673d2 Mon Sep 17 00:00:00 2001 From: Rahul Tuli Date: Thu, 23 Jan 2025 20:37:55 +0000 Subject: [PATCH 2/2] bleh Signed-off-by: Rahul Tuli --- tests/e2e/vLLM/temp_config/sparse2of4_fp8_dynamic.yaml | 7 +++++++ tests/e2e/vLLM/temp_config/sparse_24.yaml | 8 ++++++++ tests/e2e/vLLM/test_vllm.py | 3 ++- 3 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 tests/e2e/vLLM/temp_config/sparse2of4_fp8_dynamic.yaml create mode 100644 tests/e2e/vLLM/temp_config/sparse_24.yaml diff --git a/tests/e2e/vLLM/temp_config/sparse2of4_fp8_dynamic.yaml b/tests/e2e/vLLM/temp_config/sparse2of4_fp8_dynamic.yaml new file mode 100644 index 0000000000..e1785ce2c0 --- /dev/null +++ b/tests/e2e/vLLM/temp_config/sparse2of4_fp8_dynamic.yaml @@ -0,0 +1,7 @@ +cadence: "nightly" +test_type: "regression" +model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 +recipe: tests/e2e/vLLM/recipes/Sparse_2of4/recipe_sparse_2of4_fp8_dynamic.yaml +scheme: sparse2of4_fp8_dynamic +dataset_id: HuggingFaceH4/ultrachat_200k +dataset_split: train_sft \ No newline at end of file diff --git a/tests/e2e/vLLM/temp_config/sparse_24.yaml b/tests/e2e/vLLM/temp_config/sparse_24.yaml new file mode 100644 index 0000000000..653168b977 --- /dev/null +++ b/tests/e2e/vLLM/temp_config/sparse_24.yaml @@ -0,0 +1,8 @@ +cadence: "nightly" +test_type: "regression" +model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 +recipe: tests/e2e/vLLM/recipes/Sparse_2of4/recipe_sparse_2of4.yaml +scheme: sparse2of4_only +dataset_id: HuggingFaceH4/ultrachat_200k +dataset_split: train_sft +save_compressed: False \ No newline at end of file diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 1539d71dd0..5835b901ee 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -1,6 +1,7 @@ import os import re -import shutil + +# import shutil from pathlib import Path from typing import Callable