|
32 | 32 |
|
33 | 33 | logger = logging.getLogger(__name__) |
34 | 34 |
|
| 35 | + |
| 36 | +class OVQuantizationMethod(str, Enum): |
| 37 | + DEFAULT = "default" |
| 38 | + HYBRID = "hybrid" |
| 39 | + AWQ = "awq" |
| 40 | + |
| 41 | + |
35 | 42 | _DEFAULT_4BIT_CONFIGS = { |
36 | | - "databricks/dolly-v2-3b": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.8}, |
| 43 | + "databricks/dolly-v2-3b": {"bits": 4, "sym": False, "group_size": 128, "scale_estimation": True}, |
37 | 44 | "EleutherAI/gpt-j-6b": {"bits": 4, "sym": False, "group_size": 64}, |
38 | 45 | "facebook/opt-6.7b": {"bits": 4, "sym": False, "group_size": 64, "ratio": 0.8}, |
39 | | - "bigscience/bloomz-7b1": {"bits": 4, "sym": False, "group_size": 32, "ratio": 0.6}, |
40 | 46 | "togethercomputer/RedPajama-INCITE-7B-Instruct": {"bits": 4, "sym": False, "group_size": 128}, |
41 | 47 | "HuggingFaceH4/zephyr-7b-beta": { |
42 | 48 | "bits": 4, |
43 | 49 | "sym": True, |
44 | 50 | "group_size": 128, |
45 | 51 | "ratio": 0.8, |
46 | 52 | "dataset": "wikitext2", |
47 | | - "awq": True, |
| 53 | + "quant_method": OVQuantizationMethod.AWQ, |
48 | 54 | }, |
49 | 55 | "meta-llama/Llama-2-7b": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.6}, |
50 | 56 | "meta-llama/Llama-2-7b-chat": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.8}, |
|
55 | 61 | "group_size": 64, |
56 | 62 | "ratio": 0.8, |
57 | 63 | "dataset": "wikitext2", |
58 | | - "awq": True, |
| 64 | + "quant_method": OVQuantizationMethod.AWQ, |
59 | 65 | }, |
60 | 66 | "stabilityai/stablelm-zephyr-3b": { |
61 | 67 | "bits": 4, |
62 | 68 | "sym": False, |
63 | 69 | "group_size": 128, |
64 | 70 | "ratio": 1.0, |
65 | 71 | "dataset": "wikitext2", |
66 | | - "awq": True, |
| 72 | + "quant_method": OVQuantizationMethod.AWQ, |
67 | 73 | }, |
68 | 74 | "stabilityai/stable-code-3b": {"bits": 4, "sym": True, "group_size": 64, "ratio": 0.8}, |
69 | 75 | "pansophic/rocket-3B": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.8}, |
70 | 76 | "THUDM/chatglm2-6b": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.72}, |
71 | 77 | "Qwen/Qwen-7B-Chat": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.6}, |
72 | | - "openlm-research/open_llama_3b": {"bits": 4, "sym": True, "group_size": 64, "all_layers": True}, |
| 78 | + "openlm-research/open_llama_3b": {"bits": 4, "sym": False, "group_size": 64, "all_layers": True}, |
73 | 79 | "openlm-research/open_llama_3b_v2": {"bits": 4, "sym": True, "group_size": 64, "all_layers": True}, |
74 | 80 | "tiiuae/falcon-7b-instruct": {"bits": 4, "sym": True, "group_size": 64, "all_layers": True}, |
75 | 81 | "psmathur/orca_mini_3b": { |
|
78 | 84 | "group_size": 64, |
79 | 85 | "all_layers": True, |
80 | 86 | "dataset": "wikitext2", |
81 | | - "awq": True, |
| 87 | + "quant_method": OVQuantizationMethod.AWQ, |
82 | 88 | }, |
83 | 89 | "bigscience/bloomz-560m": { |
84 | 90 | "bits": 4, |
85 | 91 | "sym": True, |
86 | 92 | "group_size": 64, |
87 | 93 | "ratio": 0.8, |
88 | 94 | "dataset": "wikitext2", |
89 | | - "awq": True, |
| 95 | + "quant_method": OVQuantizationMethod.AWQ, |
90 | 96 | }, |
91 | 97 | "mistralai/Mixtral-8x7B-v0.1": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.8}, |
92 | 98 | "facebook/opt-2.7b": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.7}, |
93 | | - "togethercomputer/RedPajama-INCITE-Chat-3B-v1": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.8}, |
| 99 | + "togethercomputer/RedPajama-INCITE-Chat-3B-v1": { |
| 100 | + "bits": 4, |
| 101 | + "sym": False, |
| 102 | + "group_size": 128, |
| 103 | + "scale_estimation": True, |
| 104 | + }, |
94 | 105 | "lmsys/vicuna-7b-v1.5": {"bits": 4, "sym": False, "group_size": 128, "ratio": 1.0}, |
95 | 106 | "stabilityai/stablelm-tuned-alpha-3b": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.8}, |
96 | 107 | "mistralai/Mistral-7B-v0.1": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.9}, |
|
100 | 111 | "group_size": 128, |
101 | 112 | "ratio": 0.8, |
102 | 113 | "dataset": "wikitext2", |
103 | | - "awq": True, |
| 114 | + "quant_method": OVQuantizationMethod.AWQ, |
104 | 115 | }, |
| 116 | + "openai-community/gpt2": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.5, "scale_estimation": True}, |
| 117 | + "lmsys/longchat-7b-16k": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.9}, |
| 118 | + "bigcode/starcoder2-3b": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.9}, |
| 119 | + "TinyLlama/TinyLlama-1.1B-Chat-v1.0": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.8}, |
| 120 | + "stabilityai/stablelm-tuned-alpha-7b": { |
| 121 | + "bits": 4, |
| 122 | + "sym": False, |
| 123 | + "group_size": 128, |
| 124 | + "ratio": 0.6, |
| 125 | + "scale_estimation": True, |
| 126 | + }, |
| 127 | + "microsoft/phi-2": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.9}, |
105 | 128 | } |
106 | 129 |
|
107 | 130 | _DEFAULT_4BIT_CONFIG = { |
|
113 | 136 | } |
114 | 137 |
|
115 | 138 |
|
116 | | -class OVQuantizationMethod(str, Enum): |
117 | | - DEFAULT = "default" |
118 | | - HYBRID = "hybrid" |
119 | | - AWQ = "awq" |
120 | | - |
121 | | - |
122 | 139 | @dataclass |
123 | 140 | class OVQuantizationConfigBase(QuantizationConfigMixin): |
124 | 141 | """ |
|
0 commit comments