Commit c959c6b

Extend support to GraniteMoeSharedForCausalLM architecture (#126)
* feat: extend support to GraniteMoeSharedForCausalLM
* feat: extend support to GraniteMoeSharedForCausalLM
* feat: extend foak support to granitemoeshared arch
* feat: include shared moe as part of moe benches
* fix: lint errors
* feat: grad accum cannot be 0
* feat: have separate scenario for moe-shared
* feat: add small scenario for moe shared for quick testing
* fix: address review comments
* feat: add partial bench data
* feat: add mixtral results
* feat: update requirements file for moe bench
* fix: revert flash install fix

Signed-off-by: Mehant Kammakomati <[email protected]>
1 parent cbb2bda commit c959c6b

File tree: 9 files changed, +241 −68 lines

plugins/accelerated-moe/README.md

Lines changed: 2 additions & 2 deletions

@@ -51,12 +51,12 @@ tox -e run-benches \
     -x testenv:run-benches.deps+="-r plugins/accelerated-moe/requirements-khd.txt" \
     -x testenv:run-benches.setenv+="MEMORY_LOGGING=nvidia" \
     -- \
-    "1 2 4" 128 benchmark_outputs scenarios-moe.yaml accelerated-moe-scatter
+    "1 2 4" 128 benchmark_outputs scenarios-moe.yaml accelerated-moe-full
 ```
 or run the larger `Mixtral-8x7B` bench:
 ```
 tox ... \
-    8 128 benchmark_outputs scenarios-moe.yaml accelerated-moe-scatter-mixtral
+    8 128 benchmark_outputs scenarios-moe.yaml accelerated-moe-full-mixtral
 ```

 NOTE: if `FileNotFoundError` is observed on the *triton cache*, similar to issues like these:

plugins/accelerated-moe/src/fms_acceleration_moe/framework_plugin_scattermoe.py

Lines changed: 5 additions & 1 deletion

@@ -40,7 +40,11 @@ class ScatterMoEAccelerationPlugin(AccelerationPlugin):
     # if we decide to extract the kernels, then we do not need to anymore,
     # https://github.com/foundation-model-stack/fms-acceleration/issues/105

-    restricted_model_archs = ["GraniteMoeForCausalLM", "MixtralForCausalLM"]
+    restricted_model_archs = [
+        "GraniteMoeForCausalLM",
+        "MixtralForCausalLM",
+        "GraniteMoeSharedForCausalLM",
+    ]

     def __init__(self, configurations: Dict[str, Dict]):
         super().__init__(configurations)
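
For context, `restricted_model_archs` is the allow-list of architectures the ScatterMoE plugin will act on; this commit adds `GraniteMoeSharedForCausalLM` to it. A minimal sketch of checking such an allow-list against a Hugging Face model config, assuming the architecture names come from `config.architectures` (the helper name is illustrative, not the plugin's actual API):

```python
# Illustrative sketch only: gate on an architecture allow-list like
# `restricted_model_archs`. The helper name is hypothetical.
from transformers import AutoConfig

RESTRICTED_MODEL_ARCHS = [
    "GraniteMoeForCausalLM",
    "MixtralForCausalLM",
    "GraniteMoeSharedForCausalLM",
]


def is_arch_supported(model_name_or_path: str) -> bool:
    """Return True if any declared architecture is on the allow-list."""
    config = AutoConfig.from_pretrained(model_name_or_path)
    return any(arch in RESTRICTED_MODEL_ARCHS for arch in (config.architectures or []))
```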

plugins/accelerated-moe/src/fms_acceleration_moe/utils/scattermoe_constants.py

Lines changed: 7 additions & 0 deletions

@@ -76,6 +76,13 @@
         SCATTERMOE_SPEC_HAS_GATE,
         False,
     ),
+    "GraniteMoeSharedForCausalLM": (
+        "GraniteMoeSharedMoE",
+        "router",
+        "input_linear|output_linear|input_linear",
+        SCATTERMOE_SPEC_HAS_GATE,
+        False,
+    ),
 }

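The new entry reuses the tuple layout of the existing specs: what appears to be the MoE module class name, the router submodule name, the `|`-separated expert linear module names, the gate spec, and a trailing boolean. Purely as an illustration of reading such a spec (the field names below are my own labels, not constants defined in `scattermoe_constants.py`):

```python
# Illustrative sketch only: unpack a per-architecture spec tuple with the same
# shape as the entry added above. Field names are assumed for readability.
from typing import NamedTuple


class MoeSpec(NamedTuple):
    moe_cls_name: str          # e.g. "GraniteMoeSharedMoE"
    router_name: str           # e.g. "router"
    expert_linear_names: str   # "|"-separated, e.g. "input_linear|output_linear|input_linear"
    gate_spec: str             # stand-in for SCATTERMOE_SPEC_HAS_GATE
    trailing_flag: bool


SPECS = {
    "GraniteMoeSharedForCausalLM": MoeSpec(
        "GraniteMoeSharedMoE",
        "router",
        "input_linear|output_linear|input_linear",
        "has_gate",  # placeholder for SCATTERMOE_SPEC_HAS_GATE
        False,
    ),
}

spec = SPECS["GraniteMoeSharedForCausalLM"]
print(spec.moe_cls_name, spec.expert_linear_names.split("|"))
```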
plugins/fused-ops-and-kernels/src/fms_acceleration_foak/framework_plugin_fast_kernels.py

Lines changed: 3 additions & 0 deletions

@@ -44,6 +44,7 @@ def register_foak_model_patch_rules(
         gpt_bigcode,
         granite,
         granitemoe,
+        granitemoeshared,
         llama,
         mistral,
         mixtral,
@@ -54,6 +55,7 @@ def register_foak_model_patch_rules(
         *gpt_bigcode.get_mp_rules(base_type),
         *granite.get_mp_rules(base_type, config),
         *granitemoe.get_mp_rules(base_type),
+        *granitemoeshared.get_mp_rules(base_type),
         *llama.get_mp_rules(base_type, config),
         *mistral.get_mp_rules(base_type, config),
         *mixtral.get_mp_rules(base_type),
@@ -91,6 +93,7 @@ class FastKernelsAccelerationPlugin(AccelerationPlugin):
         "MixtralForCausalLM",
         "LlamaForCausalLM",
         "MistralForCausalLM",
+        "GraniteMoeSharedForCausalLM",
     ]

     def __init__(self, configurations: Dict[str, Dict]):
Lines changed: 141 additions & 0 deletions

@@ -0,0 +1,141 @@
+# Copyright The FMS HF Tuning Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Standard
+from functools import partial
+
+# Third Party
+from fms_acceleration.model_patcher import (
+    ModelPatcherRule,
+    ModelPatcherTrigger,
+    combine_functions,
+    combine_triggers,
+)
+
+# Local
+from ..kernels.unsloth.cross_entropy_loss import (
+    FastCrossEntropyLoss,
+    replace_custom_loss_when_triggered,
+)
+from ..kernels.unsloth.rms_layernorm import fast_rms_layernorm
+from ..kernels.unsloth.rope_embedding import fast_rope_embedding
+from .utils import (
+    KEY_O,
+    KEY_QKV,
+    build_lora_fused_ops,
+    get_transformers_version,
+    trigger_fused_ops,
+)
+
+
+def get_mp_rules(base_type: str):
+    """
+    Function to access all patch rules in this module.
+    If it is a forward_builder rule with `base_type` in
+    its forward builder argument, wrap the forward_builder
+    function as a partial function with the base_type argument
+    """
+    try:
+        # Third Party
+        from transformers.models.granitemoeshared.modeling_granitemoeshared import (  # pylint: disable=import-outside-toplevel
+            GraniteMoeSharedAttention,
+            GraniteMoeSharedForCausalLM,
+            GraniteMoeSharedRMSNorm,
+        )
+    except ImportError:
+        return []
+
+    return [
+        # TODO: have a generic version of this rule
+        # - do regex on RMSNorm class name
+        # - check on the tensors required for fast_rms_layernorm
+        ModelPatcherRule(
+            rule_id="granitemoeshared-rms",
+            trigger=ModelPatcherTrigger(check=GraniteMoeSharedRMSNorm),
+            forward=fast_rms_layernorm,
+        ),
+        # TODO: have a generic version of this rule
+        # - do regex on Attention class name
+        # - have a set of qkv / o module names and check on that
+        ModelPatcherRule(
+            rule_id="granitemoeshared-qkvo",
+            trigger=combine_triggers(
+                ModelPatcherTrigger(
+                    check=partial(
+                        trigger_fused_ops,
+                        attn_cls=GraniteMoeSharedAttention,
+                        submodule_names=["q_proj", "k_proj", "v_proj"],
+                    )
+                ),
+                ModelPatcherTrigger(
+                    check=partial(
+                        trigger_fused_ops,
+                        attn_cls=GraniteMoeSharedAttention,
+                        submodule_names=["o_proj"],
+                    )
+                ),
+                logic="OR",
+            ),
+            forward_builder=combine_functions(
+                partial(
+                    build_lora_fused_ops,
+                    submodule_names=["q_proj", "k_proj", "v_proj"],
+                    fused_op=KEY_QKV,
+                    base_type=base_type,
+                ),
+                partial(
+                    build_lora_fused_ops,
+                    submodule_names=["o_proj"],
+                    fused_op=KEY_O,
+                    base_type=base_type,
+                ),
+                logic="APPEND",
+            ),
+        ),
+        *[
+            (
+                ModelPatcherRule(
+                    rule_id="granitemoeshared-custom-loss",
+                    trigger=ModelPatcherTrigger(
+                        check=replace_custom_loss_when_triggered(
+                            GraniteMoeSharedForCausalLM,
+                            custom_loss_type="granite-custom-loss",
+                        )
+                    ),
+                )
+                if get_transformers_version() >= "4.46"
+                else ModelPatcherRule(
+                    rule_id="granitemoeshared-cross-ent",
+                    import_and_maybe_reload=(
+                        "torch.nn.CrossEntropyLoss",
+                        FastCrossEntropyLoss,
+                        "transformers.models.granitemoeshared.modeling_granitemoeshared",
+                    ),
+                )
+            )
+        ],
+        # TODO: have a generic version of this rule
+        # - get the module name
+        # - check if "apply_rotary_pos_emb" exists
+        # - patch
+        ModelPatcherRule(
+            rule_id="granitemoeshared-rope",
+            import_and_maybe_reload=(
+                "transformers.models.granitemoeshared.\
+modeling_granitemoeshared.apply_rotary_pos_emb",
+                fast_rope_embedding,
+                None,
+            ),
+        ),
+    ]
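
As the `framework_plugin_fast_kernels.py` change above shows, these rules are consumed by splatting `get_mp_rules(base_type)` into the plugin's combined rule list. A minimal sketch of that pattern, assuming the per-model modules live under `fms_acceleration_foak.models` and with an illustrative `base_type` value:

```python
# Illustrative sketch only: combine per-model patch rules the way
# register_foak_model_patch_rules does, via list splats. The import path and
# the base_type value are assumptions for this example.
from fms_acceleration_foak.models import granitemoe, granitemoeshared


def collect_rules(base_type: str):
    return [
        *granitemoe.get_mp_rules(base_type),
        # get_mp_rules returns [] when the installed transformers has no
        # granitemoeshared module, so older versions degrade gracefully.
        *granitemoeshared.get_mp_rules(base_type),
    ]


for rule in collect_rules(base_type="bnb"):
    print(rule.rule_id)
```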

scripts/benchmarks/benchmark.py

Lines changed: 2 additions & 1 deletion

@@ -319,10 +319,11 @@ def build_args_from_products(products: List[Dict], defaults: Dict):
             ]
         )
     elif grad_accum is None and pdtbs is not None:
+        grad_accum_steps = effective_batch_size // num_gpus // pdtbs
         argument_list.extend(
            [
                 "--gradient_accumulation_steps",
-                str(effective_batch_size // num_gpus // pdtbs),
+                str(1 if grad_accum_steps == 0 else grad_accum_steps),
             ]
         )
     else:
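
The added clamp keeps `--gradient_accumulation_steps` at a minimum of 1: with a small effective batch size the integer division can hit 0, which is not a valid value (the commit notes "grad accum cannot be 0"). A standalone sketch of the arithmetic, using the same variable meanings as `build_args_from_products`:

```python
# Illustrative sketch of the gradient-accumulation arithmetic fixed above:
# the floor division can produce 0, so the result is clamped to 1.
def grad_accum_steps(effective_batch_size: int, num_gpus: int, pdtbs: int) -> int:
    steps = effective_batch_size // num_gpus // pdtbs
    return 1 if steps == 0 else steps


# e.g. effective batch 8 on 4 GPUs with per-device batch size 8 -> 0 before the fix
assert grad_accum_steps(8, 4, 8) == 1
assert grad_accum_steps(128, 4, 8) == 4
```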
Lines changed: 42 additions & 25 deletions

@@ -1,25 +1,42 @@
-epoch,framework_config,gradient_accumulation_steps,mem_nvidia_mem_reserved,model_name_or_path,num_gpus,per_device_train_batch_size,torch_dtype,train_loss,train_runtime,train_samples_per_second,train_steps_per_second,train_tokens_per_second
-0.25,none,16.0,71199.0,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.9438143467903136,2371.9316,5.396,0.042,1505.608
-0.25,none,8.0,46829.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.9437569552659988,1355.7096,9.442,0.074,1317.096
-0.25,none,4.0,37996.0,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9437739425897598,708.3914,18.069,0.141,1260.32
-0.25,moe-scattermoe-granite-ep1,16.0,71187.0,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.9439476370811464,742.739,17.234,0.135,4808.149
-0.25,moe-scattermoe-granite-ep1,8.0,52503.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.9506204092502594,485.5103,26.364,0.206,3677.78
-0.25,moe-scattermoe-granite-ep1,4.0,51145.0,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9572784686088562,262.9566,48.677,0.38,3395.238
-0.25,moe-scattermoe-granite-ep2,8.0,40193.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.9437192791700364,577.2164,22.175,0.173,3093.467
-0.25,moe-scattermoe-granite-ep2,4.0,40878.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9509018939733506,300.285,42.626,0.333,2973.176
-0.25,moe-scattermoe-granite-ep4,4.0,31777.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9434539985656738,307.1264,41.677,0.326,2906.946
-0.25,moe-scattermoe-granite-ep1-padding-free,16.0,48401.0,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.9437484860420228,631.9756,20.254,0.158,3924.202
-0.25,moe-scattermoe-granite-ep1-padding-free,8.0,42452.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.9506663566827774,454.3444,28.172,0.22,2729.207
-0.25,moe-scattermoe-granite-ep1-padding-free,4.0,38560.0,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.957276314496994,241.2967,53.047,0.414,2569.451
-0.25,moe-scattermoe-granite-ep2-padding-free,8.0,31012.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.943688799738884,546.507,23.421,0.183,2268.955
-0.25,moe-scattermoe-granite-ep2-padding-free,4.0,28133.0,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9505942213535308,283.5444,45.143,0.353,2186.607
-0.25,moe-scattermoe-granite-ep4-padding-free,4.0,21585.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9441865116357804,284.6079,44.974,0.351,2178.436
-0.25,moe-scattermoe-granite-ep1-padding-free-foak,16.0,42651.0,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.9437448275089264,615.4528,20.798,0.162,4029.554
-0.25,moe-scattermoe-granite-ep1-padding-free-foak,8.0,37743.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.950773031115532,433.4811,29.528,0.231,2860.563
-0.25,moe-scattermoe-granite-ep1-padding-free-foak,4.0,35153.0,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9572476959228516,232.0428,55.162,0.431,2671.921
-0.25,moe-scattermoe-granite-ep2-padding-free-foak,8.0,26075.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.9437651455402374,524.7751,24.391,0.191,2362.917
-0.25,moe-scattermoe-granite-ep2-padding-free-foak,4.0,24665.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9507779973745346,274.126,46.694,0.365,2261.733
-0.25,moe-scattermoe-granite-ep4-padding-free-foak,4.0,18368.0,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.943427557349205,278.1245,46.023,0.36,2229.217
-,none,,65607.25,mistralai/Mixtral-8x7B-Instruct-v0.1,8,1,bfloat16,0.8599078696966171,4180.9544,3.062,0.024,80.364
-,moe-scattermoe-granite-ep8,,52004.75,mistralai/Mixtral-8x7B-Instruct-v0.1,8,1,bfloat16,0.8588122856616974,1071.1967,11.949,0.093,313.668
-,moe-scattermoe-granite-ep8-foak,,51961.25,mistralai/Mixtral-8x7B-Instruct-v0.1,8,1,bfloat16,0.8599798053503036,1043.6675,12.264,0.096,321.942
+epoch,framework_config,gradient_accumulation_steps,mem_nvidia_mem_reserved,model_name_or_path,num_gpus,per_device_train_batch_size,torch_dtype,train_loss,train_runtime,train_samples_per_second,train_steps_per_second,train_tokens_per_second
+0.25,none,16,72072,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.938093501,1986.7714,6.443,0.05,1797.489
+0.25,none,8,49689,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.937983845,1082.5484,11.824,0.092,1649.441
+0.25,none,4,41754.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.93852025,569.5617,22.473,0.176,1567.521
+0.25,moe-scattermoe-granite-ep1,16,72068,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.938054211,660.687,19.374,0.151,5405.283
+0.25,moe-scattermoe-granite-ep1,8,53917,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.944801819,362.751,35.286,0.276,4922.385
+0.25,moe-scattermoe-granite-ep1,4,53070,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.95192752,202.3782,63.248,0.494,4411.543
+0.25,moe-scattermoe-granite-ep2,8,41880,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.938050581,441.5269,28.99,0.226,4044.147
+0.25,moe-scattermoe-granite-ep2,4,43092,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.945302382,235.4383,54.367,0.425,3792.076
+0.25,moe-scattermoe-granite-ep4,4,33673.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.938171822,259.2932,49.365,0.386,3443.207
+0.25,moe-scattermoe-granite-ep1-padding-free,16,49580,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.937993399,505.6847,25.312,0.198,4904.241
+0.25,moe-scattermoe-granite-ep1-padding-free,8,43821,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.944808855,311.785,41.054,0.321,3977.099
+0.25,moe-scattermoe-granite-ep1-padding-free,4,40070.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.951866873,169.9554,75.314,0.588,3648.016
+0.25,moe-scattermoe-granite-ep1-padding-free-foak,16,49114,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.938123143,476.8099,26.845,0.21,5201.235
+0.25,moe-scattermoe-granite-ep1-padding-free-foak,8,43865,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.944894351,296.5204,43.167,0.337,4181.837
+0.25,moe-scattermoe-granite-ep1-padding-free-foak,4,40070.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.951975068,163.756,78.165,0.611,3786.12
+0.25,moe-scattermoe-granite-ep2-padding-free,8,32276,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.937930156,356.1296,35.942,0.281,3481.878
+0.25,moe-scattermoe-granite-ep2-padding-free,4,29787,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.945339936,192.7168,66.419,0.519,3217.156
+0.25,moe-scattermoe-granite-ep2-padding-free-foak,8,32376,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.938017525,342.9327,37.325,0.292,3615.87
+0.25,moe-scattermoe-granite-ep2-padding-free-foak,4,29734.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.945357794,184.554,69.356,0.542,3359.451
+0.25,moe-scattermoe-granite-ep4-padding-free,4,23386.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.938359724,191.205,66.944,0.523,3242.593
+0.25,moe-scattermoe-granite-ep4-padding-free-foak,4,23359.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.938333818,183.9191,69.596,0.544,3371.048
+0.25,none,16,81018,ibm-research/moe-7b-1b-active-shared-experts,1,8,bfloat16,0.878051637,4223.9158,3.03,0.024,839.411
+0.25,none,8,74462,ibm-research/moe-7b-1b-active-shared-experts,2,8,bfloat16,0.877874975,2247.4716,5.695,0.044,788.798
+0.25,none,4,63033,ibm-research/moe-7b-1b-active-shared-experts,4,8,bfloat16,0.878253661,1155.5903,11.077,0.087,767.054
+0.25,moe-scattermoe-granite-ep1,16,81018,ibm-research/moe-7b-1b-active-shared-experts,1,8,bfloat16,0.878006854,907.8407,14.099,0.11,3905.531
+0.25,moe-scattermoe-granite-ep1,8,73870,ibm-research/moe-7b-1b-active-shared-experts,2,8,bfloat16,0.879557709,492.5063,25.99,0.203,3599.548
+0.25,moe-scattermoe-granite-ep1,4,74108.5,ibm-research/moe-7b-1b-active-shared-experts,4,8,bfloat16,0.881521969,277.8191,46.073,0.36,3190.565
+0.25,moe-scattermoe-granite-ep2,8,54168,ibm-research/moe-7b-1b-active-shared-experts,2,8,bfloat16,0.877982622,563.0434,22.734,0.178,3148.603
+0.25,moe-scattermoe-granite-ep2,4,54582,ibm-research/moe-7b-1b-active-shared-experts,4,8,bfloat16,0.880103117,299.2522,42.773,0.334,2962.05
+0.25,moe-scattermoe-granite-ep1-padding-free,16,77632,ibm-research/moe-7b-1b-active-shared-experts,1,8,bfloat16,0.878018975,726.1255,17.628,0.138,3410.98
+0.25,moe-scattermoe-granite-ep1-padding-free,8,68019,ibm-research/moe-7b-1b-active-shared-experts,2,8,bfloat16,0.879643369,429.5618,29.798,0.233,2882.938
+0.25,moe-scattermoe-granite-ep1-padding-free,4,63879,ibm-research/moe-7b-1b-active-shared-experts,4,8,bfloat16,0.88148216,239.3677,53.474,0.418,2586.815
+0.25,moe-scattermoe-granite-ep1-padding-free-foak,16,72666,ibm-research/moe-7b-1b-active-shared-experts,1,8,bfloat16,0.878073001,688.38,18.594,0.145,3598.013
+0.25,moe-scattermoe-granite-ep1-padding-free-foak,8,63074,ibm-research/moe-7b-1b-active-shared-experts,2,8,bfloat16,0.879622684,419.7876,30.492,0.238,2950.063
+0.25,moe-scattermoe-granite-ep1-padding-free-foak,4,60126.5,ibm-research/moe-7b-1b-active-shared-experts,4,8,bfloat16,0.881447418,231.7976,55.221,0.431,2671.296
+0.25,moe-scattermoe-granite-ep2-padding-free,8,45093,ibm-research/moe-7b-1b-active-shared-experts,2,8,bfloat16,0.8779908,471.1344,27.168,0.212,2628.549
+0.25,moe-scattermoe-granite-ep2-padding-free,4,42590,ibm-research/moe-7b-1b-active-shared-experts,4,8,bfloat16,0.879999972,250.48,51.102,0.399,2472.054
+0.25,moe-scattermoe-granite-ep2-padding-free-foak,8,40281,ibm-research/moe-7b-1b-active-shared-experts,2,8,bfloat16,0.878110015,461.6668,27.726,0.217,2682.454
+0.25,moe-scattermoe-granite-ep2-padding-free-foak,4,38934.5,ibm-research/moe-7b-1b-active-shared-experts,4,8,bfloat16,0.880085612,250.2941,51.14,0.4,2473.889
+0.25,moe-scattermoe-granite-ep8,16,56845,mistralai/Mixtral-8x7B-Instruct-v0.1,8,1,bfloat16,0.86557077,779.9315,16.412,0.128,430.807
+0.25,moe-scattermoe-granite-ep8-foak,16,56769.25,mistralai/Mixtral-8x7B-Instruct-v0.1,8,1,bfloat16,0.86551428,734.0756,17.437,0.136,457.719
