
Commit 9122a76

feat: Support granite 4 preview architecture for MoE kernels, EP, and fast kernels (#143)
* feat: MoE Kernels, EP, and Fast Kernels for Granite 4 Preview architecture
* feat: update benchmark csv
* feat: update requirements

---------

Signed-off-by: Mehant Kammakomati <[email protected]>
1 parent 179848a commit 9122a76

File tree

10 files changed: +270 -106 lines changed

plugins/accelerated-moe/src/fms_acceleration_moe/framework_plugin_scattermoe.py

Lines changed: 1 addition & 0 deletions

@@ -36,6 +36,7 @@ class ScatterMoEAccelerationPlugin(AccelerationPlugin):
         "GraniteMoeForCausalLM",
         "MixtralForCausalLM",
         "GraniteMoeSharedForCausalLM",
+        "GraniteMoeHybridForCausalLM",
     ]
 
     def __init__(self, configurations: Dict[str, Dict]):
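For context, this list is the allow-list that gates the plugin: it only activates when the checkpoint's declared architecture appears in it, and the commit adds the Granite 4 preview class. A minimal sketch of that kind of gate, assuming the usual HF `config.architectures` field (the helper below is illustrative, not code from this repo):

```
# Illustrative sketch: gating on an architecture allow-list like the one above.
from transformers import AutoConfig

RESTRICTED_MODEL_ARCHS = [
    "GraniteMoeForCausalLM",
    "MixtralForCausalLM",
    "GraniteMoeSharedForCausalLM",
    "GraniteMoeHybridForCausalLM",  # added by this commit
]

def is_arch_supported(model_name_or_path: str) -> bool:
    # config.architectures lists the model classes the checkpoint declares
    config = AutoConfig.from_pretrained(model_name_or_path)
    return any(arch in RESTRICTED_MODEL_ARCHS for arch in (config.architectures or []))
```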

plugins/accelerated-moe/src/fms_acceleration_moe/utils/scattermoe_constants.py

Lines changed: 7 additions & 0 deletions

@@ -83,6 +83,13 @@
         SCATTERMOE_SPEC_HAS_GATE,
         False,
     ),
+    "GraniteMoeHybridForCausalLM": (
+        "GraniteMoeHybridMoE",
+        "router",
+        "input_linear|output_linear|input_linear",
+        SCATTERMOE_SPEC_HAS_GATE,
+        False,
+    ),
 }
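The new entry follows the five-field spec used by the other architectures: the MoE module class to replace ("GraniteMoeHybridMoE"), the router submodule name, a `|`-separated triple naming the expert weights in w1|w2|w3 order (`input_linear` appears twice, mirroring how GraniteMoe stores the fused gate and up projections in one parameter), the `SCATTERMOE_SPEC_HAS_GATE` flag, and a trailing boolean. A hedged sketch of unpacking such an entry; the field labels are mine, not the plugin's:

```
# Hedged sketch: unpacking a ScatterMoE spec entry like the one added above.
# Field labels are illustrative; the plugin's actual consumption may differ.
spec = (
    "GraniteMoeHybridMoE",                      # MoE module class to swap for ScatterMoE
    "router",                                   # router / top-k gate submodule name
    "input_linear|output_linear|input_linear",  # expert weights in w1|w2|w3 order
    True,                                       # SCATTERMOE_SPEC_HAS_GATE
    False,                                      # trailing flag (semantics not shown in this diff)
)
moe_cls, router_name, expert_map, has_gate, trailing_flag = spec
w1_name, w2_name, w3_name = expert_map.split("|")
assert w1_name == w3_name == "input_linear"  # fused gate+up live in one tensor
```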

plugins/accelerated-peft/requirements.txt

Lines changed: 3 additions & 4 deletions

@@ -5,10 +5,9 @@
 accelerate >= 0.29
 
 # bitsandbytes for the BNB plugin
-# - lower bound is because bnb is missing quant_state
-# - upper bound is because of segmentation faults
-# see https://github.com/foundation-model-stack/fms-acceleration/issues/17
-bitsandbytes >=0.41,<=0.43.3
+# versions above 0.45.1 to support torch 2.6
+
+bitsandbytes >= 0.45.1
 
 # Used to manage the thread limit in functions for converting old
 # GPTQ models to new GPTQ model format that support symmetrical=False
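The old window (`>=0.41,<=0.43.3`) is replaced by a single lower bound, since bitsandbytes releases from 0.45.1 add torch 2.6 support. A small illustrative runtime guard for the new bound, not part of this commit:

```
# Illustrative runtime check of the new lower bound (not from this commit).
from importlib.metadata import version
from packaging.version import Version

def assert_bnb_supports_torch_2_6() -> None:
    installed = Version(version("bitsandbytes"))
    if installed < Version("0.45.1"):
        raise RuntimeError(
            f"bitsandbytes {installed} is too old; >= 0.45.1 is needed for torch 2.6"
        )
```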

plugins/fused-ops-and-kernels/src/fms_acceleration_foak/framework_plugin_fast_kernels.py

Lines changed: 3 additions & 0 deletions

@@ -45,6 +45,7 @@ def register_foak_model_patch_rules(
     granite,
     granitemoe,
     granitemoeshared,
+    granitemoehybrid,
     llama,
     mistral,
     mixtral,
@@ -56,6 +57,7 @@ def register_foak_model_patch_rules(
         *granite.get_mp_rules(base_type, config),
         *granitemoe.get_mp_rules(base_type),
         *granitemoeshared.get_mp_rules(base_type),
+        *granitemoehybrid.get_mp_rules(base_type),
         *llama.get_mp_rules(base_type, config),
         *mistral.get_mp_rules(base_type, config),
         *mixtral.get_mp_rules(base_type),
@@ -94,6 +96,7 @@ class FastKernelsAccelerationPlugin(AccelerationPlugin):
         "LlamaForCausalLM",
         "MistralForCausalLM",
         "GraniteMoeSharedForCausalLM",
+        "GraniteMoeHybridForCausalLM",
     ]
 
     def __init__(self, configurations: Dict[str, Dict]):
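These three hunks wire the new model in end to end: the module is imported, its rules are spliced into the aggregate rule list, and the plugin's allow-list accepts `GraniteMoeHybridForCausalLM`. A hedged sketch of the aggregation pattern visible in the middle hunk (simplified; some modules also take a `config` argument, as the diff shows):

```
# Hedged sketch of the rule-aggregation pattern in the hunk above: every model
# module exposes get_mp_rules(...), and the results are concatenated via
# list unpacking before being registered with the model patcher.
def collect_rules(base_type, config, modules_simple, modules_with_config):
    return [
        *(rule for m in modules_simple for rule in m.get_mp_rules(base_type)),
        *(rule for m in modules_with_config for rule in m.get_mp_rules(base_type, config)),
    ]
```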
plugins/fused-ops-and-kernels/src/fms_acceleration_foak/models/granitemoehybrid.py (new file)

Lines changed: 141 additions & 0 deletions
@@ -0,0 +1,141 @@
+# Copyright The FMS HF Tuning Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Standard
+from functools import partial
+
+# Third Party
+from fms_acceleration.model_patcher import (
+    ModelPatcherRule,
+    ModelPatcherTrigger,
+    combine_functions,
+    combine_triggers,
+)
+
+# Local
+from ..kernels.unsloth.cross_entropy_loss import (
+    FastCrossEntropyLoss,
+    replace_custom_loss_when_triggered,
+)
+from ..kernels.unsloth.rms_layernorm import fast_rms_layernorm
+from ..kernels.unsloth.rope_embedding import fast_rope_embedding
+from .utils import (
+    KEY_O,
+    KEY_QKV,
+    build_lora_fused_ops,
+    get_transformers_version,
+    trigger_fused_ops,
+)
+
+
+def get_mp_rules(base_type: str):
+    """
+    Function to access all patch rules in this module.
+    If it is a forward_builder rule with `base_type` in
+    its forward builder argument, wrap the forward_builder
+    function as a partial function with the base_type argument
+    """
+    try:
+        # Third Party
+        from transformers.models.granitemoehybrid.modeling_granitemoehybrid import (  # pylint: disable=import-outside-toplevel
+            GraniteMoeHybridAttention,
+            GraniteMoeHybridForCausalLM,
+            GraniteMoeHybridRMSNorm,
+        )
+    except ImportError:
+        return []
+
+    return [
+        # TODO: have a generic version of this rule
+        # - do regex on RMSNorm class name
+        # - check on the tensors required for fast_rms_layernorm
+        ModelPatcherRule(
+            rule_id="granitemoehybrid-rms",
+            trigger=ModelPatcherTrigger(check=GraniteMoeHybridRMSNorm),
+            forward=fast_rms_layernorm,
+        ),
+        # TODO: have a generic version of this rule
+        # - do regex on Attention class name
+        # - have a set of qkv / o module names and check on that
+        ModelPatcherRule(
+            rule_id="granitemoehybrid-qkvo",
+            trigger=combine_triggers(
+                ModelPatcherTrigger(
+                    check=partial(
+                        trigger_fused_ops,
+                        attn_cls=GraniteMoeHybridAttention,
+                        submodule_names=["q_proj", "k_proj", "v_proj"],
+                    )
+                ),
+                ModelPatcherTrigger(
+                    check=partial(
+                        trigger_fused_ops,
+                        attn_cls=GraniteMoeHybridAttention,
+                        submodule_names=["o_proj"],
+                    )
+                ),
+                logic="OR",
+            ),
+            forward_builder=combine_functions(
+                partial(
+                    build_lora_fused_ops,
+                    submodule_names=["q_proj", "k_proj", "v_proj"],
+                    fused_op=KEY_QKV,
+                    base_type=base_type,
+                ),
+                partial(
+                    build_lora_fused_ops,
+                    submodule_names=["o_proj"],
+                    fused_op=KEY_O,
+                    base_type=base_type,
+                ),
+                logic="APPEND",
+            ),
+        ),
+        *[
+            (
+                ModelPatcherRule(
+                    rule_id="granitemoehybrid-custom-loss",
+                    trigger=ModelPatcherTrigger(
+                        check=replace_custom_loss_when_triggered(
+                            GraniteMoeHybridForCausalLM,
+                            custom_loss_type="granite-custom-loss",
+                        )
+                    ),
+                )
+                if get_transformers_version() >= "4.46"
+                else ModelPatcherRule(
+                    rule_id="granitemoehybrid-cross-ent",
+                    import_and_maybe_reload=(
+                        "torch.nn.CrossEntropyLoss",
+                        FastCrossEntropyLoss,
+                        "transformers.models.granitemoehybrid.modeling_granitemoehybrid",
+                    ),
+                )
+            )
+        ],
+        # TODO: have a generic version of this rule
+        # - get the module name
+        # - check if "apply_rotary_pos_emb" exists
+        # - patch
+        ModelPatcherRule(
+            rule_id="granitemoehybrid-rope",
+            import_and_maybe_reload=(
+                "transformers.models.granitemoehybrid.\
+modeling_granitemoehybrid.apply_rotary_pos_emb",
                fast_rope_embedding,
+                None,
+            ),
+        ),
+    ]
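A quick sanity check of the new module is to call `get_mp_rules` directly and list the rule ids it returns. The snippet below is a hedged usage sketch: the import path assumes the file sits in the `models` package alongside its Granite siblings, and the `base_type` value ("bitsandbytes") is one example of a quantized base-layer family used elsewhere in this repo; if the installed transformers lacks GraniteMoeHybrid, the ImportError guard above makes the call return an empty list.

```
# Hedged usage sketch: inspect the rule ids produced by the new module.
from fms_acceleration_foak.models import granitemoehybrid

rules = granitemoehybrid.get_mp_rules(base_type="bitsandbytes")
print([r.rule_id for r in rules])
# expected (with a recent transformers): granitemoehybrid-rms, granitemoehybrid-qkvo,
# granitemoehybrid-custom-loss (or granitemoehybrid-cross-ent), granitemoehybrid-rope
```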

scripts/benchmarks/README.md

Lines changed: 5 additions & 0 deletions

@@ -56,6 +56,11 @@ The best way is via `tox` which manages the dependencies, including installing t
   pip install -r setup_requirements.txt
   ```
 
+- install mamba kernels to evaluate mamba based models:
+  ```
+  tox -e run-benches -x testenv:run-benches.setenv+="INSTALL_MAMBA=true" ...
+  ```
+
 - run a *small* representative set of benches:
   ```
   tox -e run-benches
Lines changed: 41 additions & 41 deletions

@@ -1,42 +1,42 @@
 epoch,framework_config,gradient_accumulation_steps,mem_nvidia_mem_reserved,model_name_or_path,num_gpus,per_device_train_batch_size,torch_dtype,train_loss,train_runtime,train_samples_per_second,train_steps_per_second,train_tokens_per_second
-0.25,none,16,72072,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.938093501,1986.7714,6.443,0.05,1797.489
-0.25,none,8,49689,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.937983845,1082.5484,11.824,0.092,1649.441
-0.25,none,4,41754.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.93852025,569.5617,22.473,0.176,1567.521
-0.25,moe-scattermoe-granite-ep1,16,72068,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.938054211,660.687,19.374,0.151,5405.283
-0.25,moe-scattermoe-granite-ep1,8,53917,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.944801819,362.751,35.286,0.276,4922.385
-0.25,moe-scattermoe-granite-ep1,4,53070,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.95192752,202.3782,63.248,0.494,4411.543
-0.25,moe-scattermoe-granite-ep2,8,41880,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.938050581,441.5269,28.99,0.226,4044.147
-0.25,moe-scattermoe-granite-ep2,4,43092,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.945302382,235.4383,54.367,0.425,3792.076
-0.25,moe-scattermoe-granite-ep4,4,33673.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.938171822,259.2932,49.365,0.386,3443.207
-0.25,moe-scattermoe-granite-ep1-padding-free,16,49580,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.937993399,505.6847,25.312,0.198,4904.241
-0.25,moe-scattermoe-granite-ep1-padding-free,8,43821,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.944808855,311.785,41.054,0.321,3977.099
-0.25,moe-scattermoe-granite-ep1-padding-free,4,40070.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.951866873,169.9554,75.314,0.588,3648.016
-0.25,moe-scattermoe-granite-ep1-padding-free-foak,16,49114,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.938123143,476.8099,26.845,0.21,5201.235
-0.25,moe-scattermoe-granite-ep1-padding-free-foak,8,43865,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.944894351,296.5204,43.167,0.337,4181.837
-0.25,moe-scattermoe-granite-ep1-padding-free-foak,4,40070.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.951975068,163.756,78.165,0.611,3786.12
-0.25,moe-scattermoe-granite-ep2-padding-free,8,32276,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.937930156,356.1296,35.942,0.281,3481.878
-0.25,moe-scattermoe-granite-ep2-padding-free,4,29787,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.945339936,192.7168,66.419,0.519,3217.156
-0.25,moe-scattermoe-granite-ep2-padding-free-foak,8,32376,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.938017525,342.9327,37.325,0.292,3615.87
-0.25,moe-scattermoe-granite-ep2-padding-free-foak,4,29734.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.945357794,184.554,69.356,0.542,3359.451
-0.25,moe-scattermoe-granite-ep4-padding-free,4,23386.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.938359724,191.205,66.944,0.523,3242.593
-0.25,moe-scattermoe-granite-ep4-padding-free-foak,4,23359.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.938333818,183.9191,69.596,0.544,3371.048
-0.25,none,16,81018,ibm-research/moe-7b-1b-active-shared-experts,1,8,bfloat16,0.878051637,4223.9158,3.03,0.024,839.411
-0.25,none,8,74462,ibm-research/moe-7b-1b-active-shared-experts,2,8,bfloat16,0.877874975,2247.4716,5.695,0.044,788.798
-0.25,none,4,63033,ibm-research/moe-7b-1b-active-shared-experts,4,8,bfloat16,0.878253661,1155.5903,11.077,0.087,767.054
-0.25,moe-scattermoe-granite-ep1,16,81018,ibm-research/moe-7b-1b-active-shared-experts,1,8,bfloat16,0.878006854,907.8407,14.099,0.11,3905.531
-0.25,moe-scattermoe-granite-ep1,8,73870,ibm-research/moe-7b-1b-active-shared-experts,2,8,bfloat16,0.879557709,492.5063,25.99,0.203,3599.548
-0.25,moe-scattermoe-granite-ep1,4,74108.5,ibm-research/moe-7b-1b-active-shared-experts,4,8,bfloat16,0.881521969,277.8191,46.073,0.36,3190.565
-0.25,moe-scattermoe-granite-ep2,8,54168,ibm-research/moe-7b-1b-active-shared-experts,2,8,bfloat16,0.877982622,563.0434,22.734,0.178,3148.603
-0.25,moe-scattermoe-granite-ep2,4,54582,ibm-research/moe-7b-1b-active-shared-experts,4,8,bfloat16,0.880103117,299.2522,42.773,0.334,2962.05
-0.25,moe-scattermoe-granite-ep1-padding-free,16,77632,ibm-research/moe-7b-1b-active-shared-experts,1,8,bfloat16,0.878018975,726.1255,17.628,0.138,3410.98
-0.25,moe-scattermoe-granite-ep1-padding-free,8,68019,ibm-research/moe-7b-1b-active-shared-experts,2,8,bfloat16,0.879643369,429.5618,29.798,0.233,2882.938
-0.25,moe-scattermoe-granite-ep1-padding-free,4,63879,ibm-research/moe-7b-1b-active-shared-experts,4,8,bfloat16,0.88148216,239.3677,53.474,0.418,2586.815
-0.25,moe-scattermoe-granite-ep1-padding-free-foak,16,72666,ibm-research/moe-7b-1b-active-shared-experts,1,8,bfloat16,0.878073001,688.38,18.594,0.145,3598.013
-0.25,moe-scattermoe-granite-ep1-padding-free-foak,8,63074,ibm-research/moe-7b-1b-active-shared-experts,2,8,bfloat16,0.879622684,419.7876,30.492,0.238,2950.063
-0.25,moe-scattermoe-granite-ep1-padding-free-foak,4,60126.5,ibm-research/moe-7b-1b-active-shared-experts,4,8,bfloat16,0.881447418,231.7976,55.221,0.431,2671.296
-0.25,moe-scattermoe-granite-ep2-padding-free,8,45093,ibm-research/moe-7b-1b-active-shared-experts,2,8,bfloat16,0.8779908,471.1344,27.168,0.212,2628.549
-0.25,moe-scattermoe-granite-ep2-padding-free,4,42590,ibm-research/moe-7b-1b-active-shared-experts,4,8,bfloat16,0.879999972,250.48,51.102,0.399,2472.054
-0.25,moe-scattermoe-granite-ep2-padding-free-foak,8,40281,ibm-research/moe-7b-1b-active-shared-experts,2,8,bfloat16,0.878110015,461.6668,27.726,0.217,2682.454
-0.25,moe-scattermoe-granite-ep2-padding-free-foak,4,38934.5,ibm-research/moe-7b-1b-active-shared-experts,4,8,bfloat16,0.880085612,250.2941,51.14,0.4,2473.889
-0.25,moe-scattermoe-granite-ep8,16,56845,mistralai/Mixtral-8x7B-Instruct-v0.1,8,1,bfloat16,0.86557077,779.9315,16.412,0.128,430.807
-0.25,moe-scattermoe-granite-ep8-foak,16,56769.25,mistralai/Mixtral-8x7B-Instruct-v0.1,8,1,bfloat16,0.86551428,734.0756,17.437,0.136,457.719
+0.25,none,16,77748,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.93802941,1830.0024,6.995,0.055,1951.473
+0.25,none,8,56837,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.937978864,970.492,13.189,0.103,1839.891
+0.25,none,4,47395,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.938433378,508.0143,25.196,0.197,1757.431
+0.25,moe-scattermoe-granite-ep1,16,78376,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.938084066,656.3588,19.502,0.152,5440.927
+0.25,moe-scattermoe-granite-ep2,8,45422,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.938047224,439.6446,29.114,0.227,4061.462
+0.25,moe-scattermoe-granite-ep2,4,46506,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.945220579,234.7146,54.534,0.426,3803.769
+0.25,moe-scattermoe-granite-ep4,4,37025.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.938337043,255.5461,50.089,0.391,3493.694
+0.25,moe-scattermoe-granite-ep1-padding-free,16,49462,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,1.196784412,431.9774,29.631,0.231,5741.041
+0.25,moe-scattermoe-granite-ep1-padding-free-foak,16,49060,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,1.200383433,398.976,32.082,0.251,6215.913
+0.25,moe-scattermoe-granite-ep2-padding-free,8,32265,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,1.198062455,335.7106,38.128,0.298,3693.657
+0.25,moe-scattermoe-granite-ep2-padding-free,4,29720,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,1.210442821,180.924,70.748,0.553,3426.854
+0.25,moe-scattermoe-granite-ep2-padding-free-foak,8,32285,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,1.199450992,320.9043,39.887,0.312,3864.08
+0.25,moe-scattermoe-granite-ep2-padding-free-foak,4,29771,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,1.21032447,175.5856,72.899,0.57,3531.042
+0.25,moe-scattermoe-granite-ep4-padding-free,4,23248,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,1.200576434,175.5905,72.897,0.57,3530.942
+0.25,moe-scattermoe-granite-ep4-padding-free-foak,4,23422,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,1.199994416,173.1652,73.918,0.577,3580.397
+0.25,none,16,78704,ibm-research/moe-7b-1b-active-shared-experts,1,8,bfloat16,0.878016037,3924.9586,3.261,0.025,903.347
+0.25,none,8,79299,ibm-research/moe-7b-1b-active-shared-experts,2,8,bfloat16,0.877915607,2059.5193,6.215,0.049,860.783
+0.25,none,4,67966.5,ibm-research/moe-7b-1b-active-shared-experts,4,8,bfloat16,0.878266089,1054.1087,12.143,0.095,840.9
+0.25,moe-scattermoe-granite-ep1,16,80638,ibm-research/moe-7b-1b-active-shared-experts,1,8,bfloat16,0.878047609,899.6248,14.228,0.111,3941.198
+0.25,moe-scattermoe-granite-ep2,8,58769,ibm-research/moe-7b-1b-active-shared-experts,2,8,bfloat16,0.877957979,550.4483,23.254,0.182,3220.647
+0.25,moe-scattermoe-granite-ep2,4,58932,ibm-research/moe-7b-1b-active-shared-experts,4,8,bfloat16,0.880045412,300.1744,42.642,0.333,2952.95
+0.25,moe-scattermoe-granite-ep1-padding-free,16,77512,ibm-research/moe-7b-1b-active-shared-experts,1,8,bfloat16,1.256636418,630.1126,20.314,0.159,3930.726
+0.25,moe-scattermoe-granite-ep1-padding-free-foak,16,72604,ibm-research/moe-7b-1b-active-shared-experts,1,8,bfloat16,1.261311768,598.0884,21.402,0.167,4141.194
+0.25,moe-scattermoe-granite-ep2-padding-free,8,45237,ibm-research/moe-7b-1b-active-shared-experts,2,8,bfloat16,1.259768015,436.0593,29.354,0.229,2839.981
+0.25,moe-scattermoe-granite-ep2-padding-free,4,42449,ibm-research/moe-7b-1b-active-shared-experts,4,8,bfloat16,1.267803932,236.4495,54.134,0.423,2618.741
+0.25,moe-scattermoe-granite-ep2-padding-free-foak,8,40279,ibm-research/moe-7b-1b-active-shared-experts,2,8,bfloat16,1.262602715,434.2257,29.478,0.23,2851.973
+0.25,moe-scattermoe-granite-ep2-padding-free-foak,4,38827,ibm-research/moe-7b-1b-active-shared-experts,4,8,bfloat16,1.268255376,231.5911,55.27,0.432,2673.678
+0.25,none,16,78670,ibm-granite/granite-4.0-tiny-preview,1,8,bfloat16,0.855069562,4106.7072,3.117,0.024,863.368
+0.25,none,8,77928,ibm-granite/granite-4.0-tiny-preview,2,8,bfloat16,0.854871556,2107.9397,6.072,0.047,841.011
+0.25,none,4,70000,ibm-granite/granite-4.0-tiny-preview,4,8,bfloat16,0.855348825,1117.3656,11.456,0.089,793.295
+0.25,moe-scattermoe-granite-ep1,16,78634,ibm-granite/granite-4.0-tiny-preview,1,8,bfloat16,0.855006663,968.4797,13.217,0.103,3660.996
+0.25,moe-scattermoe-granite-ep2,8,61692,ibm-granite/granite-4.0-tiny-preview,2,8,bfloat16,0.854951358,611.0101,20.949,0.164,2901.425
+0.25,moe-scattermoe-granite-ep2,4,61213,ibm-granite/granite-4.0-tiny-preview,4,8,bfloat16,0.856631212,337.9265,37.878,0.296,2623.055
+0.25,moe-scattermoe-granite-ep1-padding-free,16,79842,ibm-granite/granite-4.0-tiny-preview,1,8,bfloat16,0.852907363,823.4639,15.544,0.121,3007.782
+0.25,moe-scattermoe-granite-ep1-padding-free-foak,16,76916,ibm-granite/granite-4.0-tiny-preview,1,8,bfloat16,0.852861792,734.3252,17.431,0.136,3372.893
+0.25,moe-scattermoe-granite-ep2-padding-free,8,48068,ibm-granite/granite-4.0-tiny-preview,2,8,bfloat16,0.852783817,554.2306,23.095,0.18,2234.449
+0.25,moe-scattermoe-granite-ep2-padding-free,4,44790,ibm-granite/granite-4.0-tiny-preview,4,8,bfloat16,0.854414411,308.2351,41.527,0.324,2008.856
+0.25,moe-scattermoe-granite-ep2-padding-free-foak,8,43180,ibm-granite/granite-4.0-tiny-preview,2,8,bfloat16,0.85276741,541.444,23.64,0.185,2287.217
+0.25,moe-scattermoe-granite-ep2-padding-free-foak,4,41128,ibm-granite/granite-4.0-tiny-preview,4,8,bfloat16,0.854435267,308.1642,41.536,0.325,2009.318
+0.25,moe-scattermoe-granite-ep8,16,56687.5,mistralai/Mixtral-8x7B-Instruct-v0.1,8,1,bfloat16,0.8654898,810.9653,15.784,0.123,414.321
+0.25,moe-scattermoe-granite-ep8-foak,16,56710.25,mistralai/Mixtral-8x7B-Instruct-v0.1,8,1,bfloat16,0.86548216,775.5419,16.505,0.129,433.245
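The refreshed CSV drops some older ep1/padding-free rows and adds a block for `ibm-granite/granite-4.0-tiny-preview`, the Granite 4 preview model this commit targets. A hedged pandas sketch for summarizing throughput per framework config from this file (the CSV path is a placeholder; the column names match the header above):

```
# Hedged sketch: summarize per-config throughput from the benchmark CSV above.
import pandas as pd

df = pd.read_csv("benchmarks.csv")  # placeholder path for the file in this diff
granite4 = df[df["model_name_or_path"] == "ibm-granite/granite-4.0-tiny-preview"]
summary = (
    granite4.groupby(["framework_config", "num_gpus"])["train_tokens_per_second"]
    .mean()
    .sort_values(ascending=False)
)
print(summary)
```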
