diff --git a/plugins/accelerated-moe/README.md b/plugins/accelerated-moe/README.md
index ff1c31b2..45ae209d 100644
--- a/plugins/accelerated-moe/README.md
+++ b/plugins/accelerated-moe/README.md
@@ -51,12 +51,12 @@ tox -e run-benches \
     -x testenv:run-benches.deps+="-r plugins/accelerated-moe/requirements-khd.txt" \
     -x testenv:run-benches.setenv+="MEMORY_LOGGING=nvidia" \
     -- \
-    "1 2 4" 128 benchmark_outputs scenarios-moe.yaml accelerated-moe-scatter
+    "1 2 4" 128 benchmark_outputs scenarios-moe.yaml accelerated-moe-full
 ```
 
 or run the larger `Mixtral-8x7B` bench:
 ```
 tox ... \
-    8 128 benchmark_outputs scenarios-moe.yaml accelerated-moe-scatter-mixtral
+    8 128 benchmark_outputs scenarios-moe.yaml accelerated-moe-full-mixtral
 ```
 
 NOTE: if `FileNotFoundError` is observed on the *triton cache*, similar to issues like these:
diff --git a/plugins/accelerated-moe/src/fms_acceleration_moe/framework_plugin_scattermoe.py b/plugins/accelerated-moe/src/fms_acceleration_moe/framework_plugin_scattermoe.py
index 528693ea..2ad26ed2 100644
--- a/plugins/accelerated-moe/src/fms_acceleration_moe/framework_plugin_scattermoe.py
+++ b/plugins/accelerated-moe/src/fms_acceleration_moe/framework_plugin_scattermoe.py
@@ -40,7 +40,11 @@ class ScatterMoEAccelerationPlugin(AccelerationPlugin):
 
     # if we decide to extract the kernels, then we do not need to anymore,
     # https://github.com/foundation-model-stack/fms-acceleration/issues/105
-    restricted_model_archs = ["GraniteMoeForCausalLM", "MixtralForCausalLM"]
+    restricted_model_archs = [
+        "GraniteMoeForCausalLM",
+        "MixtralForCausalLM",
+        "GraniteMoeSharedForCausalLM",
+    ]
 
     def __init__(self, configurations: Dict[str, Dict]):
         super().__init__(configurations)
diff --git a/plugins/accelerated-moe/src/fms_acceleration_moe/utils/scattermoe_constants.py b/plugins/accelerated-moe/src/fms_acceleration_moe/utils/scattermoe_constants.py
index be3057b5..2a6847be 100644
--- a/plugins/accelerated-moe/src/fms_acceleration_moe/utils/scattermoe_constants.py
+++ b/plugins/accelerated-moe/src/fms_acceleration_moe/utils/scattermoe_constants.py
@@ -76,6 +76,13 @@
         SCATTERMOE_SPEC_HAS_GATE,
         False,
     ),
+    "GraniteMoeSharedForCausalLM": (
+        "GraniteMoeSharedMoE",
+        "router",
+        "input_linear|output_linear|input_linear",
+        SCATTERMOE_SPEC_HAS_GATE,
+        False,
+    ),
 }
 
 
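For context on the `scattermoe_constants.py` entry added above: each spec maps a model architecture to a five-field tuple. Judging from the existing `GraniteMoeForCausalLM` and `MixtralForCausalLM` entries (e.g. `"w1|w2|w3"` for Mixtral), the fields appear to name the MoE block class to convert, the router/gate submodule, the `|`-separated expert linears, a has-gate spec constant, and a trailing boolean flag. A minimal sketch with illustrative variable names, not the plugin's actual loader:

```python
# Sketch only: unpack a five-field ScatterMoE spec tuple like the one
# added above. Field meanings are inferred from the existing entries;
# `moe_cls`, `router_name`, etc. are illustrative names.

SPEC = {
    "GraniteMoeSharedForCausalLM": (
        "GraniteMoeSharedMoE",                      # MoE block class to swap out
        "router",                                   # router / gate submodule name
        "input_linear|output_linear|input_linear",  # expert linears; input_linear
                                                    # repeats since it appears to hold
                                                    # the fused up+gate weights
        True,   # stands in for SCATTERMOE_SPEC_HAS_GATE
        False,  # trailing flag, kept consistent with the other entries
    ),
}

moe_cls, router_name, expert_names, has_gate, flag = SPEC[
    "GraniteMoeSharedForCausalLM"
]
print(moe_cls, router_name, expert_names.split("|"))
```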
diff --git a/plugins/fused-ops-and-kernels/src/fms_acceleration_foak/framework_plugin_fast_kernels.py b/plugins/fused-ops-and-kernels/src/fms_acceleration_foak/framework_plugin_fast_kernels.py
index b1a4b2b3..818a7e51 100644
--- a/plugins/fused-ops-and-kernels/src/fms_acceleration_foak/framework_plugin_fast_kernels.py
+++ b/plugins/fused-ops-and-kernels/src/fms_acceleration_foak/framework_plugin_fast_kernels.py
@@ -44,6 +44,7 @@ def register_foak_model_patch_rules(
         gpt_bigcode,
         granite,
         granitemoe,
+        granitemoeshared,
         llama,
         mistral,
         mixtral,
@@ -54,6 +55,7 @@
         *gpt_bigcode.get_mp_rules(base_type),
         *granite.get_mp_rules(base_type, config),
         *granitemoe.get_mp_rules(base_type),
+        *granitemoeshared.get_mp_rules(base_type),
         *llama.get_mp_rules(base_type, config),
         *mistral.get_mp_rules(base_type, config),
         *mixtral.get_mp_rules(base_type),
@@ -91,6 +93,7 @@ class FastKernelsAccelerationPlugin(AccelerationPlugin):
         "MixtralForCausalLM",
         "LlamaForCausalLM",
         "MistralForCausalLM",
+        "GraniteMoeSharedForCausalLM",
     ]
 
     def __init__(self, configurations: Dict[str, Dict]):
diff --git a/plugins/fused-ops-and-kernels/src/fms_acceleration_foak/models/granitemoeshared.py b/plugins/fused-ops-and-kernels/src/fms_acceleration_foak/models/granitemoeshared.py
new file mode 100644
index 00000000..dd28e7ac
--- /dev/null
+++ b/plugins/fused-ops-and-kernels/src/fms_acceleration_foak/models/granitemoeshared.py
@@ -0,0 +1,141 @@
+# Copyright The FMS HF Tuning Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Standard
+from functools import partial
+
+# Third Party
+from fms_acceleration.model_patcher import (
+    ModelPatcherRule,
+    ModelPatcherTrigger,
+    combine_functions,
+    combine_triggers,
+)
+
+# Local
+from ..kernels.unsloth.cross_entropy_loss import (
+    FastCrossEntropyLoss,
+    replace_custom_loss_when_triggered,
+)
+from ..kernels.unsloth.rms_layernorm import fast_rms_layernorm
+from ..kernels.unsloth.rope_embedding import fast_rope_embedding
+from .utils import (
+    KEY_O,
+    KEY_QKV,
+    build_lora_fused_ops,
+    get_transformers_version,
+    trigger_fused_ops,
+)
+
+
+def get_mp_rules(base_type: str):
+    """
+    Function to access all patch rules in this module.
+    If it is a forward_builder rule with `base_type` in
+    its forward builder argument, wrap the forward_builder
+    function as a partial function with the base_type argument
+    """
+    try:
+        # Third Party
+        from transformers.models.granitemoeshared.modeling_granitemoeshared import (  # pylint: disable=import-outside-toplevel
+            GraniteMoeSharedAttention,
+            GraniteMoeSharedForCausalLM,
+            GraniteMoeSharedRMSNorm,
+        )
+    except ImportError:
+        return []
+
+    return [
+        # TODO: have a generic version of this rule
+        # - do regex on RMSNorm class name
+        # - check on the tensors required for fast_rms_layernorm
+        ModelPatcherRule(
+            rule_id="granitemoeshared-rms",
+            trigger=ModelPatcherTrigger(check=GraniteMoeSharedRMSNorm),
+            forward=fast_rms_layernorm,
+        ),
+        # TODO: have a generic version of this rule
+        # - do regex on Attention class name
+        # - have a set of qkv / o module names and check on that
+        ModelPatcherRule(
+            rule_id="granitemoeshared-qkvo",
+            trigger=combine_triggers(
+                ModelPatcherTrigger(
+                    check=partial(
+                        trigger_fused_ops,
+                        attn_cls=GraniteMoeSharedAttention,
+                        submodule_names=["q_proj", "k_proj", "v_proj"],
+                    )
+                ),
+                ModelPatcherTrigger(
+                    check=partial(
+                        trigger_fused_ops,
+                        attn_cls=GraniteMoeSharedAttention,
+                        submodule_names=["o_proj"],
+                    )
+                ),
+                logic="OR",
+            ),
+            forward_builder=combine_functions(
+                partial(
+                    build_lora_fused_ops,
+                    submodule_names=["q_proj", "k_proj", "v_proj"],
+                    fused_op=KEY_QKV,
+                    base_type=base_type,
+                ),
+                partial(
+                    build_lora_fused_ops,
+                    submodule_names=["o_proj"],
+                    fused_op=KEY_O,
+                    base_type=base_type,
+                ),
+                logic="APPEND",
+            ),
+        ),
+        *[
+            (
+                ModelPatcherRule(
+                    rule_id="granitemoeshared-custom-loss",
+                    trigger=ModelPatcherTrigger(
+                        check=replace_custom_loss_when_triggered(
+                            GraniteMoeSharedForCausalLM,
+                            custom_loss_type="granite-custom-loss",
+                        )
+                    ),
+                )
+                if get_transformers_version() >= "4.46"
+                else ModelPatcherRule(
+                    rule_id="granitemoeshared-cross-ent",
+                    import_and_maybe_reload=(
+                        "torch.nn.CrossEntropyLoss",
+                        FastCrossEntropyLoss,
+                        "transformers.models.granitemoeshared.modeling_granitemoeshared",
+                    ),
+                )
+            )
+        ],
+        # TODO: have a generic version of this rule
+        # - get the module name
+        # - check if "apply_rotary_pos_emb" exists
+        # - patch
+        ModelPatcherRule(
+            rule_id="granitemoeshared-rope",
+            import_and_maybe_reload=(
+                "transformers.models.granitemoeshared.\
+modeling_granitemoeshared.apply_rotary_pos_emb",
+                fast_rope_embedding,
+                None,
+            ),
+        ),
+    ]
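The `combine_triggers(..., logic="OR")` pattern in the `granitemoeshared-qkvo` rule above fires when either submodule check matches. A standalone sketch of the idea, using a hypothetical `has_all_submodules` stand-in for the real `trigger_fused_ops` helper and a toy attention module:

```python
# Illustrative only: mimic the OR-combined trigger checks from the rule above.
from functools import partial

import torch


def has_all_submodules(module, attn_cls, submodule_names):
    # fire only on the attention class, and only if every named
    # projection is present on it (assumed semantics of trigger_fused_ops)
    return isinstance(module, attn_cls) and all(
        hasattr(module, name) for name in submodule_names
    )


class FakeAttention(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.q_proj = torch.nn.Linear(8, 8)
        self.k_proj = torch.nn.Linear(8, 8)
        self.v_proj = torch.nn.Linear(8, 8)
        self.o_proj = torch.nn.Linear(8, 8)


checks = [
    partial(has_all_submodules, attn_cls=FakeAttention,
            submodule_names=["q_proj", "k_proj", "v_proj"]),
    partial(has_all_submodules, attn_cls=FakeAttention,
            submodule_names=["o_proj"]),
]
# logic="OR": the rule matches if either check fires
print(any(check(FakeAttention()) for check in checks))  # True
```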
"transformers.models.granitemoeshared.modeling_granitemoeshared", + ), + ) + ) + ], + # TODO: have a generic version of this rule + # - get the module name + # - check if "apply_rotary_pos_emb" exists + # - patch + ModelPatcherRule( + rule_id="granitemoeshared-rope", + import_and_maybe_reload=( + "transformers.models.granitemoeshared.\ + modeling_granitemoeshared.apply_rotary_pos_emb", + fast_rope_embedding, + None, + ), + ), + ] diff --git a/scripts/benchmarks/benchmark.py b/scripts/benchmarks/benchmark.py index 72e0e23c..9a53ceb0 100644 --- a/scripts/benchmarks/benchmark.py +++ b/scripts/benchmarks/benchmark.py @@ -319,10 +319,11 @@ def build_args_from_products(products: List[Dict], defaults: Dict): ] ) elif grad_accum is None and pdtbs is not None: + grad_accum_steps = effective_batch_size // num_gpus // pdtbs argument_list.extend( [ "--gradient_accumulation_steps", - str(effective_batch_size // num_gpus // pdtbs), + str(1 if grad_accum_steps == 0 else grad_accum_steps), ] ) else: diff --git a/scripts/benchmarks/refs/a100_80gb_moe.csv b/scripts/benchmarks/refs/a100_80gb_moe.csv index 4936cd6f..4bdbb8d9 100644 --- a/scripts/benchmarks/refs/a100_80gb_moe.csv +++ b/scripts/benchmarks/refs/a100_80gb_moe.csv @@ -1,25 +1,42 @@ -epoch,framework_config,gradient_accumulation_steps,mem_nvidia_mem_reserved,model_name_or_path,num_gpus,per_device_train_batch_size,torch_dtype,train_loss,train_runtime,train_samples_per_second,train_steps_per_second,train_tokens_per_second -0.25,none,16.0,71199.0,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.9438143467903136,2371.9316,5.396,0.042,1505.608 -0.25,none,8.0,46829.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.9437569552659988,1355.7096,9.442,0.074,1317.096 -0.25,none,4.0,37996.0,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9437739425897598,708.3914,18.069,0.141,1260.32 -0.25,moe-scattermoe-granite-ep1,16.0,71187.0,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.9439476370811464,742.739,17.234,0.135,4808.149 -0.25,moe-scattermoe-granite-ep1,8.0,52503.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.9506204092502594,485.5103,26.364,0.206,3677.78 -0.25,moe-scattermoe-granite-ep1,4.0,51145.0,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9572784686088562,262.9566,48.677,0.38,3395.238 -0.25,moe-scattermoe-granite-ep2,8.0,40193.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.9437192791700364,577.2164,22.175,0.173,3093.467 -0.25,moe-scattermoe-granite-ep2,4.0,40878.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9509018939733506,300.285,42.626,0.333,2973.176 -0.25,moe-scattermoe-granite-ep4,4.0,31777.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9434539985656738,307.1264,41.677,0.326,2906.946 -0.25,moe-scattermoe-granite-ep1-padding-free,16.0,48401.0,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.9437484860420228,631.9756,20.254,0.158,3924.202 -0.25,moe-scattermoe-granite-ep1-padding-free,8.0,42452.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.9506663566827774,454.3444,28.172,0.22,2729.207 -0.25,moe-scattermoe-granite-ep1-padding-free,4.0,38560.0,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.957276314496994,241.2967,53.047,0.414,2569.451 -0.25,moe-scattermoe-granite-ep2-padding-free,8.0,31012.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.943688799738884,546.507,23.421,0.183,2268.955 
diff --git a/scripts/benchmarks/refs/a100_80gb_moe.csv b/scripts/benchmarks/refs/a100_80gb_moe.csv
index 4936cd6f..4bdbb8d9 100644
--- a/scripts/benchmarks/refs/a100_80gb_moe.csv
+++ b/scripts/benchmarks/refs/a100_80gb_moe.csv
@@ -1,25 +1,42 @@
-epoch,framework_config,gradient_accumulation_steps,mem_nvidia_mem_reserved,model_name_or_path,num_gpus,per_device_train_batch_size,torch_dtype,train_loss,train_runtime,train_samples_per_second,train_steps_per_second,train_tokens_per_second
-0.25,none,16.0,71199.0,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.9438143467903136,2371.9316,5.396,0.042,1505.608
-0.25,none,8.0,46829.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.9437569552659988,1355.7096,9.442,0.074,1317.096
-0.25,none,4.0,37996.0,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9437739425897598,708.3914,18.069,0.141,1260.32
-0.25,moe-scattermoe-granite-ep1,16.0,71187.0,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.9439476370811464,742.739,17.234,0.135,4808.149
-0.25,moe-scattermoe-granite-ep1,8.0,52503.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.9506204092502594,485.5103,26.364,0.206,3677.78
-0.25,moe-scattermoe-granite-ep1,4.0,51145.0,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9572784686088562,262.9566,48.677,0.38,3395.238
-0.25,moe-scattermoe-granite-ep2,8.0,40193.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.9437192791700364,577.2164,22.175,0.173,3093.467
-0.25,moe-scattermoe-granite-ep2,4.0,40878.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9509018939733506,300.285,42.626,0.333,2973.176
-0.25,moe-scattermoe-granite-ep4,4.0,31777.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9434539985656738,307.1264,41.677,0.326,2906.946
-0.25,moe-scattermoe-granite-ep1-padding-free,16.0,48401.0,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.9437484860420228,631.9756,20.254,0.158,3924.202
-0.25,moe-scattermoe-granite-ep1-padding-free,8.0,42452.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.9506663566827774,454.3444,28.172,0.22,2729.207
-0.25,moe-scattermoe-granite-ep1-padding-free,4.0,38560.0,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.957276314496994,241.2967,53.047,0.414,2569.451
-0.25,moe-scattermoe-granite-ep2-padding-free,8.0,31012.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.943688799738884,546.507,23.421,0.183,2268.955
-0.25,moe-scattermoe-granite-ep2-padding-free,4.0,28133.0,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9505942213535308,283.5444,45.143,0.353,2186.607
-0.25,moe-scattermoe-granite-ep4-padding-free,4.0,21585.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9441865116357804,284.6079,44.974,0.351,2178.436
-0.25,moe-scattermoe-granite-ep1-padding-free-foak,16.0,42651.0,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.9437448275089264,615.4528,20.798,0.162,4029.554
-0.25,moe-scattermoe-granite-ep1-padding-free-foak,8.0,37743.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.950773031115532,433.4811,29.528,0.231,2860.563
-0.25,moe-scattermoe-granite-ep1-padding-free-foak,4.0,35153.0,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9572476959228516,232.0428,55.162,0.431,2671.921
-0.25,moe-scattermoe-granite-ep2-padding-free-foak,8.0,26075.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.9437651455402374,524.7751,24.391,0.191,2362.917
-0.25,moe-scattermoe-granite-ep2-padding-free-foak,4.0,24665.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9507779973745346,274.126,46.694,0.365,2261.733
-0.25,moe-scattermoe-granite-ep4-padding-free-foak,4.0,18368.0,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.943427557349205,278.1245,46.023,0.36,2229.217
-,none,,65607.25,mistralai/Mixtral-8x7B-Instruct-v0.1,8,1,bfloat16,0.8599078696966171,4180.9544,3.062,0.024,80.364
-,moe-scattermoe-granite-ep8,,52004.75,mistralai/Mixtral-8x7B-Instruct-v0.1,8,1,bfloat16,0.8588122856616974,1071.1967,11.949,0.093,313.668
-,moe-scattermoe-granite-ep8-foak,,51961.25,mistralai/Mixtral-8x7B-Instruct-v0.1,8,1,bfloat16,0.8599798053503036,1043.6675,12.264,0.096,321.942
+epoch,framework_config,gradient_accumulation_steps,mem_nvidia_mem_reserved,model_name_or_path,num_gpus,per_device_train_batch_size,torch_dtype,train_loss,train_runtime,train_samples_per_second,train_steps_per_second,train_tokens_per_second
+0.25,none,16,72072,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.938093501,1986.7714,6.443,0.05,1797.489
+0.25,none,8,49689,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.937983845,1082.5484,11.824,0.092,1649.441
+0.25,none,4,41754.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.93852025,569.5617,22.473,0.176,1567.521
+0.25,moe-scattermoe-granite-ep1,16,72068,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.938054211,660.687,19.374,0.151,5405.283
+0.25,moe-scattermoe-granite-ep1,8,53917,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.944801819,362.751,35.286,0.276,4922.385
+0.25,moe-scattermoe-granite-ep1,4,53070,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.95192752,202.3782,63.248,0.494,4411.543
+0.25,moe-scattermoe-granite-ep2,8,41880,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.938050581,441.5269,28.99,0.226,4044.147
+0.25,moe-scattermoe-granite-ep2,4,43092,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.945302382,235.4383,54.367,0.425,3792.076
+0.25,moe-scattermoe-granite-ep4,4,33673.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.938171822,259.2932,49.365,0.386,3443.207
+0.25,moe-scattermoe-granite-ep1-padding-free,16,49580,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.937993399,505.6847,25.312,0.198,4904.241
+0.25,moe-scattermoe-granite-ep1-padding-free,8,43821,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.944808855,311.785,41.054,0.321,3977.099
+0.25,moe-scattermoe-granite-ep1-padding-free,4,40070.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.951866873,169.9554,75.314,0.588,3648.016
+0.25,moe-scattermoe-granite-ep1-padding-free-foak,16,49114,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.938123143,476.8099,26.845,0.21,5201.235
+0.25,moe-scattermoe-granite-ep1-padding-free-foak,8,43865,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.944894351,296.5204,43.167,0.337,4181.837
+0.25,moe-scattermoe-granite-ep1-padding-free-foak,4,40070.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.951975068,163.756,78.165,0.611,3786.12
+0.25,moe-scattermoe-granite-ep2-padding-free,8,32276,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.937930156,356.1296,35.942,0.281,3481.878
+0.25,moe-scattermoe-granite-ep2-padding-free,4,29787,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.945339936,192.7168,66.419,0.519,3217.156
+0.25,moe-scattermoe-granite-ep2-padding-free-foak,8,32376,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.938017525,342.9327,37.325,0.292,3615.87
+0.25,moe-scattermoe-granite-ep2-padding-free-foak,4,29734.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.945357794,184.554,69.356,0.542,3359.451
+0.25,moe-scattermoe-granite-ep4-padding-free,4,23386.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.938359724,191.205,66.944,0.523,3242.593
+0.25,moe-scattermoe-granite-ep4-padding-free-foak,4,23359.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.938333818,183.9191,69.596,0.544,3371.048
+0.25,none,16,81018,ibm-research/moe-7b-1b-active-shared-experts,1,8,bfloat16,0.878051637,4223.9158,3.03,0.024,839.411
+0.25,none,8,74462,ibm-research/moe-7b-1b-active-shared-experts,2,8,bfloat16,0.877874975,2247.4716,5.695,0.044,788.798
+0.25,none,4,63033,ibm-research/moe-7b-1b-active-shared-experts,4,8,bfloat16,0.878253661,1155.5903,11.077,0.087,767.054
+0.25,moe-scattermoe-granite-ep1,16,81018,ibm-research/moe-7b-1b-active-shared-experts,1,8,bfloat16,0.878006854,907.8407,14.099,0.11,3905.531
+0.25,moe-scattermoe-granite-ep1,8,73870,ibm-research/moe-7b-1b-active-shared-experts,2,8,bfloat16,0.879557709,492.5063,25.99,0.203,3599.548
+0.25,moe-scattermoe-granite-ep1,4,74108.5,ibm-research/moe-7b-1b-active-shared-experts,4,8,bfloat16,0.881521969,277.8191,46.073,0.36,3190.565
+0.25,moe-scattermoe-granite-ep2,8,54168,ibm-research/moe-7b-1b-active-shared-experts,2,8,bfloat16,0.877982622,563.0434,22.734,0.178,3148.603
+0.25,moe-scattermoe-granite-ep2,4,54582,ibm-research/moe-7b-1b-active-shared-experts,4,8,bfloat16,0.880103117,299.2522,42.773,0.334,2962.05
+0.25,moe-scattermoe-granite-ep1-padding-free,16,77632,ibm-research/moe-7b-1b-active-shared-experts,1,8,bfloat16,0.878018975,726.1255,17.628,0.138,3410.98
+0.25,moe-scattermoe-granite-ep1-padding-free,8,68019,ibm-research/moe-7b-1b-active-shared-experts,2,8,bfloat16,0.879643369,429.5618,29.798,0.233,2882.938
+0.25,moe-scattermoe-granite-ep1-padding-free,4,63879,ibm-research/moe-7b-1b-active-shared-experts,4,8,bfloat16,0.88148216,239.3677,53.474,0.418,2586.815
+0.25,moe-scattermoe-granite-ep1-padding-free-foak,16,72666,ibm-research/moe-7b-1b-active-shared-experts,1,8,bfloat16,0.878073001,688.38,18.594,0.145,3598.013
+0.25,moe-scattermoe-granite-ep1-padding-free-foak,8,63074,ibm-research/moe-7b-1b-active-shared-experts,2,8,bfloat16,0.879622684,419.7876,30.492,0.238,2950.063
+0.25,moe-scattermoe-granite-ep1-padding-free-foak,4,60126.5,ibm-research/moe-7b-1b-active-shared-experts,4,8,bfloat16,0.881447418,231.7976,55.221,0.431,2671.296
+0.25,moe-scattermoe-granite-ep2-padding-free,8,45093,ibm-research/moe-7b-1b-active-shared-experts,2,8,bfloat16,0.8779908,471.1344,27.168,0.212,2628.549
+0.25,moe-scattermoe-granite-ep2-padding-free,4,42590,ibm-research/moe-7b-1b-active-shared-experts,4,8,bfloat16,0.879999972,250.48,51.102,0.399,2472.054
+0.25,moe-scattermoe-granite-ep2-padding-free-foak,8,40281,ibm-research/moe-7b-1b-active-shared-experts,2,8,bfloat16,0.878110015,461.6668,27.726,0.217,2682.454
+0.25,moe-scattermoe-granite-ep2-padding-free-foak,4,38934.5,ibm-research/moe-7b-1b-active-shared-experts,4,8,bfloat16,0.880085612,250.2941,51.14,0.4,2473.889
+0.25,moe-scattermoe-granite-ep8,16,56845,mistralai/Mixtral-8x7B-Instruct-v0.1,8,1,bfloat16,0.86557077,779.9315,16.412,0.128,430.807
+0.25,moe-scattermoe-granite-ep8-foak,16,56769.25,mistralai/Mixtral-8x7B-Instruct-v0.1,8,1,bfloat16,0.86551428,734.0756,17.437,0.136,457.719
\ No newline at end of file
diff --git a/scripts/benchmarks/refs/requirements_moe.txt b/scripts/benchmarks/refs/requirements_moe.txt
index 63700ed0..9f5fc5f5 100644
--- a/scripts/benchmarks/refs/requirements_moe.txt
+++ b/scripts/benchmarks/refs/requirements_moe.txt
@@ -1,44 +1,42 @@
 accelerate==1.0.1
-aiohappyeyeballs==2.4.3
-aiohttp==3.10.10
-aiosignal==1.3.1
-async-timeout==4.0.3
-attrs==24.2.0
+aiohappyeyeballs==2.4.6
+aiohttp==3.11.12
+aiosignal==1.3.2
+attrs==25.1.0
 bitsandbytes==0.43.3
-certifi==2024.8.30
-charset-normalizer==3.4.0
-contourpy==1.3.0
+certifi==2025.1.31
+charset-normalizer==3.4.1
+contourpy==1.3.1
 cycler==0.12.1
 datasets==2.21.0
 dill==0.3.8
-docstring_parser==0.16
-einops==0.8.0
-filelock==3.16.1
-flash-attn==2.6.3
+einops==0.8.1
+filelock==3.17.0
+flash_attn==2.7.4.post1
 -e git+https://github.com/foundation-model-stack/fms-acceleration.git@21af5fb9f2989b3dbf443c016e4c0470b536a593#egg=fms_acceleration&subdirectory=plugins/framework
 -e git+https://github.com/foundation-model-stack/fms-acceleration.git@21af5fb9f2989b3dbf443c016e4c0470b536a593#egg=fms_acceleration_aadp&subdirectory=plugins/attention-and-distributed-packing
 -e git+https://github.com/foundation-model-stack/fms-acceleration.git@21af5fb9f2989b3dbf443c016e4c0470b536a593#egg=fms_acceleration_foak&subdirectory=plugins/fused-ops-and-kernels
 -e git+https://github.com/foundation-model-stack/fms-acceleration.git@21af5fb9f2989b3dbf443c016e4c0470b536a593#egg=fms_acceleration_moe&subdirectory=plugins/accelerated-moe
 -e git+https://github.com/foundation-model-stack/fms-acceleration.git@21af5fb9f2989b3dbf443c016e4c0470b536a593#egg=fms_acceleration_peft&subdirectory=plugins/accelerated-peft
-fms-hf-tuning @ git+https://github.com/foundation-model-stack/fms-hf-tuning.git@398c2a8fe26d734344240555585d95e05299faa8
-fonttools==4.54.1
+fms-hf-tuning @ git+https://github.com/foundation-model-stack/fms-hf-tuning.git@fdc7527510692ada03e4303df1549cebc5139b31
+fonttools==4.56.0
 frozenlist==1.5.0
 fsspec==2024.6.1
-huggingface-hub==0.26.2
+huggingface-hub==0.29.0
 idna==3.10
-Jinja2==3.1.4
+Jinja2==3.1.5
 kernel-hyperdrive @ git+https://github.com/fabianlim/kernel-hyperdrive.git@45036497e12444ca98a6f0072204538aee4543ba
-kiwisolver==1.4.7
-llvmlite==0.43.0
+kiwisolver==1.4.8
+llvmlite==0.44.0
 markdown-it-py==3.0.0
 MarkupSafe==3.0.2
-matplotlib==3.9.2
+matplotlib==3.10.0
 mdurl==0.1.2
 mpmath==1.3.0
 multidict==6.1.0
 multiprocess==0.70.16
 networkx==3.4.2
-numba==0.60.0
+numba==0.61.0
 numpy==1.26.4
 nvidia-cublas-cu12==12.1.3.1
 nvidia-cuda-cupti-cu12==12.1.105
@@ -49,41 +47,41 @@
 nvidia-cufft-cu12==11.0.2.54
 nvidia-curand-cu12==10.3.2.106
 nvidia-cusolver-cu12==11.4.5.107
 nvidia-cusparse-cu12==12.1.0.106
+nvidia-cusparselt-cu12==0.6.2
 nvidia-nccl-cu12==2.20.5
 nvidia-nvjitlink-cu12==12.4.127
 nvidia-nvtx-cu12==12.1.105
 packaging==24.2
 pandas==2.2.3
 peft==0.13.2
-pillow==11.0.0
-propcache==0.2.0
-protobuf==5.28.3
-psutil==6.1.0
-pyarrow==18.0.0
-Pygments==2.18.0
-pyparsing==3.2.0
+pillow==11.1.0
+propcache==0.2.1
+protobuf==5.29.3
+psutil==7.0.0
+pyarrow==19.0.1
+Pygments==2.19.1
+pyparsing==3.2.1
 python-dateutil==2.9.0.post0
-pytz==2024.2
+pytz==2025.1
 PyYAML==6.0.2
 regex==2024.11.6
 requests==2.32.3
 rich==13.9.4
-safetensors==0.4.5
+safetensors==0.5.2
 sentencepiece==0.2.0
-shtab==1.7.1
+setuptools==75.8.0
 simpleeval==0.9.13
-six==1.16.0
+six==1.17.0
 sympy==1.13.1
 threadpoolctl==3.5.0
-tokenizers==0.20.3
+tokenizers==0.21.0
 torch==2.4.1
-tqdm==4.67.0
-transformers==4.45.2
+tqdm==4.67.1
+transformers==4.49.0
 triton==3.0.0
-trl==0.11.4
+trl==0.14.0
 typing_extensions==4.12.2
-tyro==0.8.14
-tzdata==2024.2
-urllib3==2.2.3
+tzdata==2025.1
+urllib3==2.3.0
 xxhash==3.5.0
-yarl==1.17.1
+yarl==1.18.3
diff --git a/scripts/benchmarks/scenarios-moe.yaml b/scripts/benchmarks/scenarios-moe.yaml
index efa2725e..a5171e11 100644
--- a/scripts/benchmarks/scenarios-moe.yaml
+++ b/scripts/benchmarks/scenarios-moe.yaml
@@ -58,6 +58,8 @@ scenarios:
       adam_epsilon: 1e-8
     model_name_or_path:
       - 'ibm-granite/granite-3.0-3b-a800m-instruct'
+      - 'ibm-research/moe-7b-1b-active-shared-experts'
+
 
   - name: accelerated-moe-full-mixtral
     framework_config: