nod-ai
diff --git a/sharktank/sharktank/examples/export_paged_llm_v1.py b/sharktank/sharktank/examples/export_paged_llm_v1.py
Lines changed: 2 additions & 2 deletions
diff --git a/sharktank/sharktank/examples/paged_llm_v1.py b/sharktank/sharktank/examples/paged_llm_v1.py
Lines changed: 9 additions & 9 deletions
diff --git a/sharktank/sharktank/examples/pipeline/export_ppffn_net.py b/sharktank/sharktank/examples/pipeline/export_ppffn_net.py
Lines changed: 5 additions & 6 deletions
diff --git a/sharktank/sharktank/examples/sharding/export_ffn_net.py b/sharktank/sharktank/examples/sharding/export_ffn_net.py
Lines changed: 5 additions & 6 deletions
diff --git a/sharktank/sharktank/examples/sharding/shard_llm_dataset.py b/sharktank/sharktank/examples/sharding/shard_llm_dataset.py
Lines changed: 4 additions & 4 deletions
diff --git a/sharktank/sharktank/examples/validate_paged_llama_model.py b/sharktank/sharktank/examples/validate_paged_llama_model.py
Lines changed: 1 addition & 1 deletion
diff --git a/sharktank/sharktank/export_layer/export_kv_cache.py b/sharktank/sharktank/export_layer/export_kv_cache.py
Lines changed: 1 addition & 1 deletion
diff --git a/sharktank/sharktank/export_layer/export_moe.py b/sharktank/sharktank/export_layer/export_moe.py
Lines changed: 1 addition & 1 deletion
diff --git a/sharktank/sharktank/kernels/base.py b/sharktank/sharktank/kernels/base.py
Lines changed: 1 addition & 1 deletion
diff --git a/sharktank/sharktank/layers/__init__.py b/sharktank/sharktank/layers/__init__.py
Lines changed: 2 additions & 1 deletion
@@ -6,6 +6,7 @@
 
 """Export support for the PagedLLMV1 protocol of models."""
 
+import os
 import json
 from typing import Any, Dict
 import torch
@@ -16,14 +17,13 @@
 from sharktank.types import *
 from sharktank.utils.math import ceildiv
 from sharktank import ops
+from sharktank.utils import cli
 
 # TODO: Should be using a base class with the protocol supported.
 from sharktank.models.llm import *
 
 
 def main():
-    from ..utils import cli
-    import os
 
     parser = cli.create_parser()
     cli.add_input_dataset_options(parser)
 
@@ -12,17 +12,17 @@
 from typing import Optional
 import torch
 import numpy as np
-from ..layers import *
-from ..types import *
 
-from ..ops import replicate, unshard
+from sharktank.layers import *
+from sharktank.types import *
+from sharktank.ops import replicate, unshard
 
 # TODO: Should be using a base class with the protocol supported.
-from ..models.llm import *
-from ..models.llama.sharding import shard_theta
-from ..utils.debugging import trace_tensor
-from ..utils.tokenizer import InferenceTokenizer
-from ..utils import cli
+from sharktank.models.llm import *
+from sharktank.models.llama.sharding import shard_theta
+from sharktank.utils.debugging import trace_tensor
+from sharktank.utils.tokenizer import InferenceTokenizer
+from sharktank.utils import cli
 
 
 class TorchGenerator:
@@ -428,7 +428,7 @@ def main():
     model = PagedLlmModelV1(dataset.root_theta, config)
 
     if args.save_intermediates_path:
-        from ..utils.patching import SaveModuleResultTensorsPatch
+        from sharktank.utils.patching import SaveModuleResultTensorsPatch
 
         intermediates_saver = SaveModuleResultTensorsPatch()
         intermediates_saver.patch_child_modules(model)
 
@@ -14,13 +14,15 @@
         --output-irpa-file=/tmp/ffn.irpa /tmp/ffn.mlir
 """
 
+import os
 import math
 
 import torch
 
-from ...layers import *
-from ... import ops
-from ...types import *
+from sharktank.utils import cli
+from sharktank.layers import *
+from sharktank import ops
+from sharktank.types import *
 
 from iree.turbine.aot import DeviceAffinity, DeviceTensorTrait, export
 
@@ -69,9 +71,6 @@ def forward(self, x: torch.Tensor):
 
 
 def main(raw_args=None):
-    from ...utils import cli
-    import os
-
     parser = cli.create_parser()
     parser.add_argument(
         "output_file",
 
@@ -13,13 +13,14 @@
     python -m sharktank.examples.sharding.export_ffn_net \
         --output-irpa-file=/tmp/ffn.irpa /tmp/ffn.mlir
 """
+import os
 
 import torch
-import torch.nn as nn
 
-from ...layers import *
-from ... import ops
-from ...types import *
+from sharktank.layers import *
+from sharktank import ops
+from sharktank.types import *
+from sharktank.utils import cli
 
 
 def create_theta(
@@ -63,8 +64,6 @@ def forward(self, x: torch.Tensor):
 
 
 def main(raw_args=None):
-    from ...utils import cli
-    import os
 
     parser = cli.create_parser()
     parser.add_argument(
 
@@ -10,13 +10,13 @@
 weights of an LLM by converting the RHS of all eligible layers to a sharded
 form.
 """
-from ...models.llama.sharding import shard_theta
-from ...layers import LlamaHParams, LlamaModelConfig
-from ...types import *
+from sharktank.models.llama.sharding import shard_theta
+from sharktank.layers import LlamaHParams, LlamaModelConfig
+from sharktank.types import *
+from sharktank.utils import cli
 
 
 def main(raw_args=None):
-    from ...utils import cli
 
     parser = cli.create_parser()
     cli.add_input_dataset_options(parser)
 
@@ -11,10 +11,10 @@
 from sharktank.layers import *
 from sharktank.types import *
 from sharktank.models.llm import *
+from sharktank.utils import cli
 
 
 def main(args: list[str]):
-    from ..utils import cli
 
     torch.no_grad().__enter__()
 
 
@@ -11,7 +11,7 @@
 from sharktank.types import SplitPrimitiveTensor
 from sharktank.ops import reshard_split, replicate
 from sharktank.layers.paged_attention import PagedAttention
-from ..utils import cli
+from sharktank.utils import cli
 
 
 def main():
 
@@ -11,7 +11,7 @@
 
 from sharktank.models.llama.testing import make_moe_block_theta, make_rand_torch
 from sharktank.layers.mixture_of_experts_block import MoeBlock
-from ..utils import cli
+from sharktank.utils import cli
 
 
 def main():
 
@@ -34,7 +34,7 @@
 
 from iree.turbine.transforms.merger import Merger
 
-from ..utils.logging import get_logger
+from sharktank.utils.logging import get_logger
 
 LIBRARY = def_library("sharktank")
 TEMPLATES_DIR = Path(__file__).parent / "templates"
 
@@ -14,8 +14,9 @@
 from .token_embedding import TokenEmbeddingLayer
 from .paged_llama_attention_block import PagedLlamaAttentionBlock
 from .ffn_block import FFN
-from .ffn_moe_block import FFNMOE
+from .ffn_moe_block import FFNMOE, PreGatherFFNMOE
 from .mixture_of_experts_block import MoeBlock
 from .mmdit import MMDITDoubleBlock, MMDITSingleBlock
+from .modulation import ModulationLayer
 
 from .configs import *