Skip to content

Commit 01afeac

Browse files
committed
Update on "test Int8DynActInt4WeightLinear"
ghstack-comment-id: 2430970002 [ghstack-poisoned]
1 parent 5d9e71b commit 01afeac

File tree

4 files changed

+47
-4
lines changed

4 files changed

+47
-4
lines changed

examples/models/llama/TARGETS

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,3 +216,30 @@ runtime.python_test(
216216
"//executorch/examples/models/llama:llama_transformer",
217217
],
218218
)
219+
220+
runtime.python_library(
221+
name = "test_8da4w_library",
222+
srcs = [
223+
"test_8da4w.py"
224+
],
225+
_is_external_target = True,
226+
base_module = "executorch.examples.models.llama",
227+
visibility = [
228+
"//bento/...",
229+
"//bento_kernels/...",
230+
"//executorch/examples/...",
231+
"@EXECUTORCH_CLIENTS",
232+
],
233+
deps = [
234+
"//pytorch/ao:torchao",
235+
]
236+
)
237+
238+
runtime.python_binary(
239+
name = "test_8da4w",
240+
main_function = "executorch.examples.models.llama.test_8da4w.main",
241+
deps = [
242+
":test_8da4w_library",
243+
"//caffe2:torch",
244+
]
245+
)

examples/models/llama/TestInt8DynActInt4WeightLinear.py renamed to examples/models/llama/test_8da4w.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import os
2+
13
import torch.cuda
24

35
from torch import nn
@@ -23,19 +25,33 @@ def forward(self, x: torch.tensor):
2325

2426

2527
def main() -> None:
28+
seed = 42
29+
torch.manual_seed(seed)
2630
device = "cuda" if torch.cuda.is_available() else "cpu"
27-
input = torch.load("/home/lunwenh/models/x.pt").to(device=device)
31+
input = torch.load(f"{os.path.dirname(__file__)}/x.pt").to(device=device)
2832
checkpoint = torch.load(
29-
"/home/lunwenh/models/wq.pth",
33+
f"{os.path.dirname(__file__)}/wq.pth",
3034
map_location=device,
3135
mmap=True,
3236
)
3337
print(f"input {input}")
34-
for i in range(5):
38+
results = []
39+
iterations = 10
40+
for i in range(iterations):
3541
model = Attention(device).to(device=device)
3642
model.load_state_dict(checkpoint, strict=False, assign=True)
3743

38-
print(model.forward(input))
44+
result = model.forward(input)
45+
exist = False
46+
for existing_result in results:
47+
if torch.allclose(result, existing_result):
48+
exist = True
49+
break
50+
if not exist:
51+
results.append(result)
52+
print(f"Generated {len(results)} results with {iterations} iterations")
53+
for i, result in enumerate(results):
54+
print(f"result {i} {result}")
3955

4056

4157
if __name__ == "__main__":

examples/models/llama/wq.pth

4.5 MB
Binary file not shown.

examples/models/llama/x.pt

9.86 MB
Binary file not shown.

0 commit comments

Comments (0)