@@ -121,25 +121,6 @@ def build_model(
121121    return  export_llama (modelname , args )
122122
123123
124- def  _is_valid_torchao_qmode_type (value ):
125-     if  not  isinstance (value , str ):
126-         return  False 
127- 
128-     if  not  value .startswith ("torchao:" ):
129-         return  False 
130- 
131-     patterns  =  [
132-         r"emb.(\d+),(\d+)&lin8da.(\d+),(\d+)" ,
133-         r"emb.(\d+),(\d+)" ,
134-         r"lin8da.(\d+),(\d+)" ,
135-     ]
136-     for  pattern  in  patterns :
137-         matches  =  re .findall (pattern , value )
138-         if  len (matches ) ==  1 :
139-             return  True 
140-     return  False 
141- 
142- 
143124def  build_args_parser () ->  argparse .ArgumentParser :
144125    ckpt_dir  =  f"{ Path (__file__ ).absolute ().parent .as_posix ()}  
145126    parser  =  argparse .ArgumentParser ()
@@ -173,20 +154,6 @@ def build_args_parser() -> argparse.ArgumentParser:
173154        help = "Use PT2E quantization. Comma separated options. e.g. xnnpack_dynamic (for per channel 8 bit weight), xnnpack_dynamic_qc4 (for per channel 4 bit weight), embedding." ,
174155    )
175156
176-     def  _qmode_type (value ):
177-         choices  =  ["int8" , "8da4w" , "8da4w-gptq" , "vulkan_4w" ]
178-         if  not  (value  in  choices  or  _is_valid_torchao_qmode_type (value )):
179-             raise  argparse .ArgumentTypeError (
180-                 f"Got qmode { value } { choices }  
181-                 +  "\n \t * torchao:emb.{embed_bitwidth},{embed_groupsize}" 
182-                 +  "\n \t \t  (e.g., torchao:emb.4,32)" 
183-                 +  "\n \t * torchao:emb.{embed_bitwidth},{embed_groupsize}&lin8da.{linear_bitwidth},{linear_groupsize}" 
184-                 +  "\n \t \t  (e.g., torchao:emb.4,32&lin8da.4,128)" 
185-                 +  "\n \t * torchao:lin8da.{linear_bitwidth},{linear_groupsize}" 
186-                 +  "\n t\t \t  (e.g., torchao:lin8da.4,128)" 
187-             )
188-         return  value 
189- 
190157    parser .add_argument (
191158        "-qmode" ,
192159        "--quantization_mode" ,
@@ -601,6 +568,40 @@ def get_quantizer_and_quant_params(args):
601568    return  pt2e_quant_params , quantizers , quant_dtype 
602569
603570
571+ def  _is_valid_torchao_qmode_type (value ):
572+     if  not  isinstance (value , str ):
573+         return  False 
574+ 
575+     if  not  value .startswith ("torchao:" ):
576+         return  False 
577+ 
578+     patterns  =  [
579+         r"emb.(\d+),(\d+)&lin8da.(\d+),(\d+)" ,
580+         r"emb.(\d+),(\d+)" ,
581+         r"lin8da.(\d+),(\d+)" ,
582+     ]
583+     for  pattern  in  patterns :
584+         matches  =  re .findall (pattern , value )
585+         if  len (matches ) ==  1 :
586+             return  True 
587+     return  False 
588+ 
589+ 
def _qmode_type(value):
    """argparse ``type=`` validator for the --quantization_mode flag.

    Accepts one of the fixed choices or a torchao qmode string (as
    validated by ``_is_valid_torchao_qmode_type``).

    Args:
        value: the raw command-line string.

    Returns:
        str: ``value`` unchanged when it is valid.

    Raises:
        argparse.ArgumentTypeError: when the value is neither a known
            choice nor a valid torchao pattern; the message lists the
            accepted torchao formats with examples.
    """
    choices = ["int8", "8da4w", "8da4w-gptq", "vulkan_4w"]
    if not (value in choices or _is_valid_torchao_qmode_type(value)):
        # NOTE(review): the original message line was truncated in the diff
        # view; reconstructed wording below — confirm against upstream.
        # Also fixed the "\n t\t\t" escape typo on the last example line.
        raise argparse.ArgumentTypeError(
            f"Got qmode {value}, but expected one of {choices} or a valid torchao pattern:"
            + "\n\t* torchao:emb.{embed_bitwidth},{embed_groupsize}"
            + "\n\t\t (e.g., torchao:emb.4,32)"
            + "\n\t* torchao:emb.{embed_bitwidth},{embed_groupsize}&lin8da.{linear_bitwidth},{linear_groupsize}"
            + "\n\t\t (e.g., torchao:emb.4,32&lin8da.4,128)"
            + "\n\t* torchao:lin8da.{linear_bitwidth},{linear_groupsize}"
            + "\n\t\t (e.g., torchao:lin8da.4,128)"
        )
    return value
603+ 
604+ 
604605def  _validate_args (args ):
605606    """ 
606607    TODO: Combine all the backends under --backend args 
@@ -618,6 +619,7 @@ def _validate_args(args):
618619        if  args .enable_dynamic_shape :
619620            raise  ValueError (
620621                "Dynamic shape is not currently supported with torchao qmode. Please use --disable_dynamic_shape." 
622+                 "If you need this feature, please file an issue." 
621623            )
622624
623625
0 commit comments