Comment

jackzhxng · jackzhxng · commit 06fca85c9486 · 2025-02-24T14:11:33.000-08:00
diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py
@@ -606,7 +606,9 @@ def _prepare_for_llama_export(args) -> LLMEdgeManager:
         )
     )
 
-    # We want to do compute the actual ops in the precision of the dtype_override.
+    # We want to compute the actual ops in the precision of the dtype_override,
+    # since the precision of the quantized linear will initially be the dtype of the
+    # checkpoint, not the dtype_override.
     def _set_precision_to_fp32(module):
         """
         Recursively iterate through the module and set the precision attribute

Original file line number	Diff line number	Diff line change
`@@ -606,7 +606,9 @@ def _prepare_for_llama_export(args) -> LLMEdgeManager:`
`606`	`606`	`)`
`607`	`607`	`)`
`608`	`608`
`609`		`- # We want to do compute the actual ops in the precision of the dtype_override.`
	`609`	`+ # We want to compute the actual ops in the precision of the dtype_override,`
	`610`	`+ # since the precision of the quantized linear will initially be the dtype of the`
	`611`	`+ # checkpoint, not the dtype_override.`
`610`	`612`	`def _set_precision_to_fp32(module):`
`611`	`613`	`"""`
`612`	`614`	`Recursively iterate through the module and set the precision attribute`