Qualcomm AI Engine Direct - Update arch and soc chipset terms (#6227)

winskuo-quic · facebook-github-bot · commit 55ed63f600ab · 2024-10-15T11:24:11.000-07:00
Summary: - Previously, we used the terms arch_table and soc model interchangeably. However, these two terms refer to two different things. arch_table is now referred to as the HTP arch table, which refers to the HTP architecture version, such as v73, v75, etc. On the other hand, SoC model refers to the SoC chipset ID, such as SM8550, SM8650, etc. - Removed the tutorial for adding an SoC. I don't think external users will be able to get SoC IDs. Pull Request resolved: #6227 Reviewed By: tarun292 Differential Revision: D64411771 Pulled By: cccclai fbshipit-source-id: 1871b1df1633560be48eb80f62ac83382dd7164f
diff --git a/backends/qualcomm/README.md b/backends/qualcomm/README.md
@@ -21,17 +21,8 @@ Please check `generate_qnn_executorch_compiler_spec()` in
 - Snapdragon 8 Gen 2
 - Snapdragon 8 Gen 3
 
-### How to add more supported Chipset
-
-#### Step 1: Check SoC model of snapdragon device
-Get SoC model which would like to be supported from the document of Qualcomm AI Engine Direct SDK.
-
-#### Step 2: Update schema of compiler option and SoC information in serialization
-Add SoC model into QcomChipset enum in [schema](./serialization/schema.fbs) and [qnn_compile_spec_schema](./serialization/qnn_compile_spec_schema.py).
-Insert new SoC information into _soc_info_table in [qnn_compile_spec_schema](./serialization/qnn_compile_spec_schema.py).
-
-#### Step 3: Recompile the .pte file
-Follow [setup](../../docs/source/build-run-qualcomm-ai-engine-direct-backend.md) to setup environment and build runtime with new schema header.
+### Adding more supported Chipset
+Currently, users cannot add additional chipset models because the chipset ID is not accessible to community users. If you have specific chipset models you wish to add, please contact one of the authors in the `Code Reviews` section at the bottom of this page.
 
 ### Supported Inference Type
 - Quantized
diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py
@@ -68,7 +68,7 @@ def setUp(self):
         TestQNN.rtol = 1e-1
         backend_options = generate_htp_compiler_spec(use_fp16=True)
         TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
-            soc_model=self.arch_table[TestQNN.model],
+            soc_model=self.chipset_table[TestQNN.model],
             backend_options=backend_options,
             debug=False,
             saver=False,
@@ -522,7 +522,7 @@ def setUp(self):
         TestQNN.rtol = 1e-1
         backend_options = generate_htp_compiler_spec(use_fp16=True)
         TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
-            soc_model=self.arch_table[TestQNN.model],
+            soc_model=self.chipset_table[TestQNN.model],
             backend_options=backend_options,
             debug=False,
             saver=False,
@@ -674,7 +674,7 @@ def setUp(self):
         TestQNN.rtol = 1
         backend_options = generate_htp_compiler_spec(use_fp16=False)
         TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
-            soc_model=self.arch_table[TestQNN.model],
+            soc_model=self.chipset_table[TestQNN.model],
             backend_options=backend_options,
             debug=False,
             saver=False,
@@ -1236,7 +1236,7 @@ def setUp(self):
         TestQNN.rtol = 1
         backend_options = generate_htp_compiler_spec(use_fp16=False)
         TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
-            soc_model=self.arch_table[TestQNN.model],
+            soc_model=self.chipset_table[TestQNN.model],
             backend_options=backend_options,
             debug=False,
             saver=False,
@@ -1444,7 +1444,7 @@ def setUp(self):
         TestQNN.rtol = 1e-1
         backend_options = generate_htp_compiler_spec(use_fp16=True)
         TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
-            soc_model=self.arch_table[TestQNN.model],
+            soc_model=self.chipset_table[TestQNN.model],
             backend_options=backend_options,
             debug=False,
             saver=False,
@@ -1453,7 +1453,7 @@ def setUp(self):
     def test_qnn_backend_dump_intermediate_outputs(self):
         backend_options = generate_htp_compiler_spec(use_fp16=True)
         TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
-            soc_model=self.arch_table[TestQNN.model],
+            soc_model=self.chipset_table[TestQNN.model],
             backend_options=backend_options,
             dump_intermediate_outputs=True,
         )
@@ -1498,7 +1498,7 @@ def test_qnn_backend_multi_contexts(self):
             use_multi_contexts=True,
         )
         compiler_specs = generate_qnn_executorch_compiler_spec(
-            soc_model=self.arch_table[TestQNN.model],
+            soc_model=self.chipset_table[TestQNN.model],
             backend_options=backend_options,
         )
         partitioner = QnnPartitioner(compiler_specs)
@@ -1514,7 +1514,7 @@ def test_qnn_backend_multi_contexts_composite(self):
             use_multi_contexts=True,
         )
         compiler_specs = generate_qnn_executorch_compiler_spec(
-            soc_model=self.arch_table[TestQNN.model],
+            soc_model=self.chipset_table[TestQNN.model],
             backend_options=backend_options,
         )
         module = CompositeDelegateModule(  # noqa: F405
@@ -1535,7 +1535,7 @@ def test_qnn_backend_profile_op(self):
         TestQNN.enable_profile = True
         backend_options = generate_htp_compiler_spec(use_fp16=True)
         TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
-            soc_model=self.arch_table[TestQNN.model],
+            soc_model=self.chipset_table[TestQNN.model],
             backend_options=backend_options,
             profile=True,
         )
@@ -1554,7 +1554,7 @@ def test_qnn_backend_shared_buffer(self):
             use_fp16=True,
         )
         TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
-            soc_model=self.arch_table[TestQNN.model],
+            soc_model=self.chipset_table[TestQNN.model],
             backend_options=backend_options,
             shared_buffer=True,
         )
@@ -1569,7 +1569,7 @@ def test_qnn_backend_shared_buffer(self):
     def test_qnn_backend_online_prepare(self):
         backend_options = generate_htp_compiler_spec(use_fp16=True)
         TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
-            soc_model=self.arch_table[TestQNN.model],
+            soc_model=self.chipset_table[TestQNN.model],
             backend_options=backend_options,
             online_prepare=True,
         )
@@ -1590,7 +1590,7 @@ def test_qnn_backend_context_direct(self):
             bundle_program = from_context_binary(ctx_path, "ctx_loader")
             backend_options = generate_htp_compiler_spec(use_fp16=True)
             compiler_specs = generate_qnn_executorch_compiler_spec(
-                soc_model=self.arch_table[TestQNN.model],
+                soc_model=self.chipset_table[TestQNN.model],
                 backend_options=backend_options,
                 is_from_context_binary=True,
             )
@@ -1614,7 +1614,7 @@ def setUp(self):
         TestQNN.rtol = 1
         backend_options = generate_htp_compiler_spec(use_fp16=False)
         TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
-            soc_model=self.arch_table[TestQNN.model],
+            soc_model=self.chipset_table[TestQNN.model],
             backend_options=backend_options,
             debug=False,
             saver=False,
@@ -1623,7 +1623,7 @@ def setUp(self):
     def test_qnn_backend_dump_intermediate_outputs(self):
         backend_options = generate_htp_compiler_spec(use_fp16=False)
         TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
-            soc_model=self.arch_table[TestQNN.model],
+            soc_model=self.chipset_table[TestQNN.model],
             backend_options=backend_options,
             dump_intermediate_outputs=True,
         )
@@ -1657,7 +1657,7 @@ def test_qnn_backend_skip_node_id_quantizer(self):
             use_fp16=False,
         )
         compiler_specs = generate_qnn_executorch_compiler_spec(
-            soc_model=self.arch_table[TestQNN.model],
+            soc_model=self.chipset_table[TestQNN.model],
             backend_options=backend_options,
         )
         partitioner = QnnPartitioner(compiler_specs)
@@ -1704,7 +1704,7 @@ def test_qnn_backend_skip_node_op_quantizer(self):
             use_fp16=False,
         )
         compiler_specs = generate_qnn_executorch_compiler_spec(
-            soc_model=self.arch_table[TestQNN.model],
+            soc_model=self.chipset_table[TestQNN.model],
             backend_options=backend_options,
         )
         partitioner = QnnPartitioner(compiler_specs)
@@ -1740,7 +1740,7 @@ def test_qnn_backend_graph_level_mixed_precision(self):
             use_fp16=False,
         )
         compiler_specs = generate_qnn_executorch_compiler_spec(
-            soc_model=self.arch_table[TestQNN.model],
+            soc_model=self.chipset_table[TestQNN.model],
             backend_options=backend_options,
         )
         partitioner = QnnPartitioner(compiler_specs)
@@ -1781,7 +1781,7 @@ def test_qnn_backend_multi_contexts(self):
             use_multi_contexts=True,
         )
         compiler_specs = generate_qnn_executorch_compiler_spec(
-            soc_model=self.arch_table[TestQNN.model],
+            soc_model=self.chipset_table[TestQNN.model],
             backend_options=backend_options,
         )
         partitioner = QnnPartitioner(compiler_specs)
@@ -1797,7 +1797,7 @@ def test_qnn_backend_multi_contexts_composite(self):
             use_multi_contexts=True,
         )
         compiler_specs = generate_qnn_executorch_compiler_spec(
-            soc_model=self.arch_table[TestQNN.model],
+            soc_model=self.chipset_table[TestQNN.model],
             backend_options=backend_options,
         )
         module = CompositeDelegateModule(  # noqa: F405
@@ -1819,7 +1819,7 @@ def test_qnn_backend_profile_op(self):
         TestQNN.enable_profile = True
         backend_options = generate_htp_compiler_spec(use_fp16=False)
         TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
-            soc_model=self.arch_table[TestQNN.model],
+            soc_model=self.chipset_table[TestQNN.model],
             backend_options=backend_options,
             profile=True,
         )
@@ -1839,7 +1839,7 @@ def test_qnn_backend_shared_buffer(self):
             use_fp16=False,
         )
         TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
-            soc_model=self.arch_table[TestQNN.model],
+            soc_model=self.chipset_table[TestQNN.model],
             backend_options=backend_options,
             shared_buffer=True,
         )
@@ -1855,7 +1855,7 @@ def test_qnn_backend_shared_buffer(self):
     def test_qnn_backend_online_prepare(self):
         backend_options = generate_htp_compiler_spec(use_fp16=False)
         TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
-            soc_model=self.arch_table[TestQNN.model],
+            soc_model=self.chipset_table[TestQNN.model],
             backend_options=backend_options,
             online_prepare=True,
         )
@@ -1877,7 +1877,7 @@ def test_qnn_backend_context_direct(self):
             bundle_program = from_context_binary(ctx_path, "ctx_loader")
             backend_options = generate_htp_compiler_spec(use_fp16=False)
             compiler_specs = generate_qnn_executorch_compiler_spec(
-                soc_model=self.arch_table[TestQNN.model],
+                soc_model=self.chipset_table[TestQNN.model],
                 backend_options=backend_options,
                 is_from_context_binary=True,
             )
diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py
@@ -120,7 +120,7 @@ class TestQNN(unittest.TestCase):
     build_folder: Literal = ""
     model: QcomChipset = None
     compiler_specs: List[CompileSpec] = None
-    arch_table = get_soc_to_chipset_map()
+    chipset_table = get_soc_to_chipset_map()
     error_only = False
     ip = "localhost"
     port = 8080
diff --git a/examples/qualcomm/oss_scripts/llama2/llama.py b/examples/qualcomm/oss_scripts/llama2/llama.py
@@ -394,7 +394,7 @@ def compile(args):
     end_quantize_ts = time.time()
     print("single_llama.quantize(quant_dtype)", end_quantize_ts - start_quantize_ts)
     single_llama.lowering_modules(
-        args.artifact, kv_type=kv_type, soc_model=get_soc_to_chipset_map[args.model]
+        args.artifact, kv_type=kv_type, soc_model=get_soc_to_chipset_map()[args.model]
     )
     end_lowering_ts = time.time()
     print("Complete Compile", end_lowering_ts - end_quantize_ts)
diff --git a/examples/qualcomm/utils.py b/examples/qualcomm/utils.py
@@ -83,7 +83,7 @@ def __init__(
         self.dump_intermediate_outputs = dump_intermediate_outputs
         self.debug_output_path = f"{self.workspace}/debug_output.bin"
         self.output_folder = f"{self.workspace}/outputs"
-        self.soc_model = get_soc_to_arch_map()[soc_model]
+        self.htp_arch = get_soc_to_arch_map()[soc_model]
         self.error_only = error_only
         self.shared_buffer = shared_buffer
         self.runner = runner
@@ -108,12 +108,12 @@ def push(self, inputs=None, input_list=None, files=None):
             *self.pte_path,
             f"{self.qnn_sdk}/lib/aarch64-android/libQnnHtp.so",
             (
-                f"{self.qnn_sdk}/lib/hexagon-v{self.soc_model}/"
-                f"unsigned/libQnnHtpV{self.soc_model}Skel.so"
+                f"{self.qnn_sdk}/lib/hexagon-v{self.htp_arch}/"
+                f"unsigned/libQnnHtpV{self.htp_arch}Skel.so"
             ),
             (
                 f"{self.qnn_sdk}/lib/aarch64-android/"
-                f"libQnnHtpV{self.soc_model}Stub.so"
+                f"libQnnHtpV{self.htp_arch}Stub.so"
             ),
             f"{self.qnn_sdk}/lib/aarch64-android/libQnnHtpPrepare.so",
             f"{self.qnn_sdk}/lib/aarch64-android/libQnnSystem.so",

Original file line number	Diff line number	Diff line change
`@@ -394,7 +394,7 @@ def compile(args):`
`394`	`394`	`end_quantize_ts = time.time()`
`395`	`395`	`print("single_llama.quantize(quant_dtype)", end_quantize_ts - start_quantize_ts)`
`396`	`396`	`single_llama.lowering_modules(`
`397`		`- args.artifact, kv_type=kv_type, soc_model=get_soc_to_chipset_map[args.model]`
	`397`	`+ args.artifact, kv_type=kv_type, soc_model=get_soc_to_chipset_map()[args.model]`
`398`	`398`	`)`
`399`	`399`	`end_lowering_ts = time.time()`
`400`	`400`	`print("Complete Compile", end_lowering_ts - end_quantize_ts)`