Commit 43484a2

Update on "[executorch][runtime] Introduce PteDataMap for weight sharing"
PteDataMap is the NamedDataMap that will live in the runtime. It is used to give delegates access to opaque named data stored in the PTE file. Open to alternative naming suggestions; maybe 'PTEDataMap' or 'ProgramDataMap'?

**Usage**

The PteDataMap is owned by the program and instantiated at program load time if named_data exists in the PTE file.

We introduce usage of 'std::optional' here. I think we can also use executorch::aten::optional to avoid adding the standard lib?

When initializing delegates, the PteDataMap is given to delegate_init. Delegates can retrieve opaque delegate data by key using 'get_data'. This gives them a FreeableBuffer that they can free later.

**Testing**

This test uses the C++ flatbuffer API to build a fake program containing named data. We also create a temp file with sample data that the data loader can wrap around.

TODO: e2e test once delegate AOT is ready and we can generate a file with named data.

**Note**

As the PteDataMap wraps around flatbuffer constructs, the Program must outlive the PteDataMap.

PteDataMap does not implement:
- get_metadata; currently, all data stored is opaque. Later, we can implement get_metadata if a backend stores plain tensor data.
- load_into; this is mostly used for the training case and isn't used by delegates, at least not at the moment.

Differential Revision: [D70213646](https://our.internmc.facebook.com/intern/diff/D70213646/)

[ghstack-poisoned]
2 parents afb3839 + 0fd4e9b commit 43484a2

File tree

10 files changed: +174 additions, -151 deletions

.github/workflows/android-perf.yml

Lines changed: 63 additions & 57 deletions

@@ -96,63 +96,6 @@ jobs:
 
       PYTHONPATH="${PWD}" python .ci/scripts/gather_benchmark_configs.py $ARGS
 
-  prepare-test-specs:
-    runs-on: linux.2xlarge
-    needs: set-parameters
-    strategy:
-      matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }}
-      fail-fast: false
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Prepare the spec
-        id: prepare
-        shell: bash
-        env:
-          BENCHMARK_CONFIG: ${{ toJSON(matrix) }}
-        working-directory: extension/benchmark/android/benchmark
-        run: |
-          set -eux
-
-          # The model will be exported in the next step to this S3 path
-          MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/model.zip"
-          # We could write a script to properly use jinja here, but there is only one variable,
-          # so let's just sed it
-          sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' android-llm-device-farm-test-spec.yml.j2
-
-          BENCHMARK_CONFIG_ID=$(echo "${{ matrix.model }}_${{ matrix.config }}" | sed -e 's/[^A-Za-z0-9._-]/_/g')
-          # The config for this benchmark runs, we save it in the test spec so that it can be fetched
-          # later by the upload script
-          sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' android-llm-device-farm-test-spec.yml.j2
-
-          cp android-llm-device-farm-test-spec.yml.j2 android-llm-device-farm-test-spec.yml
-          # Just print the test spec for debugging
-          cat android-llm-device-farm-test-spec.yml
-
-          # Save the benchmark configs so that we can use it later in the dashboard
-          echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json"
-          echo "benchmark-config-id=${BENCHMARK_CONFIG_ID}" >> $GITHUB_OUTPUT
-
-      - name: Upload the spec
-        uses: seemethere/upload-artifact-s3@v5
-        with:
-          s3-bucket: gha-artifacts
-          s3-prefix: |
-            ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}
-          retention-days: 1
-          if-no-files-found: error
-          path: extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml
-
-      - name: Update the benchmark configs
-        uses: seemethere/upload-artifact-s3@v5
-        with:
-          s3-bucket: gha-artifacts
-          s3-prefix: |
-            ${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
-          retention-days: 1
-          if-no-files-found: error
-          path: extension/benchmark/android/benchmark/${{ steps.prepare.outputs.benchmark-config-id }}.json
-
   export-models:
     name: export-models
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
@@ -335,6 +278,69 @@ jobs:
       fi
       echo "::endgroup::"
 
+  prepare-test-specs:
+    runs-on: linux.2xlarge
+    needs:
+      - set-parameters
+      - export-models
+    strategy:
+      matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }}
+      fail-fast: false
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Prepare the spec
+        id: prepare
+        shell: bash
+        env:
+          BENCHMARK_CONFIG: ${{ toJSON(matrix) }}
+        working-directory: extension/benchmark/android/benchmark
+        run: |
+          set -eux
+
+          # The model will be exported in the next step to this S3 path
+          MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/model.zip"
+
+          # Check if the model artifact exists, fail this step skip generating test-spec.
+          curl -s --head -f ${MODEL_PATH}
+
+          # We could write a script to properly use jinja here, but there is only one variable,
+          # so let's just sed it
+          sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' android-llm-device-farm-test-spec.yml.j2
+
+          BENCHMARK_CONFIG_ID=$(echo "${{ matrix.model }}_${{ matrix.config }}" | sed -e 's/[^A-Za-z0-9._-]/_/g')
+          # The config for this benchmark runs, we save it in the test spec so that it can be fetched
+          # later by the upload script
+          sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' android-llm-device-farm-test-spec.yml.j2
+
+          cp android-llm-device-farm-test-spec.yml.j2 android-llm-device-farm-test-spec.yml
+          # Just print the test spec for debugging
+          cat android-llm-device-farm-test-spec.yml
+
+          # Save the benchmark configs so that we can use it later in the dashboard
+          echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json"
+          echo "benchmark-config-id=${BENCHMARK_CONFIG_ID}" >> $GITHUB_OUTPUT
+
+      - name: Upload the spec
+        uses: seemethere/upload-artifact-s3@v5
+        with:
+          s3-bucket: gha-artifacts
+          s3-prefix: |
+            ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}
+          retention-days: 1
+          if-no-files-found: error
+          path: extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml
+
+      - name: Update the benchmark configs
+        uses: seemethere/upload-artifact-s3@v5
+        with:
+          s3-bucket: gha-artifacts
+          s3-prefix: |
+            ${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
+          retention-days: 1
+          if-no-files-found: error
+          path: extension/benchmark/android/benchmark/${{ steps.prepare.outputs.benchmark-config-id }}.json
+
   build-benchmark-app:
     name: build-benchmark-app
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main

.github/workflows/apple-perf.yml

Lines changed: 62 additions & 57 deletions

@@ -98,63 +98,6 @@ jobs:
 
       echo "benchmark_configs is: ${{ steps.set-parameters.outputs.benchmark_configs }}"
 
-  prepare-test-specs:
-    runs-on: linux.2xlarge
-    needs: set-parameters
-    strategy:
-      matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }}
-      fail-fast: false
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Prepare the spec
-        id: prepare
-        shell: bash
-        env:
-          BENCHMARK_CONFIG: ${{ toJSON(matrix) }}
-        working-directory: extension/benchmark/apple/Benchmark
-        run: |
-          set -eux
-
-          # The model will be exported in the next step to this S3 path
-          MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/model.zip"
-          # We could write a script to properly use jinja here, but there is only one variable,
-          # so let's just sed it
-          sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' default-ios-device-farm-appium-test-spec.yml.j2
-
-          BENCHMARK_CONFIG_ID=$(echo "${{ matrix.model }}_${{ matrix.config }}" | sed -e 's/[^A-Za-z0-9._-]/_/g')
-          # The config for this benchmark runs, we save it in the test spec so that it can be fetched
-          # later by the upload script
-          sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' default-ios-device-farm-appium-test-spec.yml.j2
-
-          cp default-ios-device-farm-appium-test-spec.yml.j2 default-ios-device-farm-appium-test-spec.yml
-          # Just print the test spec for debugging
-          cat default-ios-device-farm-appium-test-spec.yml
-
-          # Save the benchmark configs so that we can use it later in the dashboard
-          echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json"
-          echo "benchmark-config-id=${BENCHMARK_CONFIG_ID}" >> $GITHUB_OUTPUT
-
-      - name: Upload the spec
-        uses: seemethere/upload-artifact-s3@v5
-        with:
-          s3-bucket: gha-artifacts
-          s3-prefix: |
-            ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}
-          retention-days: 1
-          if-no-files-found: error
-          path: extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml
-
-      - name: Update the benchmark configs
-        uses: seemethere/upload-artifact-s3@v5
-        with:
-          s3-bucket: gha-artifacts
-          s3-prefix: |
-            ${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
-          retention-days: 1
-          if-no-files-found: error
-          path: extension/benchmark/apple/Benchmark/${{ steps.prepare.outputs.benchmark-config-id }}.json
-
   export-models:
     name: export-models
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
@@ -344,6 +287,68 @@ jobs:
       fi
       echo "::endgroup::"
 
+  prepare-test-specs:
+    runs-on: linux.2xlarge
+    needs:
+      - set-parameters
+      - export-models
+    strategy:
+      matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }}
+      fail-fast: false
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Prepare the spec
+        id: prepare
+        shell: bash
+        env:
+          BENCHMARK_CONFIG: ${{ toJSON(matrix) }}
+        working-directory: extension/benchmark/apple/Benchmark
+        run: |
+          set -eux
+
+          # The model will be exported in the next step to this S3 path
+          MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/model.zip"
+          # Check if the model artifact exists, fail this step skip generating test-spec.
+          curl -s --head -f ${MODEL_PATH}
+          # We could write a script to properly use jinja here, but there is only one variable,
+          # so let's just sed it
+          sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' default-ios-device-farm-appium-test-spec.yml.j2
+
+          BENCHMARK_CONFIG_ID=$(echo "${{ matrix.model }}_${{ matrix.config }}" | sed -e 's/[^A-Za-z0-9._-]/_/g')
+          # The config for this benchmark runs, we save it in the test spec so that it can be fetched
+          # later by the upload script
+          sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' default-ios-device-farm-appium-test-spec.yml.j2
+
+          cp default-ios-device-farm-appium-test-spec.yml.j2 default-ios-device-farm-appium-test-spec.yml
+          # Just print the test spec for debugging
+          cat default-ios-device-farm-appium-test-spec.yml
+
+          # Save the benchmark configs so that we can use it later in the dashboard
+          echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json"
+          echo "benchmark-config-id=${BENCHMARK_CONFIG_ID}" >> $GITHUB_OUTPUT
+
+      - name: Upload the spec
+        uses: seemethere/upload-artifact-s3@v5
+        with:
+          s3-bucket: gha-artifacts
+          s3-prefix: |
+            ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}
+          retention-days: 1
+          if-no-files-found: error
+          path: extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml
+
+      - name: Update the benchmark configs
+        uses: seemethere/upload-artifact-s3@v5
+        with:
+          s3-bucket: gha-artifacts
+          s3-prefix: |
+            ${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
+          retention-days: 1
+          if-no-files-found: error
+          path: extension/benchmark/apple/Benchmark/${{ steps.prepare.outputs.benchmark-config-id }}.json
+
   build-benchmark-app:
     name: build-benchmark-app
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main

devtools/etdump/etdump_flatcc.cpp

Lines changed: 1 addition & 1 deletion
@@ -503,7 +503,7 @@ void ETDumpGen::set_debug_buffer(Span<uint8_t> buffer) {
   Result<BufferDataSink> bds_ret = BufferDataSink::create(buffer);
   ET_CHECK_MSG(
       bds_ret.ok(),
-      "Failed to write tensor with error 0x%" PRIx32,
+      "Failed to create data sink from debug buffer with error 0x%" PRIx32,
       static_cast<uint32_t>(bds_ret.error()));
 
   buffer_data_sink_ = std::move(bds_ret.get());

examples/qualcomm/oss_scripts/llama/model/static_llama.py

Lines changed: 1 addition & 1 deletion
@@ -461,7 +461,7 @@ def get_metadata(self):
             "get_bos_id": 1,
             "get_eos_id": 2,
             "get_dim": self.dim,
-            "get_head_dim": self.dim // self.n_heads,
+            "get_head_dim": self.head_dim,
             "get_max_batch_size": self.max_batch_size,
             "get_max_seq_len": self.max_seq_len,
             "get_n_bos": 1,

exir/program/_program.py

Lines changed: 12 additions & 0 deletions
@@ -978,6 +978,18 @@ def _remove_invalid_ops_for_not_decompose(
 ) -> List[torch._ops.OpOverload]:
     # To address https://github.com/pytorch/executorch/issues/8781
     def keep(op):
+        # Explicit allow list
+        allow_list = []
+        try:
+            # Ops in torch.ops.quant are not always loaded, so we use try/except
+            # Aliases output, but we need to allow it for XNNPACK
+            allow_list.append(torch.ops.quant.choose_qparams_affine.default)
+        except:
+            pass
+
+        if op in allow_list:
+            return True
+
         schema = op._schema
         native_schema = _pybind_schema_to_native_schema(schema)
         if native_schema.is_mutable:

extension/llm/export/builder.py

Lines changed: 2 additions & 0 deletions
@@ -448,6 +448,8 @@ def to_edge_transform_and_lower(
             compile_config=edge_config,
             constant_methods=self.metadata,
         )
+        if self.verbose:
+            logging.info(f"Exported graph:\n{self.edge_manager.exported_program()}")
         return self
 
     def to_executorch(

runtime/executor/program.h

Lines changed: 3 additions & 3 deletions
@@ -269,14 +269,14 @@ class Program final {
       FreeableBuffer&& program_data,
       const executorch_flatbuffer::Program* internal_program,
       FreeableBuffer&& constant_segment_data,
-      std::optional<internal::PteDataMap>&& core_data_map)
+      std::optional<internal::PteDataMap>&& pte_data_map)
       : program_data_(std::move(program_data)),
         // Don't need the loader if there are no segments.
         loader_(segment_base_offset > 0 ? loader : nullptr),
         internal_program_(internal_program),
         segment_base_offset_(segment_base_offset),
         constant_segment_data_(std::move(constant_segment_data)),
-        core_data_map_(std::move(core_data_map)) {}
+        pte_data_map_(std::move(pte_data_map)) {}
 
   // Not copyable or assignable.
   Program(const Program& rhs) = delete;
@@ -301,7 +301,7 @@ class Program final {
   FreeableBuffer constant_segment_data_;
 
   /// NamedDataMap holding named data from the program.
-  std::optional<internal::PteDataMap> core_data_map_;
+  std::optional<internal::PteDataMap> pte_data_map_;
 };
 
 } // namespace runtime

runtime/executor/pte_data_map.cpp

Lines changed: 2 additions & 6 deletions
@@ -16,12 +16,8 @@ namespace internal {
 /* static */ executorch::runtime::Result<PteDataMap> PteDataMap::create(
     executorch::runtime::DataLoader* loader,
     size_t segment_base_offset,
-    const flatbuffers::Vector<
-        flatbuffers::Offset<executorch_flatbuffer::NamedData>,
-        uint32_t>* named_data,
-    const flatbuffers::Vector<
-        flatbuffers::Offset<executorch_flatbuffer::DataSegment>,
-        uint32_t>* segments) {
+    const flatbuffers::FlatbufferNamedData* named_data,
+    const flatbuffers::FlatbufferDataSegment* segments) {
   ET_CHECK_OR_RETURN_ERROR(
       loader != nullptr && named_data != nullptr && segments != nullptr,
       InvalidArgument,
