pytorch
diff --git a/‎.github/workflows/build-presets.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/build-presets.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎backends/cadence/aot/ops_registrations.py‎
Lines changed: 2 additions & 2 deletions b/‎backends/cadence/aot/ops_registrations.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎docs/source/advanced-topics-section.md‎
Lines changed: 112 additions & 0 deletions b/‎docs/source/advanced-topics-section.md‎
Lines changed: 112 additions & 0 deletions
diff --git a/‎docs/source/android-arm-vgf.md‎
Lines changed: 1 addition & 0 deletions b/‎docs/source/android-arm-vgf.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/source/android-backends.md‎
Lines changed: 28 additions & 0 deletions b/‎docs/source/android-backends.md‎
Lines changed: 28 additions & 0 deletions
diff --git a/‎docs/source/android-examples.md‎
Lines changed: 9 additions & 0 deletions b/‎docs/source/android-examples.md‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎docs/source/android-mediatek.md‎
Lines changed: 1 addition & 0 deletions b/‎docs/source/android-mediatek.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/source/android-qualcomm.md‎
Lines changed: 1 addition & 0 deletions b/‎docs/source/android-qualcomm.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/source/android-samsung-exynos.md‎
Lines changed: 1 addition & 0 deletions b/‎docs/source/android-samsung-exynos.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/source/android-section.md‎
Lines changed: 23 additions & 0 deletions b/‎docs/source/android-section.md‎
Lines changed: 23 additions & 0 deletions
@@ -109,7 +109,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        preset: [pybind, windows, llm]
+        preset: [pybind, windows]
     with:
       job-name: build
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
 
@@ -329,7 +329,7 @@
     "Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, bool channel_last=False) -> (Tensor out)"
 )
 lib.define(
-    "avg_pool2d(Tensor input, int[2] kernel_size, int[2] stride=[], int[2] padding=0, bool ceil_mode=False, "
+    "avg_pool2d(Tensor input, int[2] kernel_size, int[2] stride=[], int[2] padding=[], bool ceil_mode=False, "
     "bool count_include_pad=True, int? divisor_override=None, Tensor? in_zero_point=None, bool channel_last=False) -> (Tensor out)"
 )
 lib.define(
@@ -525,7 +525,7 @@
     "Tensor out_multiplier, Tensor out_shift, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)"
 )
 lib.define(
-    "avg_pool2d.out(Tensor input, int[2] kernel_size, int[2] stride=[], int[2] padding=0, "
+    "avg_pool2d.out(Tensor input, int[2] kernel_size, int[2] stride=[], int[2] padding=[], "
     "bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None, "
     "Tensor? in_zero_point=None, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)"
 )
 
@@ -0,0 +1,112 @@
+(advanced-topics-section)=
+
+# Advanced
+
+Deep dive into ExecuTorch's advanced features for optimization, customization, and integration.
+
+This section covers advanced concepts for developers who need to customize ExecuTorch for specific use cases, optimize performance, or integrate with custom hardware backends.
+
+## Quantization & Optimization
+
+Techniques for model compression and performance optimization.
+
+**→ {doc}`quantization-optimization` — Quantization strategies and performance optimization**
+
+Key topics:
+
+- Quantization strategies and techniques
+- Performance profiling and optimization
+
+## Model Export
+
+Learn the core ExecuTorch workflow, exporting PyTorch models to the `.pte` format for edge deployment.
+
+**→ {doc}`using-executorch-export`** - Model Export & Lowering
+
+Key topics:
+
+- Export and Lowering Workflow
+- Hardware Backend Selection & Optimization
+- Dynamic Shapes & Advanced Model Features
+
+
+## Kernel Library
+
+Deep dive into ExecuTorch's kernel implementation and customization.
+
+**→ {doc}`kernel-library-advanced` — Kernel library deep dive and customization**
+
+Key topics:
+
+- Kernel library architecture
+- Custom kernel implementation
+- Selective build and optimization
+
+## Backend & Delegates
+
+**→ {doc}`backend-delegate-advanced` — Backend delegate integration**
+
+Key topics:
+
+- Learn how to integrate Backend Delegate into ExecuTorch and more
+- XNNPACK Delegate Internals
+- Debugging Delegation
+
+
+## Runtime & Integration
+
+Advanced runtime features and backend integration.
+
+**→ {doc}`runtime-integration-advanced` — Runtime customization and backend integration**
+
+Key topics:
+
+- Backend delegate implementation
+- Platform abstraction layer
+- Custom runtime integration
+
+## Compiler & IR
+
+Advanced compiler features and intermediate representation details.
+
+**→ {doc}`compiler-ir-advanced` — Compiler passes and IR specification**
+
+Key topics:
+
+- Custom compiler passes
+- Memory planning strategies
+- Backend dialect and EXIR
+- Ops set definition
+
+
+## File Formats
+
+ExecuTorch file format specifications and internals.
+
+**→ {doc}`file-formats-advanced` — PTE and PTD file format specifications**
+
+Key topics:
+
+- PTE file format internals
+- PTD file format specification
+- Custom file format handling
+
+## Next Steps
+
+After exploring advanced topics:
+
+- **{doc}`tools-sdk-section`** - Developer tools for debugging and profiling
+- **{doc}`api-section`** - Complete API reference documentation
+
+```{toctree}
+:hidden:
+:maxdepth: 2
+:caption: Advanced Topics
+
+quantization-optimization
+using-executorch-export
+kernel-library-advanced
+backend-delegate-advanced
+runtime-integration-advanced
+compiler-ir-advanced
+file-formats-advanced
@@ -0,0 +1 @@
+```{include} backends-arm-vgf.md
@@ -0,0 +1,28 @@
+(android-backends)=
+# Backends
+
+Available hardware acceleration backends for Android deployment.
+
+## CPU Acceleration
+
+- {doc}`android-xnnpack` — XNNPACK CPU acceleration
+
+## GPU Acceleration
+
+- {doc}`android-vulkan` — Vulkan GPU acceleration
+
+## NPU/Accelerator Backends
+
+- {doc}`android-qualcomm` — Qualcomm AI Engine (NPU)
+- {doc}`android-mediatek` — MediaTek NPU acceleration
+- {doc}`android-arm-vgf` — ARM VGF Backend
+- {doc}`android-samsung-exynos` — Samsung Exynos NPU
+
+```{toctree}
+:hidden:
+android-xnnpack
+android-vulkan
+android-qualcomm
+android-mediatek
+android-arm-vgf
+android-samsung-exynos
@@ -0,0 +1,9 @@
+# Examples & Demos
+
+- [Working with LLMs - Android Examples](https://github.com/meta-pytorch/executorch-examples/tree/main/llm/android)
+- [Demo Apps](https://github.com/meta-pytorch/executorch-examples/tree/main/dl3/android/DeepLabV3Demo#executorch-android-demo-app)
+- {doc}`tutorial-arm-vgf` — Export a simple PyTorch model for the ExecuTorch VGF backend
+
+```{toctree}
+:hidden:
+tutorial-arm-vgf
@@ -0,0 +1 @@
+```{include} backends-mediatek.md
@@ -0,0 +1 @@
+```{include} backends-qualcomm.md
@@ -0,0 +1 @@
+```{include} backends-samsung-exynos.md
@@ -0,0 +1,23 @@
+(android-section)=
+
+# Android
+
+Deploy ExecuTorch on Android devices with hardware acceleration support.
+
+## Quick Start & Integration
+
+- {doc}`using-executorch-android` — Complete Android integration guide
+
+## Backends
+
+- {doc}`android-backends` — Available Android backends and acceleration options
+
+## Examples & Demos
+
+- {doc}`android-examples` — Explore Android Examples & Demos
+
+```{toctree}
+:hidden:
+using-executorch-android
+android-backends
+android-examples
Original file line number	Diff line number	Diff line change
`@@ -329,7 +329,7 @@`
`329`	`329`	`"Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, bool channel_last=False) -> (Tensor out)"`
`330`	`330`	`)`
`331`	`331`	`lib.define(`
`332`		`- "avg_pool2d(Tensor input, int[2] kernel_size, int[2] stride=[], int[2] padding=0, bool ceil_mode=False, "`
	`332`	`+ "avg_pool2d(Tensor input, int[2] kernel_size, int[2] stride=[], int[2] padding=[], bool ceil_mode=False, "`
`333`	`333`	`"bool count_include_pad=True, int? divisor_override=None, Tensor? in_zero_point=None, bool channel_last=False) -> (Tensor out)"`
`334`	`334`	`)`
`335`	`335`	`lib.define(`
`@@ -525,7 +525,7 @@`
`525`	`525`	`"Tensor out_multiplier, Tensor out_shift, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)"`
`526`	`526`	`)`
`527`	`527`	`lib.define(`
`528`		`- "avg_pool2d.out(Tensor input, int[2] kernel_size, int[2] stride=[], int[2] padding=0, "`
	`528`	`+ "avg_pool2d.out(Tensor input, int[2] kernel_size, int[2] stride=[], int[2] padding=[], "`
`529`	`529`	`"bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None, "`
`530`	`530`	`"Tensor? in_zero_point=None, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)"`
`531`	`531`	`)`