@@ -45,17 +45,45 @@ def read_version(file_path="version.txt"):
4545if version_suffix is None :
4646 version_suffix = f"+git{ get_git_commit_id ()} "
4747
48- use_cpp = os .getenv ("USE_CPP" )
49-
5048import platform
5149
52- build_macos_arm_auto = (
53- use_cpp == "1"
54- and platform .machine ().startswith ("arm64" )
55- and platform .system () == "Darwin"
56- )
50+ ################################################################################
51+ # Build Configuration - Environment Variables and Build Options
52+ ################################################################################
53+
54+ # Core build toggles
55+ use_cpp = os .getenv ("USE_CPP" , "1" )
56+ use_cpu_kernels = os .getenv ("USE_CPU_KERNELS" , "0" ) == "1"
57+
58+ # Platform detection
59+ is_arm64 = platform .machine ().startswith ("arm64" ) or platform .machine () == "aarch64"
60+ is_macos = platform .system () == "Darwin"
61+ is_linux = platform .system () == "Linux"
62+
63+ # Auto-enable experimental builds on ARM64 macOS when USE_CPP=1 (the default when USE_CPP is unset)
64+ build_macos_arm_auto = use_cpp == "1" and is_arm64 and is_macos
65+
66+ # Build configuration hierarchy and relationships:
67+ #
68+ # Level 1: USE_CPP (Primary gate)
69+ # ├── "0" → Skip all C++ extensions (Python-only mode)
70+ # └── "1"/unset → Build C++ extensions (unset defaults to "1"; only "0" skips the build)
71+ #
72+ # Level 2: Platform-specific optimizations
73+ # ├── USE_CPU_KERNELS="1" + Linux → Include optimized CPU kernels (AVX512, etc.)
74+ # └── ARM64 + macOS → Auto-enable experimental builds (build_macos_arm_auto)
75+ #
76+ # Level 3: Experimental builds (cmake-based)
77+ # ├── BUILD_TORCHAO_EXPERIMENTAL="1" → Force experimental builds
78+ # ├── build_macos_arm_auto → Auto-enable on ARM64 macOS
79+ # └── When enabled, provides access to:
80+ # ├── TORCHAO_BUILD_CPU_AARCH64 → ARM64 CPU kernels
81+ # ├── TORCHAO_BUILD_KLEIDIAI → Kleidi AI library integration
82+ # ├── TORCHAO_BUILD_EXPERIMENTAL_MPS → MPS acceleration (macOS only)
83+ # ├── TORCHAO_ENABLE_ARM_NEON_DOT → ARM NEON dot product instructions
84+ # ├── TORCHAO_ENABLE_ARM_I8MM → ARM 8-bit integer matrix multiply
85+ # └── TORCHAO_PARALLEL_BACKEND → Backend selection (aten_openmp, executorch, etc.)
5786
58- use_cpp_kernels = os .getenv ("USE_CPP_KERNELS" , "0" ) == "1"
5987
6088from torchao .utils import TORCH_VERSION_AT_LEAST_2_7
6189
@@ -92,12 +120,10 @@ def __init__(self):
92120 # can be built by explicitly setting TORCHAO_BUILD_CPU_AARCH64=1
93121 self .build_cpu_aarch64 = self ._os_bool_var (
94122 "TORCHAO_BUILD_CPU_AARCH64" ,
95- default = (self . _is_arm64 () and self . _is_macos () ),
123+ default = (is_arm64 and is_macos ),
96124 )
97125 if self .build_cpu_aarch64 :
98- assert self ._is_arm64 (), (
99- "TORCHAO_BUILD_CPU_AARCH64 requires an arm64 machine"
100- )
126+ assert is_arm64 , "TORCHAO_BUILD_CPU_AARCH64 requires an arm64 machine"
101127
102128 # TORCHAO_BUILD_KLEIDIAI is disabled by default for now because
103129 # 1) It increases the build time
@@ -115,8 +141,8 @@ def __init__(self):
115141 "TORCHAO_BUILD_EXPERIMENTAL_MPS" , default = False
116142 )
117143 if self .build_experimental_mps :
118- assert self . _is_macos () , "TORCHAO_BUILD_EXPERIMENTAL_MPS requires MacOS "
119- assert self . _is_arm64 () , "TORCHAO_BUILD_EXPERIMENTAL_MPS requires arm64"
144+ assert is_macos , "TORCHAO_BUILD_EXPERIMENTAL_MPS requires macOS "
145+ assert is_arm64 , "TORCHAO_BUILD_EXPERIMENTAL_MPS requires arm64"
120146 assert torch .mps .is_available (), (
121147 "TORCHAO_BUILD_EXPERIMENTAL_MPS requires MPS be available"
122148 )
@@ -129,7 +155,7 @@ def __init__(self):
129155 # Enabled by default on Apple silicon (arm64 macOS)
130156 self .enable_arm_neon_dot = self ._os_bool_var (
131157 "TORCHAO_ENABLE_ARM_NEON_DOT" ,
132- default = (self . _is_arm64 () and self . _is_macos () ),
158+ default = (is_arm64 and is_macos ),
133159 )
134160 if self .enable_arm_neon_dot :
135161 assert self .build_cpu_aarch64 , (
@@ -146,12 +172,6 @@ def __init__(self):
146172 "TORCHAO_ENABLE_ARM_I8MM requires TORCHAO_BUILD_CPU_AARCH64 be set"
147173 )
148174
149- def _is_arm64 (self ) -> bool :
150- return platform .machine ().startswith ("arm64" ) or platform .machine () == "aarch64"
151-
152- def _is_macos (self ) -> bool :
153- return platform .system () == "Darwin"
154-
155175 def _os_bool_var (self , var , default ) -> bool :
156176 default_val = "1" if default else "0"
157177 return os .getenv (var , default_val ) == "1"
@@ -323,6 +343,11 @@ def __init__(
323343
324344
325345def get_extensions ():
346+ # Skip building C++ extensions if USE_CPP is set to "0"
347+ if use_cpp == "0" :
348+ print ("USE_CPP=0: Skipping compilation of C++ extensions" )
349+ return []
350+
326351 debug_mode = use_debug_mode ()
327352 if debug_mode :
328353 print ("Compiling in debug mode" )
@@ -363,11 +388,7 @@ def get_extensions():
363388 ["-O3" if not debug_mode else "-O0" , "-fdiagnostics-color=always" ]
364389 )
365390
366- if (
367- use_cpp_kernels
368- and platform .system () == "Linux"
369- and TORCH_VERSION_AT_LEAST_2_7
370- ):
391+ if use_cpu_kernels and is_linux and TORCH_VERSION_AT_LEAST_2_7 :
371392 if torch ._C ._cpu ._is_avx512_supported ():
372393 extra_compile_args ["cxx" ].extend (
373394 [
@@ -427,7 +448,7 @@ def get_extensions():
427448
428449 # Collect C++ source files
429450 sources = list (glob .glob (os .path .join (extensions_dir , "**/*.cpp" ), recursive = True ))
430- if not use_cpp_kernels or platform . system () != "Linux" :
451+ if not use_cpu_kernels or not is_linux :
431452 # Remove csrc/cpu/*.cpp
432453 excluded_sources = list (
433454 glob .glob (os .path .join (extensions_dir , "cpu/*.cpp" ), recursive = True )
@@ -652,7 +673,9 @@ def bool_to_on_off(value):
652673 return ext_modules
653674
654675
655- check_submodules ()
676+ # Only check submodules if we're going to build C++ extensions
677+ if use_cpp != "0" :
678+ check_submodules ()
656679
657680setup (
658681 name = "torchao" ,
0 commit comments