Skip to content

Commit ce21259

Browse files
gbaraldi and claude committed
processor: replace hand-maintained CPU tables with cpufeatures library
Replace the hand-maintained processor_x86.cpp, processor_arm.cpp, and processor_fallback.cpp with a single processor.cpp that uses the cpufeatures library for CPU detection, feature resolution, sysimage serialization, and target matching.

cpufeatures extracts CPU tables from LLVM's TableGen at build time, so they stay in sync with the LLVM version Julia ships. This removes ~3600 lines of manually maintained feature lists and CPU definitions.

Key changes:
- src/processor.cpp: single file replacing three arch-specific backends
- deps/cpufeatures.mk: new dependency (static library, no runtime dep)
- base/cpuid.jl: cross-arch ISA queries via cpufeatures
- base/loading.jl: updated ImageTarget for new serialization format
- src/init.c: CPU target validation (unknown names, multi-target checks)
- Removed LLVMTargetParser from libjulia-internal link

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 2cf16b1 commit ce21259

19 files changed

+1057
-4503
lines changed

base/Makefile

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,21 @@ else
1717
PCRE_INCL_PATH := $(build_includedir)/pcre2.h
1818
endif
1919

20-
define parse_features
21-
@printf "%s\n" "# $(2) features" >> $@
22-
@$(call PRINT_PERL, cat $(SRCDIR)/../src/features_$(1).h | perl -lne 'print "const JL_$(2)_$$1 = UInt32($$2)" if /^\s*JL_FEATURE_DEF(?:_NAME)?\(\s*(\w+)\s*,\s*([^,]+)\s*,.*\)\s*(?:\/\/.*)?$$/' >> $@)
20+
# Extract feature indices from cpufeatures generated headers.
21+
# The FeatureIndex enum has entries like: FEAT_SSE3 = 108,
22+
# We convert them to: const JL_X86_sse3 = UInt32(108)
23+
CPUFEATURES_GENDIR := $(build_includedir)/cpufeatures
24+
25+
# parse_cpufeatures: $(1) = path of a generated cpufeatures header,
#                    $(2) = prefix used in the emitted constant names.
# Appends to $@ a "# <prefix> features (from cpufeatures)" banner, then one
# `const JL_<prefix>_<lowercased name> = UInt32(<index>)` line for every
# `FEAT_<NAME> = <index>` entry matched in the header, then a blank line.
# (No comments inside the define: its text is expanded into a recipe.)
define parse_cpufeatures
@printf "%s\n" "# $(2) features (from cpufeatures)" >> $@
@$(call PRINT_PERL, perl -lne 'print "const JL_$(2)_" . lc($$1) . " = UInt32($$2)" if /^\s*FEAT_(\w+)\s*=\s*(\d+)/' $(1) >> $@)
@printf "\n" >> $@
endef
2530

26-
$(BUILDDIR)/features_h.jl: $(SRCDIR)/../src/features_x86.h $(SRCDIR)/../src/features_aarch32.h $(SRCDIR)/../src/features_aarch64.h
31+
$(BUILDDIR)/features_h.jl: $(wildcard $(CPUFEATURES_GENDIR)/target_tables_*.h)
2732
@-rm -f $@
28-
@$(call parse_features,x86,X86)
29-
@$(call parse_features,aarch32,AArch32)
30-
@$(call parse_features,aarch64,AArch64)
33+
@$(call parse_cpufeatures,$(CPUFEATURES_GENDIR)/target_tables_x86_64.h,X86)
34+
@$(call parse_cpufeatures,$(CPUFEATURES_GENDIR)/target_tables_aarch64.h,AArch64)
3135

3236
$(BUILDDIR)/pcre_h.jl: $(PCRE_INCL_PATH)
3337
@$(call PRINT_PERL, $(CPP) -D PCRE2_CODE_UNIT_WIDTH=8 -dM $< | perl -nle '/^\s*#define\s+PCRE2_(\w*)\s*\(?($(PCRE_CONST))\)?u?\s*$$/ and print index($$1, "ERROR_") == 0 ? "const $$1 = Cint($$2)" : "const $$1 = UInt32($$2)"' | LC_ALL=C sort > $@)

base/cpuid.jl

Lines changed: 165 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ export cpu_isa
1010
A structure which represents the Instruction Set Architecture (ISA) of a
1111
computer. It holds the `Set` of features of the CPU.
1212
13-
The numerical values of the features are automatically generated from the C
14-
source code of Julia and stored in the `features_h.jl` Julia file.
13+
Feature bit indices come from the cpufeatures library's generated tables
14+
(extracted from LLVM's TableGen data at build time).
1515
"""
1616
struct ISA
1717
features::Set{UInt32}
@@ -23,55 +23,167 @@ Base.isless(a::ISA, b::ISA) = a < b
2323

2424
include(string(Base.BUILDROOT, "features_h.jl")) # include($BUILDROOT/base/features_h.jl)
2525

26-
# Keep in sync with `arch_march_isa_mapping`.
26+
"""
27+
_featurebytes_to_isa(buf::Vector{UInt8}) -> ISA
28+
29+
Convert a raw feature byte buffer (from cpufeatures) into an ISA.
30+
"""
31+
function _featurebytes_to_isa(buf::Vector{UInt8})
    # Bit `k` of the byte at zero-based offset `i` stands for feature index
    # `8i + k`; collect the index of every set bit.
    bits = Set{UInt32}()
    for (pos, byte) in enumerate(buf)
        iszero(byte) && continue          # no feature recorded in this byte
        first_index = 8 * (pos - 1)       # `pos` is 1-based, offsets are 0-based
        for shift in 0:7
            if (byte >> shift) & 0x01 == 0x01
                push!(bits, UInt32(first_index + shift))
            end
        end
    end
    return ISA(bits)
end
44+
45+
"""
46+
_cross_lookup_cpu(arch::String, name::String) -> ISA
47+
48+
Look up hardware features for a CPU on any architecture using the
49+
cross-arch tables. Works regardless of host architecture.
50+
Returns an empty ISA if the CPU or architecture is not found.
51+
"""
52+
function _cross_lookup_cpu(arch::String, name::String)
    # A byte count of zero is treated as "nothing to look up for `arch`".
    nbytes = ccall(:jl_cpufeatures_cross_nbytes, Csize_t, (Cstring,), arch)
    nbytes == 0 && return ISA(Set{UInt32}())
    # Zero-fill the buffer: every byte is decoded as feature bits below, so any
    # byte the C side leaves untouched must read as "no features" rather than
    # as whatever happened to be in uninitialised memory.
    buf = fill!(Vector{UInt8}(undef, nbytes), 0x00)
    written = ccall(:jl_cpufeatures_cross_lookup, Csize_t,
                    (Cstring, Cstring, Ptr{UInt8}, Csize_t),
                    arch, name, buf, nbytes)
    # Nothing written: report an empty ISA instead of decoding the buffer.
    written == 0 && return ISA(Set{UInt32}())
    return _featurebytes_to_isa(buf)
end
62+
63+
"""
64+
_build_bit_to_name(arch::String) -> Dict{UInt32, String}
65+
66+
Build a mapping from feature bit index to feature name for an architecture.
67+
"""
68+
function _build_bit_to_name(arch::String)
    nfeats = ccall(:jl_cpufeatures_cross_num_features, UInt32, (Cstring,), arch)
    result = Dict{UInt32, String}()
    # Bail out before forming the range. Where `Int` is `Int32`, `nfeats - 1`
    # is evaluated in `UInt32` and wraps to `typemax(UInt32)` for `nfeats == 0`,
    # which would turn the loop below into ~4 billion ccalls.
    nfeats == 0 && return result
    for i in UInt32(0):(nfeats - UInt32(1))
        name_ptr = ccall(:jl_cpufeatures_cross_feature_name, Cstring, (Cstring, UInt32), arch, i)
        name_ptr == C_NULL && continue    # table entry without a name
        bit = ccall(:jl_cpufeatures_cross_feature_bit, Cint, (Cstring, UInt32), arch, i)
        bit < 0 && continue               # negative result: no bit index for this entry
        result[UInt32(bit)] = unsafe_string(name_ptr)
    end
    return result
end
80+
81+
"""
82+
feature_names(arch::String, cpu::String) -> Vector{String}
83+
feature_names(arch::String, isa::ISA) -> Vector{String}
84+
feature_names(isa::ISA) -> Vector{String}
85+
feature_names() -> Vector{String}
86+
87+
Return sorted hardware feature names. Can query by CPU name (on any
88+
architecture) or by ISA. Defaults to the host architecture and CPU.
89+
90+
# Examples
91+
```julia
92+
feature_names() # host CPU features
93+
feature_names("x86_64", "haswell") # haswell's features
94+
feature_names("aarch64", "cortex-x925") # cross-arch query
95+
```
96+
"""
97+
# Host architecture, features detected on the host CPU.
function feature_names()
    return feature_names(string(Sys.ARCH), _host_isa())
end

# Host architecture, caller-supplied feature set.
function feature_names(isa::ISA)
    return feature_names(string(Sys.ARCH), isa)
end

# Named CPU on an arbitrary architecture, resolved through the cross-arch tables.
feature_names(arch::String, cpu::String) = feature_names(arch, _cross_lookup_cpu(arch, cpu))

# Worker method: translate each feature bit to its name; bits that have no
# entry in the table are reported as "unknown_<bit>".
function feature_names(arch::String, isa::ISA)
    names_by_bit = _build_bit_to_name(arch)
    labels = [get(names_by_bit, bit, "unknown_$bit") for bit in isa.features]
    return sort!(labels)
end
107+
108+
"""
109+
_lookup_cpu(name::String) -> ISA
110+
111+
Look up hardware features for the named CPU on the host architecture.
112+
Returns an empty ISA if the CPU name is not found.
113+
"""
114+
function _lookup_cpu(name::String)
    nbytes = ccall(:jl_cpufeatures_nbytes, Csize_t, ())
    # Zero-fill the buffer: all `nbytes` bytes are decoded as feature bits, so a
    # byte the C side does not write must not carry uninitialised garbage.
    buf = fill!(Vector{UInt8}(undef, nbytes), 0x00)
    ret = ccall(:jl_cpufeatures_lookup, Cint, (Cstring, Ptr{UInt8}, Csize_t), name, buf, nbytes)
    # A non-zero status is treated as "CPU name not found".
    ret != 0 && return ISA(Set{UInt32}())
    return _featurebytes_to_isa(buf)
end
121+
122+
"""
123+
_host_isa() -> ISA
124+
125+
Get the hardware features of the host CPU from the cpufeatures library.
126+
"""
127+
function _host_isa()
    nbytes = ccall(:jl_cpufeatures_nbytes, Csize_t, ())
    # The C call returns no status, so the buffer is zero-filled up front: any
    # byte it leaves unwritten then decodes as "no features" instead of as
    # uninitialised memory.
    buf = fill!(Vector{UInt8}(undef, nbytes), 0x00)
    ccall(:jl_cpufeatures_host, Cvoid, (Ptr{UInt8}, Csize_t), buf, nbytes)
    return _featurebytes_to_isa(buf)
end
133+
134+
# Turn `label => cpuname` entries into `label => ISA` pairs for one
# architecture family, preserving the order of `entries`.
# CPU names are resolved with the cross-arch lookup, so the result does not
# depend on the host architecture. An empty `cpuname` marks the generic
# baseline and maps to an empty ISA without any lookup.
function _make_isa_list(arch::String, entries::Vector{Pair{String,String}})
    return Pair{String,ISA}[
        label => (isempty(cpuname) ? ISA(Set{UInt32}()) : _cross_lookup_cpu(arch, cpuname))
        for (label, cpuname) in entries
    ]
end
148+
149+
# ISA definitions per architecture family.
150+
# CPU names are LLVM names in the cpufeatures database.
151+
# Keep in sync with `arch_march_isa_mapping` in binaryplatforms.jl.
27152
const ISAs_by_family = Dict(
28-
"i686" => [
29-
# Source: https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html.
30-
# Implicit in all sets, because always required by Julia: mmx, sse, sse2
31-
"pentium4" => ISA(Set{UInt32}()),
32-
"prescott" => ISA(Set((JL_X86_sse3,))),
33-
],
34-
"x86_64" => [
35-
# Source: https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html.
36-
# Implicit in all sets, because always required by x86-64 architecture: mmx, sse, sse2
37-
"x86_64" => ISA(Set{UInt32}()),
38-
"core2" => ISA(Set((JL_X86_sse3, JL_X86_ssse3))),
39-
"nehalem" => ISA(Set((JL_X86_sse3, JL_X86_ssse3, JL_X86_sse41, JL_X86_sse42, JL_X86_popcnt))),
40-
"sandybridge" => ISA(Set((JL_X86_sse3, JL_X86_ssse3, JL_X86_sse41, JL_X86_sse42, JL_X86_popcnt, JL_X86_avx, JL_X86_aes, JL_X86_pclmul))),
41-
"haswell" => ISA(Set((JL_X86_movbe, JL_X86_sse3, JL_X86_ssse3, JL_X86_sse41, JL_X86_sse42, JL_X86_popcnt, JL_X86_avx, JL_X86_avx2, JL_X86_aes, JL_X86_pclmul, JL_X86_fsgsbase, JL_X86_rdrnd, JL_X86_fma, JL_X86_bmi, JL_X86_bmi2, JL_X86_f16c))),
42-
"skylake" => ISA(Set((JL_X86_movbe, JL_X86_sse3, JL_X86_ssse3, JL_X86_sse41, JL_X86_sse42, JL_X86_popcnt, JL_X86_avx, JL_X86_avx2, JL_X86_aes, JL_X86_pclmul, JL_X86_fsgsbase, JL_X86_rdrnd, JL_X86_fma, JL_X86_bmi, JL_X86_bmi2, JL_X86_f16c, JL_X86_rdseed, JL_X86_adx, JL_X86_prfchw, JL_X86_clflushopt, JL_X86_xsavec, JL_X86_xsaves))),
43-
"skylake_avx512" => ISA(Set((JL_X86_movbe, JL_X86_sse3, JL_X86_ssse3, JL_X86_sse41, JL_X86_sse42, JL_X86_popcnt, JL_X86_pku, JL_X86_avx, JL_X86_avx2, JL_X86_aes, JL_X86_pclmul, JL_X86_fsgsbase, JL_X86_rdrnd, JL_X86_fma, JL_X86_bmi, JL_X86_bmi2, JL_X86_f16c, JL_X86_rdseed, JL_X86_adx, JL_X86_prfchw, JL_X86_clflushopt, JL_X86_xsavec, JL_X86_xsaves, JL_X86_avx512f, JL_X86_clwb, JL_X86_avx512vl, JL_X86_avx512bw, JL_X86_avx512dq, JL_X86_avx512cd))),
44-
],
45-
"armv6l" => [
46-
# The only armv6l processor we know of that runs Julia on armv6l
47-
# We don't have a good way to tell the different armv6l variants apart through features,
48-
# and honestly we don't care much since it's basically this one chip that people want to use with Julia.
49-
"arm1176jzfs" => ISA(Set{UInt32}()),
50-
],
51-
"armv7l" => [
52-
"armv7l" => ISA(Set{UInt32}()),
53-
"armv7l+neon" => ISA(Set((JL_AArch32_neon,))),
54-
"armv7l+neon+vfpv4" => ISA(Set((JL_AArch32_neon, JL_AArch32_vfp4))),
55-
],
56-
"aarch64" => [
57-
# Implicit in all sets, because always required: fp, asimd
58-
"armv8.0-a" => ISA(Set{UInt32}()),
59-
"armv8.1-a" => ISA(Set((JL_AArch64_v8_1a, JL_AArch64_lse, JL_AArch64_crc, JL_AArch64_rdm))),
60-
"armv8.2-a+crypto" => ISA(Set((JL_AArch64_v8_2a, JL_AArch64_lse, JL_AArch64_crc, JL_AArch64_rdm, JL_AArch64_aes, JL_AArch64_sha2))),
61-
"a64fx" => ISA(Set((JL_AArch64_v8_2a, JL_AArch64_lse, JL_AArch64_crc, JL_AArch64_rdm, JL_AArch64_sha2, JL_AArch64_ccpp, JL_AArch64_complxnum, JL_AArch64_fullfp16, JL_AArch64_sve))),
62-
"apple_m1" => ISA(Set((JL_AArch64_v8_5a, JL_AArch64_lse, JL_AArch64_crc, JL_AArch64_rdm, JL_AArch64_aes, JL_AArch64_sha2, JL_AArch64_sha3, JL_AArch64_ccpp, JL_AArch64_complxnum, JL_AArch64_fp16fml, JL_AArch64_fullfp16, JL_AArch64_dotprod, JL_AArch64_rcpc, JL_AArch64_altnzcv))),
63-
],
64-
"riscv64" => [
65-
"riscv64" => ISA(Set{UInt32}()),
66-
],
67-
"powerpc64le" => [
68-
# We have no way to test powerpc64le features yet, so we're only going to declare the lowest ISA:
69-
"power8" => ISA(Set{UInt32}()),
70-
],
71-
"riscv64" => [
72-
# We have no way to test riscv64 features yet, so we're only going to declare the lowest ISA:
73-
"riscv64" => ISA(Set{UInt32}()),
74-
],
153+
"i686" => _make_isa_list("x86_64", [
154+
"pentium4" => "",
155+
"prescott" => "prescott",
156+
]),
157+
"x86_64" => _make_isa_list("x86_64", [
158+
"x86_64" => "",
159+
"core2" => "core2",
160+
"nehalem" => "nehalem",
161+
"sandybridge" => "sandybridge",
162+
"haswell" => "haswell",
163+
"skylake" => "skylake",
164+
"skylake_avx512" => "skylake-avx512",
165+
]),
166+
"aarch64" => _make_isa_list("aarch64", [
167+
"armv8.0-a" => "",
168+
"armv8.1-a" => "cortex-a76",
169+
"armv8.2-a+crypto" => "cortex-a78",
170+
"a64fx" => "a64fx",
171+
"apple_m1" => "apple-a14",
172+
]),
173+
"armv6l" => _make_isa_list("aarch64", [
174+
"arm1176jzfs" => "",
175+
]),
176+
"armv7l" => _make_isa_list("aarch64", [
177+
"armv7l" => "",
178+
"armv7l+neon" => "",
179+
"armv7l+neon+vfpv4" => "",
180+
]),
181+
"riscv64" => _make_isa_list("riscv64", [
182+
"riscv64" => "",
183+
]),
184+
"powerpc64le" => _make_isa_list("powerpc64le", [
185+
"power8" => "",
186+
]),
75187
)
76188

77189
# Test a CPU feature exists on the currently-running host
@@ -96,27 +208,13 @@ function normalize_arch(arch::String)
96208
return arch
97209
end
98210

99-
let
100-
# Collect all relevant features for the current architecture, if any.
101-
FEATURES = UInt32[]
102-
arch = normalize_arch(String(Sys.ARCH))
103-
if arch in keys(ISAs_by_family)
104-
for isa in ISAs_by_family[arch]
105-
unique!(append!(FEATURES, last(isa).features))
106-
end
107-
end
108-
109-
# Use `@eval` to inline the list of features.
110-
@eval function cpu_isa()
111-
return ISA(Set{UInt32}(feat for feat in $(FEATURES) if test_cpu_feature(feat)))
112-
end
113-
end
114-
115211
"""
116212
cpu_isa()
117213
118214
Return the [`ISA`](@ref) (instruction set architecture) of the current CPU.
119215
"""
120-
cpu_isa
216+
cpu_isa() = _host_isa()  # thin public wrapper around the host feature query
121219

122220
end # module CPUID

base/loading.jl

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1933,21 +1933,23 @@ end
19331933
struct ImageTarget
19341934
name::String
19351935
flags::Int32
1936+
base::Int32
19361937
ext_features::String
19371938
features_en::Vector{UInt8}
19381939
features_dis::Vector{UInt8}
19391940
end
19401941

19411942
function parse_image_target(io::IO)
19421943
flags = read(io, Int32)
1943-
nfeature = read(io, Int32)
1944-
feature_en = read(io, 4*nfeature)
1945-
feature_dis = read(io, 4*nfeature)
1944+
base = read(io, Int32)
1945+
nwords = read(io, Int32) # number of uint64_t feature words
1946+
feature_en = read(io, 8*nwords)
1947+
feature_dis = read(io, 8*nwords)
19461948
name_len = read(io, Int32)
19471949
name = String(read(io, name_len))
19481950
ext_features_len = read(io, Int32)
19491951
ext_features = String(read(io, ext_features_len))
1950-
ImageTarget(name, flags, ext_features, feature_en, feature_dis)
1952+
ImageTarget(name, flags, base, ext_features, feature_en, feature_dis)
19511953
end
19521954

19531955
function parse_image_targets(targets::Vector{UInt8})
@@ -1994,6 +1996,9 @@ function show(io::IO, it::ImageTarget)
19941996
if !isempty(it.ext_features)
19951997
print(io, ",", it.ext_features)
19961998
end
1999+
if it.base >= 0
2000+
print(io, "; base=", it.base)
2001+
end
19972002
print(io, "; flags=", it.flags)
19982003
print(io, "; features_en=(")
19992004
first = true

deps/Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ ifeq ($(USE_SYSTEM_DSFMT), 0)
9595
DEP_LIBS += dsfmt
9696
endif
9797

98+
DEP_LIBS += cpufeatures
99+
98100
ifeq ($(USE_SYSTEM_LLVM), 0)
99101
DEP_LIBS += llvm
100102
endif
@@ -211,7 +213,7 @@ DEP_LIBS_STAGED_ALL := llvm llvm-tools clang llvmunwind unwind libuv pcre \
211213
openlibm dsfmt blastrampoline openblas lapack gmp mpfr patchelf utf8proc \
212214
objconv openssl libssh2 nghttp2 curl libgit2 libwhich zlib zstd p7zip csl \
213215
sanitizers libsuitesparse lld libtracyclient ittapi nvtx \
214-
terminfo mmtk_julia
216+
terminfo mmtk_julia cpufeatures
215217
DEP_LIBS_ALL := $(DEP_LIBS_STAGED_ALL)
216218

217219
ifneq ($(USE_BINARYBUILDER_OPENBLAS),0)
@@ -282,6 +284,7 @@ include $(SRCDIR)/unwind.mk
282284
include $(SRCDIR)/gmp.mk
283285
include $(SRCDIR)/mpfr.mk
284286
include $(SRCDIR)/patchelf.mk
287+
include $(SRCDIR)/cpufeatures.mk
285288
include $(SRCDIR)/openssl.mk
286289
include $(SRCDIR)/libssh2.mk
287290
include $(SRCDIR)/nghttp2.mk

deps/cpufeatures.mk

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
## CPUFEATURES - standalone CPU feature detection library ##
include $(SRCDIR)/cpufeatures.version

# Directory the release tarball is unpacked into.
CPUFEATURES_SRC_DIR := $(BUILDDIR)/cpufeatures-$(CPUFEATURES_VER)

# Download the tarball into the source cache. $(SRCCACHE) is order-only: the
# directory must exist, but its timestamp must not trigger a re-download.
$(SRCCACHE)/cpufeatures-$(CPUFEATURES_VER).tar.gz: | $(SRCCACHE)
	$(JLDOWNLOAD) $@ $(CPUFEATURES_TAR_URL)
	touch -c $@

# Unpack into a fresh directory. The tarball is checksummed first so that a
# corrupted or unexpected download is rejected before anything is extracted
# or built; `source-extracted` is the stamp recording a completed unpack.
$(CPUFEATURES_SRC_DIR)/source-extracted: $(SRCCACHE)/cpufeatures-$(CPUFEATURES_VER).tar.gz
	$(JLCHECKSUM) $<
	rm -rf $(dir $@)
	mkdir -p $(dir $@)
	$(TAR) -C $(dir $@) --strip-components 1 -xf $<
	echo 1 > $@

# Stand-alone verification of the cached tarball.
checksum-cpufeatures: $(SRCCACHE)/cpufeatures-$(CPUFEATURES_VER).tar.gz
	$(JLCHECKSUM) $<
18+
19+
# Run the library's own `lib` target inside the unpacked tree, forwarding the
# C++ compiler, the C++ flags (with -O2 appended) and the target architecture.
# `build-compiled` is the stamp recording a successful build.
$(CPUFEATURES_SRC_DIR)/build-compiled: $(CPUFEATURES_SRC_DIR)/source-extracted
	$(MAKE) -C $(<D) lib \
		CXX="$(CXX)" \
		CXXFLAGS="$(JCXXFLAGS) -O2" \
		ARCH=$(ARCH)
	echo 1 > $@

# Install recipe handed to staged-install. As used below, $(1) is the tree to
# copy from and $(2) the root to copy into: public headers and the generated
# target_tables_*.h go to <includedir>/cpufeatures, the static library to
# <libdir>.
define CPUFEATURES_INSTALL
	mkdir -p $(2)/$$(build_includedir)/cpufeatures $(2)/$$(build_libdir)
	cp $(1)/include/*.h $(2)/$$(build_includedir)/cpufeatures/
	cp $(1)/generated/target_tables_*.h $(2)/$$(build_includedir)/cpufeatures/
	cp $(1)/build/libtarget_parsing.a $(2)/$$(build_libdir)/
endef
$(eval $(call staged-install, \
    cpufeatures,cpufeatures-$(CPUFEATURES_VER), \
    CPUFEATURES_INSTALL,,,,))
36+
37+
# Drop only the build stamp; the leading `-` keeps `clean` going if rm fails.
clean-cpufeatures:
	-rm -f $(CPUFEATURES_SRC_DIR)/build-compiled

# Remove the cached tarball(s) and the whole unpacked/build tree.
distclean-cpufeatures:
	rm -rf $(SRCCACHE)/cpufeatures*.tar.gz $(CPUFEATURES_SRC_DIR)

# Per-stage entry points. There is no configure or test step of its own, so
# `configure-` forwards to `extract-` and `check-` forwards to `compile-`.
get-cpufeatures:       $(SRCCACHE)/cpufeatures-$(CPUFEATURES_VER).tar.gz
extract-cpufeatures:   $(CPUFEATURES_SRC_DIR)/source-extracted
configure-cpufeatures: extract-cpufeatures
compile-cpufeatures:   $(CPUFEATURES_SRC_DIR)/build-compiled
check-cpufeatures:     compile-cpufeatures
fastcheck-cpufeatures: check-cpufeatures

0 commit comments

Comments
 (0)