Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 49 additions & 3 deletions src/processor_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1890,12 +1890,56 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
return res;
}

#ifndef __clang_gcanalyzer__
llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
{
if (jit_targets.empty())
jl_error("JIT targets not initialized");

auto &cmdline = get_cmdline_targets();
check_cmdline(cmdline, true);
llvm::SmallVector<TargetData<feature_sz>, 0> image_targets;
for (auto &arg: cmdline) {
auto data = arg_target_data(arg, image_targets.empty());
image_targets.push_back(std::move(data));
}
auto ntargets = image_targets.size();
if (image_targets.empty())
jl_error("No targets specifiec");
llvm::SmallVector<jl_target_spec_t, 0> res;
for (auto &target: jit_targets) {
// Now decide the clone condition.
for (size_t i = 1; i < ntargets; i++) {
auto &t = image_targets[i];
if (t.en.flags & JL_TARGET_CLONE_ALL)
continue;
auto &features0 = image_targets[t.base].en.features;
// Always clone when code checks CPU features
t.en.flags |= JL_TARGET_CLONE_CPU;
static constexpr uint32_t clone_fp16[] = {Feature::fp16fml,Feature::fullfp16};
for (auto fe: clone_fp16) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_FLOAT16;
break;
}
}
// The most useful one in general...
t.en.flags |= JL_TARGET_CLONE_LOOP;
#ifdef _CPU_ARM_
static constexpr uint32_t clone_math[] = {Feature::vfp3, Feature::vfp4, Feature::neon};
for (auto fe: clone_math) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_MATH;
break;
}
}
static constexpr uint32_t clone_simd[] = {Feature::neon};
for (auto fe: clone_simd) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_SIMD;
break;
}
}
#endif
}
for (auto &target: image_targets) {
auto features_en = target.en.features;
auto features_dis = target.dis.features;
for (auto &fename: feature_names) {
Expand All @@ -1916,6 +1960,8 @@ llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
return res;
}

#endif

extern "C" int jl_test_cpu_feature(jl_cpu_feature_t feature)
{
if (feature >= 32 * feature_sz)
Expand Down
23 changes: 19 additions & 4 deletions src/processor_fallback.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,13 +144,27 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
jl_get_cpu_features_llvm(), {{}, 0}, {{}, 0}, 0});
return res;
}

#ifndef __clang_gcanalyzer__
llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
{
if (jit_targets.empty())
jl_error("JIT targets not initialized");

auto &cmdline = get_cmdline_targets();
check_cmdline(cmdline, true);
llvm::SmallVector<TargetData<feature_sz>, 0> image_targets;
for (auto &arg: cmdline) {
auto data = arg_target_data(arg, image_targets.empty());
image_targets.push_back(std::move(data));
}
auto ntargets = image_targets.size();
// Now decide the clone condition.
for (size_t i = 1; i < ntargets; i++) {
auto &t = image_targets[i];
t.en.flags |= JL_TARGET_CLONE_ALL;
}
if (image_targets.empty())
jl_error("No image targets found");
llvm::SmallVector<jl_target_spec_t, 0> res;
for (auto &target: jit_targets) {
for (auto &target: image_targets) {
jl_target_spec_t ele;
std::tie(ele.cpu_name, ele.cpu_features) = get_llvm_target_str(target);
ele.data = serialize_target_data(target.name, target.en.features,
Expand All @@ -161,6 +175,7 @@ llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
}
return res;
}
#endif

JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits)
{
Expand Down
80 changes: 76 additions & 4 deletions src/processor_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -910,6 +910,8 @@ static uint32_t pkgimg_init_cb(const void *id, jl_value_t **rejection_reason)
return match.best_idx;
}

// This function serves as a fallback during bootstrapping: at that point we don't have a sysimage
// with native code, so sysimg_init_cb is not called. Otherwise, this function shouldn't do anything.
static void ensure_jit_target(bool imaging)
{
auto &cmdline = get_cmdline_targets();
Expand Down Expand Up @@ -1102,13 +1104,82 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
{feature_masks, 0}, {{}, 0}, 0});
return res;
}

// This function parses the -C command-line argument to figure out which targets to multiversion for.
#ifndef __clang_gcanalyzer__
llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
{
if (jit_targets.empty())
jl_error("JIT targets not initialized");
auto &cmdline = get_cmdline_targets();
check_cmdline(cmdline, true);
llvm::SmallVector<TargetData<feature_sz>, 0> image_targets;
for (auto &arg: cmdline) {
auto data = arg_target_data(arg, image_targets.empty());
image_targets.push_back(std::move(data));
}

auto ntargets = image_targets.size();
// Now decide the clone condition.
for (size_t i = 1; i < ntargets; i++) {
auto &t = image_targets[i];
if (t.en.flags & JL_TARGET_CLONE_ALL)
continue;
// Always clone when code checks CPU features
t.en.flags |= JL_TARGET_CLONE_CPU;
// The most useful one in general...
t.en.flags |= JL_TARGET_CLONE_LOOP;
auto &features0 = image_targets[t.base].en.features;
// Special case for KNL/KNM since they're so different
if (!(t.dis.flags & JL_TARGET_CLONE_ALL)) {
if ((t.name == "knl" || t.name == "knm") &&
image_targets[t.base].name != "knl" && image_targets[t.base].name != "knm") {
t.en.flags |= JL_TARGET_CLONE_ALL;
break;
}
}
static constexpr uint32_t clone_math[] = {Feature::fma, Feature::fma4};
static constexpr uint32_t clone_simd[] = {Feature::sse3, Feature::ssse3,
Feature::sse41, Feature::sse42,
Feature::avx, Feature::avx2,
Feature::vaes, Feature::vpclmulqdq,
Feature::sse4a, Feature::avx512f,
Feature::avx512dq, Feature::avx512ifma,
Feature::avx512pf, Feature::avx512er,
Feature::avx512cd, Feature::avx512bw,
Feature::avx512vl, Feature::avx512vbmi,
Feature::avx512vpopcntdq, Feature::avxvnni,
Feature::avx512vbmi2, Feature::avx512vnni,
Feature::avx512bitalg, Feature::avx512bf16,
Feature::avx512vp2intersect, Feature::avx512fp16};
for (auto fe: clone_math) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_MATH;
break;
}
}
for (auto fe: clone_simd) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_SIMD;
break;
}
}
static constexpr uint32_t clone_fp16[] = {Feature::avx512fp16};
for (auto fe: clone_fp16) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_FLOAT16;
break;
}
}
static constexpr uint32_t clone_bf16[] = {Feature::avx512bf16};
for (auto fe: clone_bf16) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_BFLOAT16;
break;
}
}
}
if (image_targets.empty())
jl_error("No targets specifiec");
llvm::SmallVector<jl_target_spec_t, 0> res;
for (auto &target: jit_targets) {
for (auto &target: image_targets) {
auto features_en = target.en.features;
auto features_dis = target.dis.features;
for (auto &fename: feature_names) {
Expand All @@ -1128,6 +1199,7 @@ llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
}
return res;
}
#endif

extern "C" int jl_test_cpu_feature(jl_cpu_feature_t feature)
{
Expand Down