Skip to content

Commit 76ca4ba

Browse files
committed
What am I doing wrong, I just copied the code :(
1 parent 1221160 commit 76ca4ba

File tree

3 files changed

+142
-11
lines changed

3 files changed

+142
-11
lines changed

src/processor_arm.cpp

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1892,10 +1892,50 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
18921892

18931893
llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
18941894
{
1895-
if (jit_targets.empty())
1896-
jl_error("JIT targets not initialized");
1897-
llvm::SmallVector<jl_target_spec_t, 0> res;
1898-
for (auto &target: jit_targets) {
1895+
1896+
auto &cmdline = get_cmdline_targets();
1897+
check_cmdline(cmdline, true);
1898+
static llvm::SmallVector<TargetData<feature_sz>, 0> image_targets;
1899+
for (auto &arg: cmdline) {
1900+
auto data = arg_target_data(arg, image_targets.empty());
1901+
image_targets.push_back(std::move(data));
1902+
}
1903+
auto ntargets = image_targets.size();
1904+
// Now decide the clone condition.
1905+
for (size_t i = 1; i < ntargets; i++) {
1906+
auto &t = image_targets[i];
1907+
if (t.en.flags & JL_TARGET_CLONE_ALL)
1908+
continue;
1909+
auto &features0 = image_targets[t.base].en.features;
1910+
// Always clone when code checks CPU features
1911+
t.en.flags |= JL_TARGET_CLONE_CPU;
1912+
static constexpr uint32_t clone_fp16[] = {Feature::fp16fml,Feature::fullfp16};
1913+
for (auto fe: clone_fp16) {
1914+
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
1915+
t.en.flags |= JL_TARGET_CLONE_FLOAT16;
1916+
break;
1917+
}
1918+
}
1919+
// The most useful one in general...
1920+
t.en.flags |= JL_TARGET_CLONE_LOOP;
1921+
#ifdef _CPU_ARM_
1922+
static constexpr uint32_t clone_math[] = {Feature::vfp3, Feature::vfp4, Feature::neon};
1923+
for (auto fe: clone_math) {
1924+
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
1925+
t.en.flags |= JL_TARGET_CLONE_MATH;
1926+
break;
1927+
}
1928+
}
1929+
static constexpr uint32_t clone_simd[] = {Feature::neon};
1930+
for (auto fe: clone_simd) {
1931+
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
1932+
t.en.flags |= JL_TARGET_CLONE_SIMD;
1933+
break;
1934+
}
1935+
}
1936+
#endif
1937+
}
1938+
for (auto &target: image_targets) {
18991939
auto features_en = target.en.features;
19001940
auto features_dis = target.dis.features;
19011941
for (auto &fename: feature_names) {

src/processor_fallback.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -147,10 +147,24 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
147147

148148
llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
149149
{
150-
if (jit_targets.empty())
151-
jl_error("JIT targets not initialized");
150+
151+
auto &cmdline = get_cmdline_targets();
152+
check_cmdline(cmdline, true);
153+
static llvm::SmallVector<TargetData<feature_sz>, 0> image_targets;
154+
for (auto &arg: cmdline) {
155+
auto data = arg_target_data(arg, image_targets.empty());
156+
image_targets.push_back(std::move(data));
157+
}
158+
auto ntargets = image_targets.size();
159+
// Now decide the clone condition.
160+
for (size_t i = 1; i < ntargets; i++) {
161+
auto &t = image_targets[i];
162+
t.en.flags |= JL_TARGET_CLONE_ALL;
163+
}
164+
if (image_targets.empty())
165+
jl_error("No image targets found");
152166
llvm::SmallVector<jl_target_spec_t, 0> res;
153-
for (auto &target: jit_targets) {
167+
for (auto &target: image_targets) {
154168
jl_target_spec_t ele;
155169
std::tie(ele.cpu_name, ele.cpu_features) = get_llvm_target_str(target);
156170
ele.data = serialize_target_data(target.name, target.en.features,

src/processor_x86.cpp

Lines changed: 81 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -910,6 +910,8 @@ static uint32_t pkgimg_init_cb(const void *id, jl_value_t **rejection_reason)
910910
return match.best_idx;
911911
}
912912

913+
// This function serves as a fallback during bootstrapping: at that point there is no sysimage with native code,
914+
// so sysimg_init_cb is never called. In every other case this function should not do anything.
913915
static void ensure_jit_target(bool imaging)
914916
{
915917
auto &cmdline = get_cmdline_targets();
@@ -1102,13 +1104,81 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
11021104
{feature_masks, 0}, {{}, 0}, 0});
11031105
return res;
11041106
}
1105-
1107+
// This function parses the -C command-line option to determine which targets to multiversion for.
11061108
llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
11071109
{
1108-
if (jit_targets.empty())
1109-
jl_error("JIT targets not initialized");
1110+
auto &cmdline = get_cmdline_targets();
1111+
check_cmdline(cmdline, true);
1112+
static llvm::SmallVector<TargetData<feature_sz>, 0> image_targets;
1113+
for (auto &arg: cmdline) {
1114+
auto data = arg_target_data(arg, image_targets.empty());
1115+
image_targets.push_back(std::move(data));
1116+
}
1117+
1118+
auto ntargets = image_targets.size();
1119+
// Now decide the clone condition.
1120+
for (size_t i = 1; i < ntargets; i++) {
1121+
auto &t = image_targets[i];
1122+
if (t.en.flags & JL_TARGET_CLONE_ALL)
1123+
continue;
1124+
// Always clone when code checks CPU features
1125+
t.en.flags |= JL_TARGET_CLONE_CPU;
1126+
// The most useful one in general...
1127+
t.en.flags |= JL_TARGET_CLONE_LOOP;
1128+
auto &features0 = image_targets[t.base].en.features;
1129+
// Special case for KNL/KNM since they're so different
1130+
if (!(t.dis.flags & JL_TARGET_CLONE_ALL)) {
1131+
if ((t.name == "knl" || t.name == "knm") &&
1132+
image_targets[t.base].name != "knl" && image_targets[t.base].name != "knm") {
1133+
t.en.flags |= JL_TARGET_CLONE_ALL;
1134+
break;
1135+
}
1136+
}
1137+
static constexpr uint32_t clone_math[] = {Feature::fma, Feature::fma4};
1138+
static constexpr uint32_t clone_simd[] = {Feature::sse3, Feature::ssse3,
1139+
Feature::sse41, Feature::sse42,
1140+
Feature::avx, Feature::avx2,
1141+
Feature::vaes, Feature::vpclmulqdq,
1142+
Feature::sse4a, Feature::avx512f,
1143+
Feature::avx512dq, Feature::avx512ifma,
1144+
Feature::avx512pf, Feature::avx512er,
1145+
Feature::avx512cd, Feature::avx512bw,
1146+
Feature::avx512vl, Feature::avx512vbmi,
1147+
Feature::avx512vpopcntdq, Feature::avxvnni,
1148+
Feature::avx512vbmi2, Feature::avx512vnni,
1149+
Feature::avx512bitalg, Feature::avx512bf16,
1150+
Feature::avx512vp2intersect, Feature::avx512fp16};
1151+
for (auto fe: clone_math) {
1152+
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
1153+
t.en.flags |= JL_TARGET_CLONE_MATH;
1154+
break;
1155+
}
1156+
}
1157+
for (auto fe: clone_simd) {
1158+
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
1159+
t.en.flags |= JL_TARGET_CLONE_SIMD;
1160+
break;
1161+
}
1162+
}
1163+
static constexpr uint32_t clone_fp16[] = {Feature::avx512fp16};
1164+
for (auto fe: clone_fp16) {
1165+
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
1166+
t.en.flags |= JL_TARGET_CLONE_FLOAT16;
1167+
break;
1168+
}
1169+
}
1170+
static constexpr uint32_t clone_bf16[] = {Feature::avx512bf16};
1171+
for (auto fe: clone_bf16) {
1172+
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
1173+
t.en.flags |= JL_TARGET_CLONE_BFLOAT16;
1174+
break;
1175+
}
1176+
}
1177+
}
1178+
if (image_targets.empty())
1179+
jl_error("No targets specifiec");
11101180
llvm::SmallVector<jl_target_spec_t, 0> res;
1111-
for (auto &target: jit_targets) {
1181+
for (auto &target: image_targets) {
11121182
auto features_en = target.en.features;
11131183
auto features_dis = target.dis.features;
11141184
for (auto &fename: feature_names) {
@@ -1126,6 +1196,13 @@ llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
11261196
ele.base = target.base;
11271197
res.push_back(ele);
11281198
}
1199+
// print results
1200+
for (auto &ele: res) {
1201+
jl_safe_printf("Target: %s\n", ele.cpu_name.c_str());
1202+
jl_safe_printf("Features: %s\n", ele.cpu_features.c_str());
1203+
jl_safe_printf("Flags: %x\n", ele.flags);
1204+
jl_safe_printf("Base: %d\n", ele.base);
1205+
}
11291206
return res;
11301207
}
11311208

0 commit comments

Comments
 (0)