Skip to content

Commit 188c236

Browse files
committed
8353558: x86: Use better instructions for ICache sync when available
Reviewed-by: kvn, adinn
1 parent 428334b commit 188c236

File tree

16 files changed

+232
-37
lines changed

16 files changed

+232
-37
lines changed

src/hotspot/cpu/aarch64/icache_aarch64.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,4 @@ void ICacheStubGenerator::generate_icache_flush(
3131
*flush_icache_stub = nullptr;
3232
}
3333

34-
void ICache::initialize() {}
34+
void ICache::initialize(int phase) {}

src/hotspot/cpu/x86/assembler_x86.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2010,6 +2010,11 @@ void Assembler::cpuid() {
20102010
emit_int16(0x0F, (unsigned char)0xA2);
20112011
}
20122012

2013+
// Emits the three-byte encoding 0F 01 E8, i.e. the SERIALIZE instruction
// ("Serialize instruction stream" per the declaration in assembler_x86.hpp).
// Debug builds assert that VM_Version::supports_serialize() holds before
// emitting; callers are expected to have checked CPU support already.
void Assembler::serialize() {
2014+
assert(VM_Version::supports_serialize(), "");
2015+
emit_int24(0x0F, 0x01, 0xE8);  // opcode bytes for SERIALIZE
2016+
}
2017+
20132018
// Opcode / Instruction Op / En 64 - Bit Mode Compat / Leg Mode Description Implemented
20142019
// F2 0F 38 F0 / r CRC32 r32, r / m8 RM Valid Valid Accumulate CRC32 on r / m8. v
20152020
// F2 REX 0F 38 F0 / r CRC32 r32, r / m8* RM Valid N.E. Accumulate CRC32 on r / m8. -

src/hotspot/cpu/x86/assembler_x86.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1232,6 +1232,9 @@ class Assembler : public AbstractAssembler {
12321232
// Identify processor type and features
12331233
void cpuid();
12341234

1235+
// Serialize instruction stream
1236+
void serialize();
1237+
12351238
// CRC32C
12361239
void crc32(Register crc, Register v, int8_t sizeInBytes);
12371240
void crc32(Register crc, Address adr, int8_t sizeInBytes);

src/hotspot/cpu/x86/globals_x86.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,15 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
191191
product(bool, IntelJccErratumMitigation, true, DIAGNOSTIC, \
192192
"Turn off JVM mitigations related to Intel micro code " \
193193
"mitigations for the Intel JCC erratum") \
194+
\
195+
product(int, X86ICacheSync, -1, DIAGNOSTIC, \
196+
"Select the X86 ICache sync mechanism: -1 = auto-select; " \
197+
"0 = none (dangerous); 1 = CLFLUSH loop; 2 = CLFLUSHOPT loop; "\
198+
"3 = CLWB loop; 4 = single CPUID; 5 = single SERIALIZE. " \
199+
"Explicitly selected mechanism will fail at startup if " \
200+
"hardware does not support it.") \
201+
range(-1, 5) \
202+
\
194203
// end of ARCH_FLAGS
195204

196205
#endif // CPU_X86_GLOBALS_X86_HPP

src/hotspot/cpu/x86/icache_x86.cpp

Lines changed: 78 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,63 @@
2323
*/
2424

2525
#include "asm/macroAssembler.hpp"
26+
#include "runtime/flags/flagSetting.hpp"
27+
#include "runtime/globals_extension.hpp"
2628
#include "runtime/icache.hpp"
2729

2830
#define __ _masm->
2931

32+
// Emits the fence/serialization sequence that matches the currently selected
// X86ICacheSync mode into the code buffer behind _masm.
//
// Mode values (see the X86ICacheSync flag description in globals_x86.hpp):
//   0 = none, 1 = CLFLUSH loop, 2 = CLFLUSHOPT loop, 3 = CLWB loop,
//   4 = single CPUID, 5 = single SERIALIZE.
//
// For modes 1-3 this only emits the fence; the per-line flush instruction is
// emitted separately. For modes 4 and 5 the instruction emitted here is the
// whole sync mechanism. Any other value hits ShouldNotReachHere().
void x86_generate_icache_fence(MacroAssembler* _masm) {
33+
switch (X86ICacheSync) {
34+
case 0:  // "none": emit nothing at all
35+
break;
36+
case 1:  // CLFLUSH: full memory fence, as the pre-existing stub used
37+
__ mfence();
38+
break;
39+
case 2:  // CLFLUSHOPT: shares the store fence with CLWB below
40+
case 3:  // CLWB
41+
__ sfence();  // NOTE(review): presumably sufficient to order CLFLUSHOPT/CLWB - confirm against the Intel SDM
42+
break;
43+
case 4:  // CPUID used as a serializing instruction
44+
__ push(rax);  // CPUID overwrites rax/rbx/rcx/rdx, so all four are saved
45+
__ push(rbx);
46+
__ push(rcx);
47+
__ push(rdx);
48+
__ xorptr(rax, rax);  // select CPUID leaf 0; the result is discarded
49+
__ cpuid();
50+
__ pop(rdx);  // restore in reverse order of the pushes
51+
__ pop(rcx);
52+
__ pop(rbx);
53+
__ pop(rax);
54+
break;
55+
case 5:  // SERIALIZE; Assembler::serialize() asserts CPU support
56+
__ serialize();
57+
break;
58+
default:
59+
ShouldNotReachHere();
60+
}
61+
}
62+
63+
// Emits one cache-line flush instruction for the line at [addr + 0], choosing
// the instruction from the current X86ICacheSync mode:
//   1 -> CLFLUSH, 2 -> CLFLUSHOPT, 3 -> CLWB.
//
// Only valid for the loop-based modes 1-3; any other mode (including 0, 4
// and 5, which have no per-line flush) hits ShouldNotReachHere(). The caller
// is responsible for advancing addr between lines and for the surrounding
// fences.
void x86_generate_icache_flush_insn(MacroAssembler* _masm, Register addr) {
64+
switch (X86ICacheSync) {
65+
case 1:  // CLFLUSH loop
66+
__ clflush(Address(addr, 0));
67+
break;
68+
case 2:  // CLFLUSHOPT loop
69+
__ clflushopt(Address(addr, 0));
70+
break;
71+
case 3:  // CLWB loop
72+
__ clwb(Address(addr, 0));
73+
break;
74+
default:  // modes without a per-line flush must not reach this helper
75+
ShouldNotReachHere();
76+
}
77+
}
78+
3079
void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) {
31-
StubCodeMark mark(this, "ICache", "flush_icache_stub");
80+
StubCodeMark mark(this, "ICache", _stub_name);
3281

3382
address start = __ pc();
34-
#ifdef AMD64
3583

3684
const Register addr = c_rarg0;
3785
const Register lines = c_rarg1;
@@ -40,31 +88,45 @@ void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flu
4088
Label flush_line, done;
4189

4290
__ testl(lines, lines);
43-
__ jcc(Assembler::zero, done);
91+
__ jccb(Assembler::zero, done);
4492

45-
// Force ordering wrt cflush.
46-
// Other fence and sync instructions won't do the job.
47-
__ mfence();
93+
x86_generate_icache_fence(_masm);
4894

49-
__ bind(flush_line);
50-
__ clflush(Address(addr, 0));
51-
__ addptr(addr, ICache::line_size);
52-
__ decrementl(lines);
53-
__ jcc(Assembler::notZero, flush_line);
95+
if (1 <= X86ICacheSync && X86ICacheSync <= 3) {
96+
__ bind(flush_line);
97+
x86_generate_icache_flush_insn(_masm, addr);
98+
__ addptr(addr, ICache::line_size);
99+
__ decrementl(lines);
100+
__ jccb(Assembler::notZero, flush_line);
54101

55-
__ mfence();
102+
x86_generate_icache_fence(_masm);
103+
}
56104

57105
__ bind(done);
58106

59-
#else
60-
const Address magic(rsp, 3*wordSize);
61-
__ lock(); __ addl(Address(rsp, 0), 0);
62-
#endif // AMD64
63107
__ movptr(rax, magic); // Handshake with caller to make sure it happened!
64108
__ ret(0);
65109

66110
// Must be set here so StubCodeMark destructor can call the flush stub.
67111
*flush_icache_stub = (ICache::flush_icache_stub_t)start;
68112
}
69113

114+
// x86 two-phase ICache initialization. Both phases delegate to
// AbstractICache::initialize(phase); they differ only in which X86ICacheSync
// mode is in effect while that call runs.
//
//   phase 1: X86ICacheSync is forced to 1 (CLFLUSH loop) for the duration of
//            the call.
//   phase 2: the call runs with whatever X86ICacheSync currently holds.
//
// Any other phase value hits ShouldNotReachHere().
void ICache::initialize(int phase) {
115+
switch (phase) {
116+
case 1: {
117+
// Initial phase, we assume only CLFLUSH is available.
118+
IntFlagSetting fs(X86ICacheSync, 1);  // NOTE(review): presumably a scoped setter that restores the old value at the closing brace - confirm in flagSetting.hpp
119+
AbstractICache::initialize(phase);
120+
break;
121+
}
122+
case 2: {
123+
// Final phase, generate the stub again.
// NOTE(review): presumably CPU feature detection has resolved X86ICacheSync
// by this point - confirm against the call site.
124+
AbstractICache::initialize(phase);
125+
break;
126+
}
127+
default:
128+
ShouldNotReachHere();
129+
}
130+
}
131+
70132
#undef __

src/hotspot/cpu/x86/icache_x86.hpp

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,21 +40,13 @@
4040

4141
class ICache : public AbstractICache {
4242
public:
43-
#ifdef AMD64
4443
enum {
4544
stub_size = 64, // Size of the icache flush stub in bytes
4645
line_size = 64, // Icache line size in bytes
4746
log2_line_size = 6 // log2(line_size)
4847
};
4948

50-
// Use default implementation
51-
#else
52-
enum {
53-
stub_size = 16, // Size of the icache flush stub in bytes
54-
line_size = BytesPerWord, // conservative
55-
log2_line_size = LogBytesPerWord // log2(line_size)
56-
};
57-
#endif // AMD64
49+
static void initialize(int phase);
5850
};
5951

6052
#endif // CPU_X86_ICACHE_X86_HPP

src/hotspot/cpu/x86/vm_version_x86.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1089,6 +1089,30 @@ void VM_Version::get_processor_features() {
10891089
_has_intel_jcc_erratum = IntelJccErratumMitigation;
10901090
}
10911091

1092+
assert(supports_cpuid(), "Always present");
1093+
assert(supports_clflush(), "Always present");
1094+
if (X86ICacheSync == -1) {
1095+
// Auto-detect, choosing the best performant one that still flushes
1096+
// the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
1097+
if (supports_clwb()) {
1098+
FLAG_SET_ERGO(X86ICacheSync, 3);
1099+
} else if (supports_clflushopt()) {
1100+
FLAG_SET_ERGO(X86ICacheSync, 2);
1101+
} else {
1102+
FLAG_SET_ERGO(X86ICacheSync, 1);
1103+
}
1104+
} else {
1105+
if ((X86ICacheSync == 2) && !supports_clflushopt()) {
1106+
vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
1107+
}
1108+
if ((X86ICacheSync == 3) && !supports_clwb()) {
1109+
vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
1110+
}
1111+
if ((X86ICacheSync == 5) && !supports_serialize()) {
1112+
vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
1113+
}
1114+
}
1115+
10921116
char buf[1024];
10931117
int res = jio_snprintf(
10941118
buf, sizeof(buf),

src/hotspot/cpu/zero/icache_zero.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333

3434
class ICache : public AbstractICache {
3535
public:
36-
static void initialize() {}
36+
static void initialize(int phase) {}
3737
static void invalidate_word(address addr) {}
3838
static void invalidate_range(address start, int nbytes) {}
3939
};

src/hotspot/os_cpu/bsd_aarch64/icache_bsd_aarch64.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333

3434
class ICache : public AbstractICache {
3535
public:
36-
static void initialize();
36+
static void initialize(int phase);
3737
static void invalidate_word(address addr) {
3838
__clear_cache((char *)addr, (char *)(addr + 4));
3939
}

src/hotspot/os_cpu/linux_aarch64/icache_linux_aarch64.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232

3333
class ICache : public AbstractICache {
3434
public:
35-
static void initialize();
35+
static void initialize(int phase);
3636
static void invalidate_word(address addr) {
3737
__builtin___clear_cache((char *)addr, (char *)(addr + 4));
3838
}

0 commit comments

Comments
 (0)