Skip to content

Commit adf733c

Browse files
authored
Merge pull request #65 from bulasevich/GR-60402
[GR-60402] Add vzeroupper upon the entrance of AMD64 sha1 and sha256 stubs.
2 parents a02d877 + 18ecc48 commit adf733c

File tree

2 files changed

+101
-35
lines changed

2 files changed

+101
-35
lines changed

compiler/src/jdk.internal.vm.compiler/src/org/graalvm/compiler/lir/amd64/AMD64SHA1Op.java

Lines changed: 51 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -26,6 +26,12 @@
2626

2727
import static jdk.vm.ci.amd64.AMD64.xmm0;
2828
import static jdk.vm.ci.amd64.AMD64.xmm1;
29+
import static jdk.vm.ci.amd64.AMD64.xmm10;
30+
import static jdk.vm.ci.amd64.AMD64.xmm11;
31+
import static jdk.vm.ci.amd64.AMD64.xmm12;
32+
import static jdk.vm.ci.amd64.AMD64.xmm13;
33+
import static jdk.vm.ci.amd64.AMD64.xmm14;
34+
import static jdk.vm.ci.amd64.AMD64.xmm15;
2935
import static jdk.vm.ci.amd64.AMD64.xmm2;
3036
import static jdk.vm.ci.amd64.AMD64.xmm3;
3137
import static jdk.vm.ci.amd64.AMD64.xmm4;
@@ -44,13 +50,13 @@
4450
import org.graalvm.compiler.asm.amd64.AMD64Address;
4551
import org.graalvm.compiler.asm.amd64.AMD64Assembler.ConditionFlag;
4652
import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
53+
import org.graalvm.compiler.core.amd64.AMD64LIRGenerator;
4754
import org.graalvm.compiler.debug.GraalError;
4855
import org.graalvm.compiler.lir.LIRInstructionClass;
4956
import org.graalvm.compiler.lir.SyncPort;
5057
import org.graalvm.compiler.lir.asm.ArrayDataPointerConstant;
5158
import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
52-
import org.graalvm.compiler.lir.gen.LIRGeneratorTool;
53-
59+
import jdk.vm.ci.amd64.AMD64.CPUFeature;
5460
import jdk.vm.ci.amd64.AMD64Kind;
5561
import jdk.vm.ci.code.Register;
5662
import jdk.vm.ci.meta.AllocatableValue;
@@ -78,11 +84,11 @@ public final class AMD64SHA1Op extends AMD64LIRInstruction {
7884
@Temp({REG}) private Value[] temps;
7985
private final boolean multiBlock;
8086

81-
public AMD64SHA1Op(LIRGeneratorTool tool, AllocatableValue bufValue, AllocatableValue stateValue) {
87+
public AMD64SHA1Op(AMD64LIRGenerator tool, AllocatableValue bufValue, AllocatableValue stateValue) {
8288
this(tool, bufValue, stateValue, Value.ILLEGAL, Value.ILLEGAL, Value.ILLEGAL, false);
8389
}
8490

85-
public AMD64SHA1Op(LIRGeneratorTool tool, AllocatableValue bufValue, AllocatableValue stateValue, AllocatableValue ofsValue,
91+
public AMD64SHA1Op(AMD64LIRGenerator tool, AllocatableValue bufValue, AllocatableValue stateValue, AllocatableValue ofsValue,
8692
AllocatableValue limitValue, AllocatableValue resultValue, boolean multiBlock) {
8793
super(TYPE);
8894

@@ -94,18 +100,40 @@ public AMD64SHA1Op(LIRGeneratorTool tool, AllocatableValue bufValue, Allocatable
94100

95101
this.multiBlock = multiBlock;
96102

97-
this.temps = new Value[]{
98-
xmm0.asValue(),
99-
xmm1.asValue(),
100-
xmm2.asValue(),
101-
xmm3.asValue(),
102-
xmm4.asValue(),
103-
xmm5.asValue(),
104-
xmm6.asValue(),
105-
xmm7.asValue(),
106-
xmm8.asValue(),
107-
xmm9.asValue(),
108-
};
103+
if (tool.supportsCPUFeature(CPUFeature.AVX)) {
104+
// vzeroupper zeroes bits 128 and above of xmm0-xmm15, so all 16 registers are declared as temps
105+
this.temps = new Value[]{
106+
xmm0.asValue(),
107+
xmm1.asValue(),
108+
xmm2.asValue(),
109+
xmm3.asValue(),
110+
xmm4.asValue(),
111+
xmm5.asValue(),
112+
xmm6.asValue(),
113+
xmm7.asValue(),
114+
xmm8.asValue(),
115+
xmm9.asValue(),
116+
xmm10.asValue(),
117+
xmm11.asValue(),
118+
xmm12.asValue(),
119+
xmm13.asValue(),
120+
xmm14.asValue(),
121+
xmm15.asValue(),
122+
};
123+
} else {
124+
this.temps = new Value[]{
125+
xmm0.asValue(),
126+
xmm1.asValue(),
127+
xmm2.asValue(),
128+
xmm3.asValue(),
129+
xmm4.asValue(),
130+
xmm5.asValue(),
131+
xmm6.asValue(),
132+
xmm7.asValue(),
133+
xmm8.asValue(),
134+
xmm9.asValue(),
135+
};
136+
}
109137

110138
if (multiBlock) {
111139
this.bufTempValue = tool.newVariable(bufValue.getValueKind());
@@ -170,6 +198,12 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
170198
Label labelDoneHash = new Label();
171199
Label labelLoop0 = new Label();
172200

201+
if (masm.supports(CPUFeature.AVX)) {
202+
// Insert vzeroupper here to avoid performance penalty of SSE-AVX transition between
203+
// previously executed AVX instructions and the following SHA-1 instructions.
204+
masm.vzeroupper();
205+
}
206+
173207
masm.movdqu(abcd, new AMD64Address(state, 0));
174208
masm.pinsrd(e0, new AMD64Address(state, 16), 3);
175209
masm.movdqu(shufMask, recordExternalAddress(crb, upperWordMask));

compiler/src/jdk.internal.vm.compiler/src/org/graalvm/compiler/lir/amd64/AMD64SHA256Op.java

Lines changed: 50 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,11 @@
2727
import static jdk.vm.ci.amd64.AMD64.xmm0;
2828
import static jdk.vm.ci.amd64.AMD64.xmm1;
2929
import static jdk.vm.ci.amd64.AMD64.xmm10;
30+
import static jdk.vm.ci.amd64.AMD64.xmm11;
31+
import static jdk.vm.ci.amd64.AMD64.xmm12;
32+
import static jdk.vm.ci.amd64.AMD64.xmm13;
33+
import static jdk.vm.ci.amd64.AMD64.xmm14;
34+
import static jdk.vm.ci.amd64.AMD64.xmm15;
3035
import static jdk.vm.ci.amd64.AMD64.xmm2;
3136
import static jdk.vm.ci.amd64.AMD64.xmm3;
3237
import static jdk.vm.ci.amd64.AMD64.xmm4;
@@ -45,13 +50,13 @@
4550
import org.graalvm.compiler.asm.Label;
4651
import org.graalvm.compiler.asm.amd64.AMD64Address;
4752
import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
53+
import org.graalvm.compiler.core.amd64.AMD64LIRGenerator;
4854
import org.graalvm.compiler.debug.GraalError;
4955
import org.graalvm.compiler.lir.LIRInstructionClass;
5056
import org.graalvm.compiler.lir.SyncPort;
5157
import org.graalvm.compiler.lir.asm.ArrayDataPointerConstant;
5258
import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
53-
import org.graalvm.compiler.lir.gen.LIRGeneratorTool;
54-
59+
import jdk.vm.ci.amd64.AMD64.CPUFeature;
5560
import jdk.vm.ci.amd64.AMD64Kind;
5661
import jdk.vm.ci.code.Register;
5762
import jdk.vm.ci.meta.AllocatableValue;
@@ -81,11 +86,11 @@ public final class AMD64SHA256Op extends AMD64LIRInstruction {
8186

8287
private final boolean multiBlock;
8388

84-
public AMD64SHA256Op(LIRGeneratorTool tool, AllocatableValue bufValue, AllocatableValue stateValue) {
89+
public AMD64SHA256Op(AMD64LIRGenerator tool, AllocatableValue bufValue, AllocatableValue stateValue) {
8590
this(tool, bufValue, stateValue, Value.ILLEGAL, Value.ILLEGAL, Value.ILLEGAL, false);
8691
}
8792

88-
public AMD64SHA256Op(LIRGeneratorTool tool, AllocatableValue bufValue, AllocatableValue stateValue, AllocatableValue ofsValue,
93+
public AMD64SHA256Op(AMD64LIRGenerator tool, AllocatableValue bufValue, AllocatableValue stateValue, AllocatableValue ofsValue,
8994
AllocatableValue limitValue, AllocatableValue resultValue, boolean multiBlock) {
9095
super(TYPE);
9196

@@ -99,19 +104,40 @@ public AMD64SHA256Op(LIRGeneratorTool tool, AllocatableValue bufValue, Allocatab
99104

100105
this.keyTempValue = tool.newVariable(bufValue.getValueKind());
101106

102-
this.temps = new Value[]{
103-
xmm0.asValue(),
104-
xmm1.asValue(),
105-
xmm2.asValue(),
106-
xmm3.asValue(),
107-
xmm4.asValue(),
108-
xmm5.asValue(),
109-
xmm6.asValue(),
110-
xmm7.asValue(),
111-
xmm8.asValue(),
112-
xmm9.asValue(),
113-
xmm10.asValue(),
114-
};
107+
if (tool.supportsCPUFeature(CPUFeature.AVX)) {
108+
// vzeroupper zeroes bits 128 and above of xmm0-xmm15, so all 16 registers are declared as temps
109+
this.temps = new Value[]{
110+
xmm0.asValue(),
111+
xmm1.asValue(),
112+
xmm2.asValue(),
113+
xmm3.asValue(),
114+
xmm4.asValue(),
115+
xmm5.asValue(),
116+
xmm6.asValue(),
117+
xmm7.asValue(),
118+
xmm8.asValue(),
119+
xmm9.asValue(),
120+
xmm10.asValue(),
121+
xmm11.asValue(),
122+
xmm12.asValue(),
123+
xmm13.asValue(),
124+
xmm14.asValue(),
125+
xmm15.asValue(),
126+
};
127+
} else {
128+
this.temps = new Value[]{
129+
xmm0.asValue(),
130+
xmm1.asValue(),
131+
xmm2.asValue(),
132+
xmm3.asValue(),
133+
xmm4.asValue(),
134+
xmm5.asValue(),
135+
xmm6.asValue(),
136+
xmm7.asValue(),
137+
xmm8.asValue(),
138+
xmm9.asValue(),
139+
};
140+
}
115141

116142
if (multiBlock) {
117143
this.bufTempValue = tool.newVariable(bufValue.getValueKind());
@@ -201,6 +227,12 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
201227
// keyTemp replaces the hardcoded rax in the original stub.
202228
Register keyTemp = asRegister(keyTempValue);
203229

230+
if (masm.supports(CPUFeature.AVX)) {
231+
// Insert vzeroupper here to avoid performance penalty of SSE-AVX transition between
232+
// previously executed AVX instructions and the following SHA-256 instructions.
233+
masm.vzeroupper();
234+
}
235+
204236
masm.movdqu(state0, new AMD64Address(state, 0));
205237
masm.movdqu(state1, new AMD64Address(state, 16));
206238

0 commit comments

Comments
 (0)