1
1
/*
2
- * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
2
+ * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
3
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
4
*
5
5
* This code is free software; you can redistribute it and/or modify it
26
26
27
27
import static jdk .vm .ci .amd64 .AMD64 .xmm0 ;
28
28
import static jdk .vm .ci .amd64 .AMD64 .xmm1 ;
29
+ import static jdk .vm .ci .amd64 .AMD64 .xmm10 ;
30
+ import static jdk .vm .ci .amd64 .AMD64 .xmm11 ;
31
+ import static jdk .vm .ci .amd64 .AMD64 .xmm12 ;
32
+ import static jdk .vm .ci .amd64 .AMD64 .xmm13 ;
33
+ import static jdk .vm .ci .amd64 .AMD64 .xmm14 ;
34
+ import static jdk .vm .ci .amd64 .AMD64 .xmm15 ;
29
35
import static jdk .vm .ci .amd64 .AMD64 .xmm2 ;
30
36
import static jdk .vm .ci .amd64 .AMD64 .xmm3 ;
31
37
import static jdk .vm .ci .amd64 .AMD64 .xmm4 ;
44
50
import org .graalvm .compiler .asm .amd64 .AMD64Address ;
45
51
import org .graalvm .compiler .asm .amd64 .AMD64Assembler .ConditionFlag ;
46
52
import org .graalvm .compiler .asm .amd64 .AMD64MacroAssembler ;
53
+ import org .graalvm .compiler .core .amd64 .AMD64LIRGenerator ;
47
54
import org .graalvm .compiler .debug .GraalError ;
48
55
import org .graalvm .compiler .lir .LIRInstructionClass ;
49
56
import org .graalvm .compiler .lir .SyncPort ;
50
57
import org .graalvm .compiler .lir .asm .ArrayDataPointerConstant ;
51
58
import org .graalvm .compiler .lir .asm .CompilationResultBuilder ;
52
- import org .graalvm .compiler .lir .gen .LIRGeneratorTool ;
53
-
59
+ import jdk .vm .ci .amd64 .AMD64 .CPUFeature ;
54
60
import jdk .vm .ci .amd64 .AMD64Kind ;
55
61
import jdk .vm .ci .code .Register ;
56
62
import jdk .vm .ci .meta .AllocatableValue ;
@@ -78,11 +84,11 @@ public final class AMD64SHA1Op extends AMD64LIRInstruction {
78
84
@ Temp ({REG }) private Value [] temps ;
79
85
private final boolean multiBlock ;
80
86
81
- public AMD64SHA1Op (LIRGeneratorTool tool , AllocatableValue bufValue , AllocatableValue stateValue ) {
87
+ public AMD64SHA1Op (AMD64LIRGenerator tool , AllocatableValue bufValue , AllocatableValue stateValue ) {
82
88
this (tool , bufValue , stateValue , Value .ILLEGAL , Value .ILLEGAL , Value .ILLEGAL , false );
83
89
}
84
90
85
- public AMD64SHA1Op (LIRGeneratorTool tool , AllocatableValue bufValue , AllocatableValue stateValue , AllocatableValue ofsValue ,
91
+ public AMD64SHA1Op (AMD64LIRGenerator tool , AllocatableValue bufValue , AllocatableValue stateValue , AllocatableValue ofsValue ,
86
92
AllocatableValue limitValue , AllocatableValue resultValue , boolean multiBlock ) {
87
93
super (TYPE );
88
94
@@ -94,18 +100,40 @@ public AMD64SHA1Op(LIRGeneratorTool tool, AllocatableValue bufValue, Allocatable
94
100
95
101
this .multiBlock = multiBlock ;
96
102
97
- this .temps = new Value []{
98
- xmm0 .asValue (),
99
- xmm1 .asValue (),
100
- xmm2 .asValue (),
101
- xmm3 .asValue (),
102
- xmm4 .asValue (),
103
- xmm5 .asValue (),
104
- xmm6 .asValue (),
105
- xmm7 .asValue (),
106
- xmm8 .asValue (),
107
- xmm9 .asValue (),
108
- };
103
+ if (tool .supportsCPUFeature (CPUFeature .AVX )) {
104
+ // vzeroupper zeroes bits 128 and above of ymm0-ymm15, so all of xmm0-xmm15 must be declared as temps
105
+ this .temps = new Value []{
106
+ xmm0 .asValue (),
107
+ xmm1 .asValue (),
108
+ xmm2 .asValue (),
109
+ xmm3 .asValue (),
110
+ xmm4 .asValue (),
111
+ xmm5 .asValue (),
112
+ xmm6 .asValue (),
113
+ xmm7 .asValue (),
114
+ xmm8 .asValue (),
115
+ xmm9 .asValue (),
116
+ xmm10 .asValue (),
117
+ xmm11 .asValue (),
118
+ xmm12 .asValue (),
119
+ xmm13 .asValue (),
120
+ xmm14 .asValue (),
121
+ xmm15 .asValue (),
122
+ };
123
+ } else {
124
+ this .temps = new Value []{
125
+ xmm0 .asValue (),
126
+ xmm1 .asValue (),
127
+ xmm2 .asValue (),
128
+ xmm3 .asValue (),
129
+ xmm4 .asValue (),
130
+ xmm5 .asValue (),
131
+ xmm6 .asValue (),
132
+ xmm7 .asValue (),
133
+ xmm8 .asValue (),
134
+ xmm9 .asValue (),
135
+ };
136
+ }
109
137
110
138
if (multiBlock ) {
111
139
this .bufTempValue = tool .newVariable (bufValue .getValueKind ());
@@ -170,6 +198,12 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
170
198
Label labelDoneHash = new Label ();
171
199
Label labelLoop0 = new Label ();
172
200
201
+ if (masm .supports (CPUFeature .AVX )) {
202
+ // Insert vzeroupper here to avoid performance penalty of SSE-AVX transition between
203
+ // previously executed AVX instructions and the following SHA-1 instructions.
204
+ masm .vzeroupper ();
205
+ }
206
+
173
207
masm .movdqu (abcd , new AMD64Address (state , 0 ));
174
208
masm .pinsrd (e0 , new AMD64Address (state , 16 ), 3 );
175
209
masm .movdqu (shufMask , recordExternalAddress (crb , upperWordMask ));
0 commit comments