|
12 | 12 | ; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver1 | FileCheck %s --check-prefixes=BMI |
13 | 13 | ; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver2 | FileCheck %s --check-prefixes=BMI |
14 | 14 | ; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver3 | FileCheck %s --check-prefixes=BMI |
15 | | -; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver4 | FileCheck %s --check-prefixes=BMI2 |
16 | | -; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s --check-prefixes=BMI2 |
17 | | -; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s --check-prefixes=BMI2 |
18 | | -; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=BMI2 |
19 | | -; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s --check-prefixes=BMI2 |
20 | | -; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver5 | FileCheck %s --check-prefixes=BMI2 |
| 15 | +; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver4 | FileCheck %s --check-prefixes=BMI2-SLOW |
| 16 | +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s --check-prefixes=BMI2-SLOW |
| 17 | +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s --check-prefixes=BMI2-SLOW |
| 18 | +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=BMI2-FAST |
| 19 | +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s --check-prefixes=BMI2-FAST |
| 20 | +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver5 | FileCheck %s --check-prefixes=BMI2-FAST |
21 | 21 |
|
22 | 22 | ; Verify that for the X86_64 processors that are known to have poor latency |
23 | 23 | ; double precision shift instructions we do not generate 'shld' or 'shrd' |
@@ -53,15 +53,23 @@ define i64 @lshift(i64 %a, i64 %b, i32 %c) nounwind readnone { |
53 | 53 | ; BMI-NEXT: orq %rdi, %rax |
54 | 54 | ; BMI-NEXT: retq |
55 | 55 | ; |
56 | | -; BMI2-LABEL: lshift: |
57 | | -; BMI2: # %bb.0: # %entry |
58 | | -; BMI2-NEXT: # kill: def $edx killed $edx def $rdx |
59 | | -; BMI2-NEXT: shlxq %rdx, %rdi, %rcx |
60 | | -; BMI2-NEXT: notb %dl |
61 | | -; BMI2-NEXT: shrq %rsi |
62 | | -; BMI2-NEXT: shrxq %rdx, %rsi, %rax |
63 | | -; BMI2-NEXT: orq %rcx, %rax |
64 | | -; BMI2-NEXT: retq |
| 56 | +; BMI2-SLOW-LABEL: lshift: |
| 57 | +; BMI2-SLOW: # %bb.0: # %entry |
| 58 | +; BMI2-SLOW-NEXT: # kill: def $edx killed $edx def $rdx |
| 59 | +; BMI2-SLOW-NEXT: shlxq %rdx, %rdi, %rcx |
| 60 | +; BMI2-SLOW-NEXT: notb %dl |
| 61 | +; BMI2-SLOW-NEXT: shrq %rsi |
| 62 | +; BMI2-SLOW-NEXT: shrxq %rdx, %rsi, %rax |
| 63 | +; BMI2-SLOW-NEXT: orq %rcx, %rax |
| 64 | +; BMI2-SLOW-NEXT: retq |
| 65 | +; |
| 66 | +; BMI2-FAST-LABEL: lshift: |
| 67 | +; BMI2-FAST: # %bb.0: # %entry |
| 68 | +; BMI2-FAST-NEXT: movl %edx, %ecx |
| 69 | +; BMI2-FAST-NEXT: movq %rdi, %rax |
| 70 | +; BMI2-FAST-NEXT: # kill: def $cl killed $cl killed $ecx |
| 71 | +; BMI2-FAST-NEXT: shldq %cl, %rsi, %rax |
| 72 | +; BMI2-FAST-NEXT: retq |
65 | 73 | entry: |
66 | 74 | %sh_prom = zext i32 %c to i64 |
67 | 75 | %shl = shl i64 %a, %sh_prom |
@@ -100,15 +108,23 @@ define i64 @rshift(i64 %a, i64 %b, i32 %c) nounwind readnone { |
100 | 108 | ; BMI-NEXT: orq %rdi, %rax |
101 | 109 | ; BMI-NEXT: retq |
102 | 110 | ; |
103 | | -; BMI2-LABEL: rshift: |
104 | | -; BMI2: # %bb.0: # %entry |
105 | | -; BMI2-NEXT: # kill: def $edx killed $edx def $rdx |
106 | | -; BMI2-NEXT: shrxq %rdx, %rdi, %rcx |
107 | | -; BMI2-NEXT: notb %dl |
108 | | -; BMI2-NEXT: addq %rsi, %rsi |
109 | | -; BMI2-NEXT: shlxq %rdx, %rsi, %rax |
110 | | -; BMI2-NEXT: orq %rcx, %rax |
111 | | -; BMI2-NEXT: retq |
| 111 | +; BMI2-SLOW-LABEL: rshift: |
| 112 | +; BMI2-SLOW: # %bb.0: # %entry |
| 113 | +; BMI2-SLOW-NEXT: # kill: def $edx killed $edx def $rdx |
| 114 | +; BMI2-SLOW-NEXT: shrxq %rdx, %rdi, %rcx |
| 115 | +; BMI2-SLOW-NEXT: notb %dl |
| 116 | +; BMI2-SLOW-NEXT: addq %rsi, %rsi |
| 117 | +; BMI2-SLOW-NEXT: shlxq %rdx, %rsi, %rax |
| 118 | +; BMI2-SLOW-NEXT: orq %rcx, %rax |
| 119 | +; BMI2-SLOW-NEXT: retq |
| 120 | +; |
| 121 | +; BMI2-FAST-LABEL: rshift: |
| 122 | +; BMI2-FAST: # %bb.0: # %entry |
| 123 | +; BMI2-FAST-NEXT: movl %edx, %ecx |
| 124 | +; BMI2-FAST-NEXT: movq %rdi, %rax |
| 125 | +; BMI2-FAST-NEXT: # kill: def $cl killed $cl killed $ecx |
| 126 | +; BMI2-FAST-NEXT: shrdq %cl, %rsi, %rax |
| 127 | +; BMI2-FAST-NEXT: retq |
112 | 128 | entry: |
113 | 129 | %sh_prom = zext i32 %c to i64 |
114 | 130 | %shr = lshr i64 %a, %sh_prom |
|
0 commit comments