|
2 | 2 | ; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X86 |
3 | 3 | ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=X64,SSE |
4 | 4 | ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE |
5 | | -; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64,AVX |
6 | | -; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64,AVX |
| 5 | +; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64,AVX,AVX2 |
| 6 | +; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64,AVX,AVX512 |
7 | 7 |
|
8 | 8 | ; bt/btc/btr/bts patterns + 'init' to set single bit value in large integers |
9 | 9 |
|
@@ -1029,151 +1029,46 @@ define i1 @complement_cmpz_i128(ptr %word, i32 %position) nounwind { |
1029 | 1029 | define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind { |
1030 | 1030 | ; X86-LABEL: reset_multiload_i128: |
1031 | 1031 | ; X86: # %bb.0: |
1032 | | -; X86-NEXT: pushl %ebp |
1033 | | -; X86-NEXT: movl %esp, %ebp |
1034 | 1032 | ; X86-NEXT: pushl %ebx |
1035 | 1033 | ; X86-NEXT: pushl %edi |
1036 | 1034 | ; X86-NEXT: pushl %esi |
1037 | | -; X86-NEXT: andl $-16, %esp |
1038 | | -; X86-NEXT: subl $64, %esp |
1039 | | -; X86-NEXT: movl 12(%ebp), %ecx |
1040 | | -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
1041 | | -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
1042 | | -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
1043 | | -; X86-NEXT: movl $1, {{[0-9]+}}(%esp) |
1044 | | -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
1045 | | -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
1046 | | -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
1047 | | -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
1048 | | -; X86-NEXT: movl %ecx, %eax |
1049 | | -; X86-NEXT: shrb $3, %al |
1050 | | -; X86-NEXT: andb $12, %al |
1051 | | -; X86-NEXT: negb %al |
1052 | | -; X86-NEXT: movsbl %al, %edi |
1053 | | -; X86-NEXT: movl 36(%esp,%edi), %edx |
1054 | | -; X86-NEXT: movl 40(%esp,%edi), %ebx |
1055 | | -; X86-NEXT: movl %ebx, %esi |
1056 | | -; X86-NEXT: shldl %cl, %edx, %esi |
1057 | | -; X86-NEXT: movl 32(%esp,%edi), %eax |
1058 | | -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
1059 | | -; X86-NEXT: movl 44(%esp,%edi), %edi |
1060 | | -; X86-NEXT: shldl %cl, %ebx, %edi |
1061 | | -; X86-NEXT: movl %eax, %ebx |
1062 | | -; X86-NEXT: # kill: def $cl killed $cl killed $ecx |
1063 | | -; X86-NEXT: shll %cl, %ebx |
1064 | | -; X86-NEXT: notl %ebx |
1065 | | -; X86-NEXT: movl 16(%ebp), %eax |
| 1035 | +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| 1036 | +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| 1037 | +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
1066 | 1038 | ; X86-NEXT: movl (%eax), %eax |
1067 | | -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
1068 | | -; X86-NEXT: movl 12(%ebp), %eax |
1069 | | -; X86-NEXT: andl $96, %eax |
1070 | | -; X86-NEXT: shrl $3, %eax |
1071 | | -; X86-NEXT: movl 8(%ebp), %ecx |
1072 | | -; X86-NEXT: movl (%ecx,%eax), %eax |
1073 | | -; X86-NEXT: andl %ebx, (%ecx) |
1074 | | -; X86-NEXT: movl 12(%ebp), %ecx |
1075 | | -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload |
1076 | | -; X86-NEXT: shldl %cl, %ebx, %edx |
1077 | | -; X86-NEXT: notl %edx |
1078 | | -; X86-NEXT: movl 8(%ebp), %ebx |
1079 | | -; X86-NEXT: andl %edx, 4(%ebx) |
1080 | | -; X86-NEXT: notl %esi |
1081 | | -; X86-NEXT: andl %esi, 8(%ebx) |
1082 | | -; X86-NEXT: notl %edi |
1083 | | -; X86-NEXT: andl %edi, 12(%ebx) |
1084 | | -; X86-NEXT: btl %ecx, %eax |
1085 | | -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| 1039 | +; X86-NEXT: movl %edx, %esi |
| 1040 | +; X86-NEXT: andl $96, %esi |
| 1041 | +; X86-NEXT: shrl $3, %esi |
| 1042 | +; X86-NEXT: movl (%ecx,%esi), %edi |
| 1043 | +; X86-NEXT: movl %edi, %ebx |
| 1044 | +; X86-NEXT: btrl %edx, %ebx |
| 1045 | +; X86-NEXT: btl %edx, %edi |
| 1046 | +; X86-NEXT: movl %ebx, (%ecx,%esi) |
1086 | 1047 | ; X86-NEXT: jae .LBB22_2 |
1087 | 1048 | ; X86-NEXT: # %bb.1: |
1088 | 1049 | ; X86-NEXT: xorl %eax, %eax |
1089 | 1050 | ; X86-NEXT: .LBB22_2: |
1090 | | -; X86-NEXT: leal -12(%ebp), %esp |
1091 | 1051 | ; X86-NEXT: popl %esi |
1092 | 1052 | ; X86-NEXT: popl %edi |
1093 | 1053 | ; X86-NEXT: popl %ebx |
1094 | | -; X86-NEXT: popl %ebp |
1095 | 1054 | ; X86-NEXT: retl |
1096 | 1055 | ; |
1097 | | -; SSE-LABEL: reset_multiload_i128: |
1098 | | -; SSE: # %bb.0: |
1099 | | -; SSE-NEXT: movl %esi, %ecx |
1100 | | -; SSE-NEXT: movl $1, %esi |
1101 | | -; SSE-NEXT: xorl %r8d, %r8d |
1102 | | -; SSE-NEXT: shldq %cl, %rsi, %r8 |
1103 | | -; SSE-NEXT: xorl %eax, %eax |
1104 | | -; SSE-NEXT: shlq %cl, %rsi |
1105 | | -; SSE-NEXT: testb $64, %cl |
1106 | | -; SSE-NEXT: cmovneq %rsi, %r8 |
1107 | | -; SSE-NEXT: cmovneq %rax, %rsi |
1108 | | -; SSE-NEXT: notq %r8 |
1109 | | -; SSE-NEXT: notq %rsi |
1110 | | -; SSE-NEXT: movl %ecx, %r9d |
1111 | | -; SSE-NEXT: andl $96, %r9d |
1112 | | -; SSE-NEXT: shrl $3, %r9d |
1113 | | -; SSE-NEXT: movl (%rdi,%r9), %r9d |
1114 | | -; SSE-NEXT: btl %ecx, %r9d |
1115 | | -; SSE-NEXT: jb .LBB22_2 |
1116 | | -; SSE-NEXT: # %bb.1: |
1117 | | -; SSE-NEXT: movl (%rdx), %eax |
1118 | | -; SSE-NEXT: .LBB22_2: |
1119 | | -; SSE-NEXT: andq %r8, 8(%rdi) |
1120 | | -; SSE-NEXT: andq %rsi, (%rdi) |
1121 | | -; SSE-NEXT: # kill: def $eax killed $eax killed $rax |
1122 | | -; SSE-NEXT: retq |
1123 | | -; |
1124 | | -; AVX2-LABEL: reset_multiload_i128: |
1125 | | -; AVX2: # %bb.0: |
1126 | | -; AVX2-NEXT: movl %esi, %ecx |
1127 | | -; AVX2-NEXT: xorl %eax, %eax |
1128 | | -; AVX2-NEXT: movl $1, %r8d |
1129 | | -; AVX2-NEXT: xorl %esi, %esi |
1130 | | -; AVX2-NEXT: shldq %cl, %r8, %rsi |
1131 | | -; AVX2-NEXT: shlxq %rcx, %r8, %r8 |
1132 | | -; AVX2-NEXT: testb $64, %cl |
1133 | | -; AVX2-NEXT: cmovneq %r8, %rsi |
1134 | | -; AVX2-NEXT: cmovneq %rax, %r8 |
1135 | | -; AVX2-NEXT: notq %rsi |
1136 | | -; AVX2-NEXT: notq %r8 |
1137 | | -; AVX2-NEXT: movl %ecx, %r9d |
1138 | | -; AVX2-NEXT: andl $96, %r9d |
1139 | | -; AVX2-NEXT: shrl $3, %r9d |
1140 | | -; AVX2-NEXT: movl (%rdi,%r9), %r9d |
1141 | | -; AVX2-NEXT: btl %ecx, %r9d |
1142 | | -; AVX2-NEXT: jb .LBB22_2 |
1143 | | -; AVX2-NEXT: # %bb.1: |
1144 | | -; AVX2-NEXT: movl (%rdx), %eax |
1145 | | -; AVX2-NEXT: .LBB22_2: |
1146 | | -; AVX2-NEXT: andq %rsi, 8(%rdi) |
1147 | | -; AVX2-NEXT: andq %r8, (%rdi) |
1148 | | -; AVX2-NEXT: # kill: def $eax killed $eax killed $rax |
1149 | | -; AVX2-NEXT: retq |
1150 | | -; |
1151 | | -; AVX512-LABEL: reset_multiload_i128: |
1152 | | -; AVX512: # %bb.0: |
1153 | | -; AVX512-NEXT: movl %esi, %ecx |
1154 | | -; AVX512-NEXT: movl $1, %r8d |
1155 | | -; AVX512-NEXT: xorl %esi, %esi |
1156 | | -; AVX512-NEXT: shldq %cl, %r8, %rsi |
1157 | | -; AVX512-NEXT: xorl %eax, %eax |
1158 | | -; AVX512-NEXT: shlxq %rcx, %r8, %r8 |
1159 | | -; AVX512-NEXT: testb $64, %cl |
1160 | | -; AVX512-NEXT: cmovneq %r8, %rsi |
1161 | | -; AVX512-NEXT: cmovneq %rax, %r8 |
1162 | | -; AVX512-NEXT: notq %rsi |
1163 | | -; AVX512-NEXT: notq %r8 |
1164 | | -; AVX512-NEXT: movl %ecx, %r9d |
1165 | | -; AVX512-NEXT: andl $96, %r9d |
1166 | | -; AVX512-NEXT: shrl $3, %r9d |
1167 | | -; AVX512-NEXT: movl (%rdi,%r9), %r9d |
1168 | | -; AVX512-NEXT: btl %ecx, %r9d |
1169 | | -; AVX512-NEXT: jb .LBB22_2 |
1170 | | -; AVX512-NEXT: # %bb.1: |
1171 | | -; AVX512-NEXT: movl (%rdx), %eax |
1172 | | -; AVX512-NEXT: .LBB22_2: |
1173 | | -; AVX512-NEXT: andq %rsi, 8(%rdi) |
1174 | | -; AVX512-NEXT: andq %r8, (%rdi) |
1175 | | -; AVX512-NEXT: # kill: def $eax killed $eax killed $rax |
1176 | | -; AVX512-NEXT: retq |
| 1056 | +; X64-LABEL: reset_multiload_i128: |
| 1057 | +; X64: # %bb.0: |
| 1058 | +; X64-NEXT: movl %esi, %ecx |
| 1059 | +; X64-NEXT: andl $96, %ecx |
| 1060 | +; X64-NEXT: shrl $3, %ecx |
| 1061 | +; X64-NEXT: movl (%rdi,%rcx), %r9d |
| 1062 | +; X64-NEXT: movl %r9d, %r8d |
| 1063 | +; X64-NEXT: btrl %esi, %r8d |
| 1064 | +; X64-NEXT: xorl %eax, %eax |
| 1065 | +; X64-NEXT: btl %esi, %r9d |
| 1066 | +; X64-NEXT: jb .LBB22_2 |
| 1067 | +; X64-NEXT: # %bb.1: |
| 1068 | +; X64-NEXT: movl (%rdx), %eax |
| 1069 | +; X64-NEXT: .LBB22_2: |
| 1070 | +; X64-NEXT: movl %r8d, (%rdi,%rcx) |
| 1071 | +; X64-NEXT: retq |
1177 | 1072 | %rem = and i32 %position, 127 |
1178 | 1073 | %ofs = zext nneg i32 %rem to i128 |
1179 | 1074 | %bit = shl nuw i128 1, %ofs |
|
0 commit comments