|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
2 | | -; RUN: llc -O1 -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s |
| 2 | +; RUN: llc -O1 -mtriple=riscv64 -mattr=+d,+zfh,+zfbfmin -verify-machineinstrs < %s | FileCheck %s |
3 | 3 |
|
4 | 4 | @a = common global i32 0, align 4 |
5 | 5 | @l = common global i32 0, align 4 |
@@ -200,3 +200,177 @@ for.end: ; preds = %for.inc, %entry |
200 | 200 | } |
201 | 201 |
|
202 | 202 | declare i32 @foo(i32, i32, i32, i32, i32, i32) |
| 203 | + |
| 204 | +define void @remat_load(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, double %8, double %9, double %10, double %11, double %12, double %13, double %14, double %15, i8 %stackarg0, i16 %stackarg1, i32 %stackarg2, i64 %stackarg3, half %stackarg4, bfloat %stackarg5, float %stackarg6, double %stackarg7, ptr %p) nounwind {
| 205 | +; CHECK-LABEL: remat_load:
| 206 | +; CHECK: # %bb.0: # %entry
| 207 | +; CHECK-NEXT: addi sp, sp, -256
| 208 | +; CHECK-NEXT: sd ra, 248(sp) # 8-byte Folded Spill
| 209 | +; CHECK-NEXT: sd s0, 240(sp) # 8-byte Folded Spill
| 210 | +; CHECK-NEXT: sd s1, 232(sp) # 8-byte Folded Spill
| 211 | +; CHECK-NEXT: sd s2, 224(sp) # 8-byte Folded Spill
| 212 | +; CHECK-NEXT: sd s3, 216(sp) # 8-byte Folded Spill
| 213 | +; CHECK-NEXT: sd s4, 208(sp) # 8-byte Folded Spill
| 214 | +; CHECK-NEXT: sd s5, 200(sp) # 8-byte Folded Spill
| 215 | +; CHECK-NEXT: sd s6, 192(sp) # 8-byte Folded Spill
| 216 | +; CHECK-NEXT: sd s7, 184(sp) # 8-byte Folded Spill
| 217 | +; CHECK-NEXT: sd s8, 176(sp) # 8-byte Folded Spill
| 218 | +; CHECK-NEXT: sd s9, 168(sp) # 8-byte Folded Spill
| 219 | +; CHECK-NEXT: sd s10, 160(sp) # 8-byte Folded Spill
| 220 | +; CHECK-NEXT: sd s11, 152(sp) # 8-byte Folded Spill
| 221 | +; CHECK-NEXT: fsd fs0, 144(sp) # 8-byte Folded Spill
| 222 | +; CHECK-NEXT: fsd fs1, 136(sp) # 8-byte Folded Spill
| 223 | +; CHECK-NEXT: fsd fs2, 128(sp) # 8-byte Folded Spill
| 224 | +; CHECK-NEXT: fsd fs3, 120(sp) # 8-byte Folded Spill
| 225 | +; CHECK-NEXT: fsd fs4, 112(sp) # 8-byte Folded Spill
| 226 | +; CHECK-NEXT: fsd fs5, 104(sp) # 8-byte Folded Spill
| 227 | +; CHECK-NEXT: fsd fs6, 96(sp) # 8-byte Folded Spill
| 228 | +; CHECK-NEXT: fsd fs7, 88(sp) # 8-byte Folded Spill
| 229 | +; CHECK-NEXT: fsd fs8, 80(sp) # 8-byte Folded Spill
| 230 | +; CHECK-NEXT: fsd fs9, 72(sp) # 8-byte Folded Spill
| 231 | +; CHECK-NEXT: fsd fs10, 64(sp) # 8-byte Folded Spill
| 232 | +; CHECK-NEXT: fsd fs11, 56(sp) # 8-byte Folded Spill
| 233 | +; CHECK-NEXT: fld fa5, 312(sp)
| 234 | +; CHECK-NEXT: fsd fa5, 48(sp) # 8-byte Folded Spill
| 235 | +; CHECK-NEXT: flw fa4, 304(sp)
| 236 | +; CHECK-NEXT: fsw fa4, 44(sp) # 4-byte Folded Spill
| 237 | +; CHECK-NEXT: flh fa3, 296(sp)
| 238 | +; CHECK-NEXT: fsh fa3, 42(sp) # 2-byte Folded Spill
| 239 | +; CHECK-NEXT: flh fa2, 288(sp)
| 240 | +; CHECK-NEXT: fsh fa2, 40(sp) # 2-byte Folded Spill
| 241 | +; CHECK-NEXT: ld a0, 320(sp)
| 242 | +; CHECK-NEXT: sd a0, 0(sp) # 8-byte Folded Spill
| 243 | +; CHECK-NEXT: lbu a4, 256(sp)
| 244 | +; CHECK-NEXT: sd a4, 8(sp) # 8-byte Folded Spill
| 245 | +; CHECK-NEXT: lh a3, 264(sp)
| 246 | +; CHECK-NEXT: sd a3, 16(sp) # 8-byte Folded Spill
| 247 | +; CHECK-NEXT: lw a2, 272(sp)
| 248 | +; CHECK-NEXT: sd a2, 24(sp) # 8-byte Folded Spill
| 249 | +; CHECK-NEXT: ld a1, 280(sp)
| 250 | +; CHECK-NEXT: sd a1, 32(sp) # 8-byte Folded Spill
| 251 | +; CHECK-NEXT: sb a4, 0(a0)
| 252 | +; CHECK-NEXT: sh a3, 0(a0)
| 253 | +; CHECK-NEXT: sw a2, 0(a0)
| 254 | +; CHECK-NEXT: sd a1, 0(a0)
| 255 | +; CHECK-NEXT: fsh fa2, 0(a0)
| 256 | +; CHECK-NEXT: fsh fa3, 0(a0)
| 257 | +; CHECK-NEXT: fsw fa4, 0(a0)
| 258 | +; CHECK-NEXT: fsd fa5, 0(a0)
| 259 | +; CHECK-NEXT: #APP
| 260 | +; CHECK-NEXT: #NO_APP
| 261 | +; CHECK-NEXT: ld a0, 0(sp) # 8-byte Folded Reload
| 262 | +; CHECK-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
| 263 | +; CHECK-NEXT: sb a1, 0(a0)
| 264 | +; CHECK-NEXT: ld a1, 16(sp) # 8-byte Folded Reload
| 265 | +; CHECK-NEXT: sh a1, 0(a0)
| 266 | +; CHECK-NEXT: ld a1, 24(sp) # 8-byte Folded Reload
| 267 | +; CHECK-NEXT: sw a1, 0(a0)
| 268 | +; CHECK-NEXT: ld a1, 32(sp) # 8-byte Folded Reload
| 269 | +; CHECK-NEXT: sd a1, 0(a0)
| 270 | +; CHECK-NEXT: flh fa5, 40(sp) # 2-byte Folded Reload
| 271 | +; CHECK-NEXT: fsh fa5, 0(a0)
| 272 | +; CHECK-NEXT: flh fa5, 42(sp) # 2-byte Folded Reload
| 273 | +; CHECK-NEXT: fsh fa5, 0(a0)
| 274 | +; CHECK-NEXT: flw fa5, 44(sp) # 4-byte Folded Reload
| 275 | +; CHECK-NEXT: fsw fa5, 0(a0)
| 276 | +; CHECK-NEXT: fld fa5, 48(sp) # 8-byte Folded Reload
| 277 | +; CHECK-NEXT: fsd fa5, 0(a0)
| 278 | +; CHECK-NEXT: ld ra, 248(sp) # 8-byte Folded Reload
| 279 | +; CHECK-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
| 280 | +; CHECK-NEXT: ld s1, 232(sp) # 8-byte Folded Reload
| 281 | +; CHECK-NEXT: ld s2, 224(sp) # 8-byte Folded Reload
| 282 | +; CHECK-NEXT: ld s3, 216(sp) # 8-byte Folded Reload
| 283 | +; CHECK-NEXT: ld s4, 208(sp) # 8-byte Folded Reload
| 284 | +; CHECK-NEXT: ld s5, 200(sp) # 8-byte Folded Reload
| 285 | +; CHECK-NEXT: ld s6, 192(sp) # 8-byte Folded Reload
| 286 | +; CHECK-NEXT: ld s7, 184(sp) # 8-byte Folded Reload
| 287 | +; CHECK-NEXT: ld s8, 176(sp) # 8-byte Folded Reload
| 288 | +; CHECK-NEXT: ld s9, 168(sp) # 8-byte Folded Reload
| 289 | +; CHECK-NEXT: ld s10, 160(sp) # 8-byte Folded Reload
| 290 | +; CHECK-NEXT: ld s11, 152(sp) # 8-byte Folded Reload
| 291 | +; CHECK-NEXT: fld fs0, 144(sp) # 8-byte Folded Reload
| 292 | +; CHECK-NEXT: fld fs1, 136(sp) # 8-byte Folded Reload
| 293 | +; CHECK-NEXT: fld fs2, 128(sp) # 8-byte Folded Reload
| 294 | +; CHECK-NEXT: fld fs3, 120(sp) # 8-byte Folded Reload
| 295 | +; CHECK-NEXT: fld fs4, 112(sp) # 8-byte Folded Reload
| 296 | +; CHECK-NEXT: fld fs5, 104(sp) # 8-byte Folded Reload
| 297 | +; CHECK-NEXT: fld fs6, 96(sp) # 8-byte Folded Reload
| 298 | +; CHECK-NEXT: fld fs7, 88(sp) # 8-byte Folded Reload
| 299 | +; CHECK-NEXT: fld fs8, 80(sp) # 8-byte Folded Reload
| 300 | +; CHECK-NEXT: fld fs9, 72(sp) # 8-byte Folded Reload
| 301 | +; CHECK-NEXT: fld fs10, 64(sp) # 8-byte Folded Reload
| 302 | +; CHECK-NEXT: fld fs11, 56(sp) # 8-byte Folded Reload
| 303 | +; CHECK-NEXT: addi sp, sp, 256
| 304 | +; CHECK-NEXT: ret
| 305 | +entry:
| 306 | + ; Store every stack-passed argument (%stackarg0-%stackarg7) once before the asm so each one is loaded from its incoming stack slot and gets a live interval
| 307 | + store volatile i8 %stackarg0, ptr %p
| 308 | + store volatile i16 %stackarg1, ptr %p
| 309 | + store volatile i32 %stackarg2, ptr %p
| 310 | + store volatile i64 %stackarg3, ptr %p
| 311 | + store volatile half %stackarg4, ptr %p
| 312 | + store volatile bfloat %stackarg5, ptr %p
| 313 | + store volatile float %stackarg6, ptr %p
| 314 | + store volatile double %stackarg7, ptr %p
| 315 | + tail call void asm sideeffect "", "~{x1},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31},~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"()
| 316 | + ; Use them again after the asm has clobbered x1, x3-x31 and f0-f31. The intent is to force rematerialization; the checks above currently show each value spilled to the local frame and reloaded instead
| 317 | + store volatile i8 %stackarg0, ptr %p
| 318 | + store volatile i16 %stackarg1, ptr %p
| 319 | + store volatile i32 %stackarg2, ptr %p
| 320 | + store volatile i64 %stackarg3, ptr %p
| 321 | + store volatile half %stackarg4, ptr %p
| 322 | + store volatile bfloat %stackarg5, ptr %p
| 323 | + store volatile float %stackarg6, ptr %p
| 324 | + store volatile double %stackarg7, ptr %p
| 325 | + ret void
| 326 | +}
| 327 | + |
| 328 | +; The value loaded from @const is currently spilled and reloaded across the inline asm (see the Folded Spill/Reload of a0 below). We could remat the load of the constant global if we extended the live
| 329 | +; interval of the high bits of the address (the result of the lui %hi(const)).
| 330 | +
| 331 | +@const = constant i32 42
| 332 | +define i32 @constglobal_load() nounwind {
| 333 | +; CHECK-LABEL: constglobal_load:
| 334 | +; CHECK: # %bb.0: # %entry
| 335 | +; CHECK-NEXT: addi sp, sp, -112
| 336 | +; CHECK-NEXT: sd ra, 104(sp) # 8-byte Folded Spill
| 337 | +; CHECK-NEXT: sd s0, 96(sp) # 8-byte Folded Spill
| 338 | +; CHECK-NEXT: sd s1, 88(sp) # 8-byte Folded Spill
| 339 | +; CHECK-NEXT: sd s2, 80(sp) # 8-byte Folded Spill
| 340 | +; CHECK-NEXT: sd s3, 72(sp) # 8-byte Folded Spill
| 341 | +; CHECK-NEXT: sd s4, 64(sp) # 8-byte Folded Spill
| 342 | +; CHECK-NEXT: sd s5, 56(sp) # 8-byte Folded Spill
| 343 | +; CHECK-NEXT: sd s6, 48(sp) # 8-byte Folded Spill
| 344 | +; CHECK-NEXT: sd s7, 40(sp) # 8-byte Folded Spill
| 345 | +; CHECK-NEXT: sd s8, 32(sp) # 8-byte Folded Spill
| 346 | +; CHECK-NEXT: sd s9, 24(sp) # 8-byte Folded Spill
| 347 | +; CHECK-NEXT: sd s10, 16(sp) # 8-byte Folded Spill
| 348 | +; CHECK-NEXT: sd s11, 8(sp) # 8-byte Folded Spill
| 349 | +; CHECK-NEXT: lui a0, %hi(const)
| 350 | +; CHECK-NEXT: lw a0, %lo(const)(a0)
| 351 | +; CHECK-NEXT: sd a0, 0(sp) # 8-byte Folded Spill
| 352 | +; CHECK-NEXT: #APP
| 353 | +; CHECK-NEXT: #NO_APP
| 354 | +; CHECK-NEXT: ld a0, 0(sp) # 8-byte Folded Reload
| 355 | +; CHECK-NEXT: addiw a0, a0, 1
| 356 | +; CHECK-NEXT: ld ra, 104(sp) # 8-byte Folded Reload
| 357 | +; CHECK-NEXT: ld s0, 96(sp) # 8-byte Folded Reload
| 358 | +; CHECK-NEXT: ld s1, 88(sp) # 8-byte Folded Reload
| 359 | +; CHECK-NEXT: ld s2, 80(sp) # 8-byte Folded Reload
| 360 | +; CHECK-NEXT: ld s3, 72(sp) # 8-byte Folded Reload
| 361 | +; CHECK-NEXT: ld s4, 64(sp) # 8-byte Folded Reload
| 362 | +; CHECK-NEXT: ld s5, 56(sp) # 8-byte Folded Reload
| 363 | +; CHECK-NEXT: ld s6, 48(sp) # 8-byte Folded Reload
| 364 | +; CHECK-NEXT: ld s7, 40(sp) # 8-byte Folded Reload
| 365 | +; CHECK-NEXT: ld s8, 32(sp) # 8-byte Folded Reload
| 366 | +; CHECK-NEXT: ld s9, 24(sp) # 8-byte Folded Reload
| 367 | +; CHECK-NEXT: ld s10, 16(sp) # 8-byte Folded Reload
| 368 | +; CHECK-NEXT: ld s11, 8(sp) # 8-byte Folded Reload
| 369 | +; CHECK-NEXT: addi sp, sp, 112
| 370 | +; CHECK-NEXT: ret
| 371 | +entry:
| 372 | + %global = load i32, ptr @const
| 373 | + tail call void asm sideeffect "", "~{x1},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"()
| 374 | + %a = add i32 %global, 1
| 375 | + ret i32 %a
| 376 | +}
0 commit comments