Skip to content

Commit 9d7a233

Browse files
committed
Revert "[RISCV] Refactor X60 scheduling model helper classes. NFC. (#151572)"
This reverts commit 969b246. The commit caused a UBSan failure:
```
/home/ray/llvm/llvm/lib/TableGen/Record.cpp:1567:41: runtime error: shift exponent 18446744073709551612 is too large for 64-bit type 'uint64_t' (aka 'unsigned long')
SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior /home/ray/llvm/llvm/lib/TableGen/Record.cpp:1567:41
```
1 parent 8e9e38a commit 9d7a233

File tree

1 file changed

+70
-88
lines changed

1 file changed

+70
-88
lines changed

llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td

Lines changed: 70 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -13,108 +13,78 @@
1313
//
1414
//===----------------------------------------------------------------------===//
1515

16-
//===----------------------------------------------------------------------===//
17-
// Helpers
18-
19-
// Maps LMUL string to corresponding value from the Values array
20-
// LMUL values map to array indices as follows:
21-
// MF8 -> Values[0], MF4 -> Values[1], MF2 -> Values[2], M1 -> Values[3],
22-
// M2 -> Values[4], M4 -> Values[5], M8 -> Values[6]
23-
// Shorter lists are allowed, e.g., widening instructions don't work on M8
24-
class GetLMULValue<list<int> Values, string LMUL> {
25-
defvar Index = !cond(
26-
!eq(LMUL, "MF8"): 0,
27-
!eq(LMUL, "MF4"): 1,
28-
!eq(LMUL, "MF2"): 2,
29-
!eq(LMUL, "M1"): 3,
30-
!eq(LMUL, "M2"): 4,
31-
!eq(LMUL, "M4"): 5,
32-
!eq(LMUL, "M8"): 6,
33-
);
34-
35-
assert !lt(Index, !size(Values)),
36-
"Missing LMUL value for '" # LMUL # "'. " #
37-
"Expected at least " # !add(Index, 1) # " elements, but got " #
38-
!size(Values) # ".";
39-
40-
int c = Values[Index];
16+
class SMX60IsWorstCaseMX<string mx, list<string> MxList> {
17+
string LLMUL = LargestLMUL<MxList>.r;
18+
bit c = !eq(mx, LLMUL);
4119
}
4220

43-
// Returns BaseValue for LMUL values before startLMUL, Value for startLMUL,
44-
// then doubles Value for each subsequent LMUL
45-
// Example: ConstValueUntilLMULThenDoubleBase<"M1", 2, 4, "M8"> returns:
46-
// MF8->2, MF4->2, MF2->2, M1->4, M2->8, M4->16, M8->32
47-
// This is useful for modeling scheduling parameters that scale with LMUL.
48-
class ConstValueUntilLMULThenDoubleBase<string startLMUL, int BaseValue, int Value, string currentLMUL> {
49-
assert !le(BaseValue, Value), "BaseValue must be le to Value";
50-
defvar startPos = GetLMULValue<[0, 1, 2, 3, 4, 5, 6], startLMUL>.c;
51-
defvar currentPos = GetLMULValue<[0, 1, 2, 3, 4, 5, 6], currentLMUL>.c;
21+
class SMX60IsWorstCaseMXSEW<string mx, int sew, list<string> MxList, bit isF = 0> {
22+
string LLMUL = LargestLMUL<MxList>.r;
23+
int SSEW = SmallestSEW<mx, isF>.r;
24+
bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW));
25+
}
5226

53-
// Calculate the difference in positions
54-
defvar posDiff = !sub(currentPos, startPos);
27+
defvar SMX60VLEN = 256;
28+
defvar SMX60DLEN = !div(SMX60VLEN, 2);
5529

56-
// Calculate Value * (2^posDiff) using shift left
30+
class Get1248Latency<string mx> {
5731
int c = !cond(
58-
!lt(posDiff, 0) : BaseValue,
59-
!eq(posDiff, 0) : Value,
60-
true: !mul(Value, !shl(1, posDiff))
32+
!eq(mx, "M2") : 2,
33+
!eq(mx, "M4") : 4,
34+
!eq(mx, "M8") : 8,
35+
true: 1
6136
);
6237
}
6338

64-
// Same as the previous function but BaseValue == Value
65-
class ConstValueUntilLMULThenDouble<string startLMUL, int Value, string currentLMUL> {
66-
int c = ConstValueUntilLMULThenDoubleBase<startLMUL, Value, Value, currentLMUL>.c;
67-
}
68-
69-
// Returns MF8->1, MF4->1, MF2->2, M1->4, M2->8, M4->16, M8->32
70-
class ConstOneUntilMF4ThenDouble<string mx> {
71-
int c = ConstValueUntilLMULThenDouble<"MF4", 1, mx>.c;
72-
}
73-
74-
// Returns MF8->1, MF4->1, MF2->1, M1->2, M2->4, M4->8, M8->16
75-
class ConstOneUntilMF2ThenDouble<string mx> {
76-
int c = ConstValueUntilLMULThenDouble<"MF2", 1, mx>.c;
77-
}
78-
79-
// Returns MF8->1, MF4->1, MF2->1, M1->1, M2->2, M4->4, M8->8
80-
class ConstOneUntilM1ThenDouble<string mx> {
81-
int c = ConstValueUntilLMULThenDouble<"M1", 1, mx>.c;
39+
// Used for: logical ops, shifts, sign ext, merge/move, FP sign/recip/convert, mask ops, slides
40+
class Get4816Latency<string mx> {
41+
int c = !cond(
42+
!eq(mx, "M4") : 8,
43+
!eq(mx, "M8") : 16,
44+
true: 4
45+
);
8246
}
8347

84-
//===----------------------------------------------------------------------===//
85-
// Latency helper classes
86-
8748
// Used for: arithmetic (add/sub/min/max), saturating/averaging, FP add/sub/min/max
88-
class Get4458Latency<string mx> {
89-
int c = GetLMULValue<[/*MF8=*/4, /*MF4=*/4, /*MF2=*/4, /*M1=*/4, /*M2=*/4, /*M4=*/5, /*M8=*/8], mx>.c;
49+
class Get458Latency<string mx> {
50+
int c = !cond(
51+
!eq(mx, "M4") : 5,
52+
!eq(mx, "M8") : 8,
53+
true: 4
54+
);
9055
}
9156

92-
// Used for: widening operations (no M8)
57+
// Widening scaling pattern (4,4,4,4,5,8,8): plateaus at higher LMULs
58+
// Used for: widening operations
9359
class Get4588Latency<string mx> {
94-
int c = GetLMULValue<[/*MF8=*/4, /*MF4=*/4, /*MF2=*/4, /*M1=*/4, /*M2=*/5, /*M4=*/8], mx>.c;
60+
int c = !cond(
61+
!eq(mx, "M2") : 5,
62+
!eq(mx, "M4") : 8,
63+
!eq(mx, "M8") : 8, // M8 not supported for most widening, fallback
64+
true: 4
65+
);
9566
}
9667

9768
// Used for: mask-producing comparisons, carry ops with mask, FP comparisons
9869
class Get461018Latency<string mx> {
99-
int c = GetLMULValue<[/*MF8=*/4, /*MF4=*/4, /*MF2=*/4, /*M1=*/4, /*M2=*/6, /*M4=*/10, /*M8=*/18], mx>.c;
100-
}
101-
102-
//===----------------------------------------------------------------------===//
103-
104-
class SMX60IsWorstCaseMX<string mx, list<string> MxList> {
105-
string LLMUL = LargestLMUL<MxList>.r;
106-
bit c = !eq(mx, LLMUL);
70+
int c = !cond(
71+
!eq(mx, "M2") : 6,
72+
!eq(mx, "M4") : 10,
73+
!eq(mx, "M8") : 18,
74+
true: 4
75+
);
10776
}
10877

109-
class SMX60IsWorstCaseMXSEW<string mx, int sew, list<string> MxList, bit isF = 0> {
110-
string LLMUL = LargestLMUL<MxList>.r;
111-
int SSEW = SmallestSEW<mx, isF>.r;
112-
bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW));
78+
// Used for: e64 multiply pattern, complex ops
79+
class Get781632Latency<string mx> {
80+
int c = !cond(
81+
!eq(mx, "M2") : 8,
82+
!eq(mx, "M4") : 16,
83+
!eq(mx, "M8") : 32,
84+
true: 7
85+
);
11386
}
11487

115-
defvar SMX60VLEN = 256;
116-
defvar SMX60DLEN = !div(SMX60VLEN, 2);
117-
11888
def SpacemitX60Model : SchedMachineModel {
11989
let IssueWidth = 2; // dual-issue
12090
let MicroOpBufferSize = 0; // in-order
@@ -413,13 +383,12 @@ foreach LMul = [1, 2, 4, 8] in {
413383
foreach mx = SchedMxList in {
414384
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
415385

416-
let Latency = Get4458Latency<mx>.c, ReleaseAtCycles = [4] in {
386+
let Latency = Get458Latency<mx>.c, ReleaseAtCycles = [4] in {
417387
defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SMX60_VIEU], mx, IsWorstCase>;
418388
defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SMX60_VIEU], mx, IsWorstCase>;
419389
}
420390

421-
defvar VIALULat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c;
422-
let Latency = VIALULat, ReleaseAtCycles = [4] in {
391+
let Latency = Get4816Latency<mx>.c, ReleaseAtCycles = [4] in {
423392
// Pattern of vadd, vsub, vrsub: 4/4/5/8
424393
// Pattern of vand, vor, vxor: 4/4/8/16
425394
// They are grouped together, so we used the worst case 4/4/8/16
@@ -456,7 +425,7 @@ foreach mx = SchedMxList in {
456425
// Pattern of vmacc, vmadd, vmul, vmulh, etc.: e8/e16 = 4/4/5/8, e32 = 5,5,5,8,
457426
// e64 = 7,8,16,32. We use the worst-case until we can split the SEW.
458427
// TODO: change WriteVIMulV, etc to be defined with LMULSEWSchedWrites
459-
let Latency = ConstValueUntilLMULThenDoubleBase<"M2", 7, 8, mx>.c, ReleaseAtCycles = [7] in {
428+
let Latency = Get781632Latency<mx>.c, ReleaseAtCycles = [7] in {
460429
defm "" : LMULWriteResMX<"WriteVIMulV", [SMX60_VIEU], mx, IsWorstCase>;
461430
defm "" : LMULWriteResMX<"WriteVIMulX", [SMX60_VIEU], mx, IsWorstCase>;
462431
defm "" : LMULWriteResMX<"WriteVIMulAddV", [SMX60_VIEU], mx, IsWorstCase>;
@@ -492,8 +461,15 @@ foreach mx = SchedMxList in {
492461
foreach sew = SchedSEWSet<mx>.val in {
493462
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
494463

495-
defvar VIDivLat = ConstValueUntilLMULThenDouble<"MF2", 12, mx>.c;
496-
let Latency = VIDivLat, ReleaseAtCycles = [12] in {
464+
// Per-LMUL multiplier: 12 for fractional LMULs (MF8/MF4/MF2), 24 for all other LMULs
465+
defvar Multiplier = !cond(
466+
!eq(mx, "MF8") : 12,
467+
!eq(mx, "MF4") : 12,
468+
!eq(mx, "MF2") : 12,
469+
true: 24
470+
);
471+
472+
let Latency = !mul(Get1248Latency<mx>.c, Multiplier), ReleaseAtCycles = [12] in {
497473
defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SMX60_VIEU], mx, sew, IsWorstCase>;
498474
defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SMX60_VIEU], mx, sew, IsWorstCase>;
499475
}
@@ -504,8 +480,14 @@ foreach mx = SchedMxList in {
504480
foreach mx = SchedMxListW in {
505481
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxListW>.c;
506482

507-
defvar VNarrowingLat = ConstValueUntilLMULThenDouble<"M1", 4, mx>.c;
508-
let Latency = VNarrowingLat, ReleaseAtCycles = [4] in {
483+
// Latency is doubled for M2 and M4 only; every other LMUL uses a multiplier of 1
484+
defvar Multiplier = !cond(
485+
!eq(mx, "M2") : 2,
486+
!eq(mx, "M4") : 2,
487+
true: 1
488+
);
489+
490+
let Latency = !mul(Get4816Latency<mx>.c, Multiplier), ReleaseAtCycles = [4] in {
509491
defm "" : LMULWriteResMX<"WriteVNShiftV", [SMX60_VIEU], mx, IsWorstCase>;
510492
defm "" : LMULWriteResMX<"WriteVNShiftX", [SMX60_VIEU], mx, IsWorstCase>;
511493
defm "" : LMULWriteResMX<"WriteVNShiftI", [SMX60_VIEU], mx, IsWorstCase>;

0 commit comments

Comments
 (0)