Skip to content

Commit 0eef0b2

Browse files
Use branch instruction instead of conditional instruction in brIf
1 parent db9c104 commit 0eef0b2

File tree

7 files changed

+1846
-1625
lines changed

7 files changed

+1846
-1625
lines changed

Sources/WasmKit/Execution/DispatchInstruction.swift

Lines changed: 981 additions & 404 deletions
Large diffs are not rendered by default.

Sources/WasmKit/Execution/Execution.swift

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,10 @@ extension Pc {
156156
self += 1
157157
return value
158158
}
159+
160+
func next() -> (Pc, CodeSlot) {
161+
return (self.advanced(by: 1), pointee)
162+
}
159163
}
160164

161165
/// Executes a WebAssembly function.
@@ -366,13 +370,12 @@ extension Execution {
366370
var stats = StatsCollector()
367371
defer { stats.dump() }
368372
#endif
369-
var inst: UInt64
373+
var inst = pc.read(UInt64.self)
370374
while true {
371-
inst = pc.read(UInt64.self)
372375
#if EngineStats
373376
stats.track(inst)
374377
#endif
375-
try doExecute(inst, sp: &sp, pc: &pc, md: &md, ms: &ms)
378+
inst = try doExecute(inst, sp: &sp, pc: &pc, md: &md, ms: &ms)
376379
}
377380
}
378381

Sources/WasmKit/Execution/Instructions/Control.swift

Lines changed: 40 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,62 @@
11
/// > Note:
22
/// <https://webassembly.github.io/spec/core/exec/instructions.html#control-instructions>
33
extension Execution {
4-
func unreachable(sp: Sp, pc: Pc) throws -> Pc {
4+
func unreachable(sp: Sp, pc: Pc) throws -> (Pc, CodeSlot) {
55
throw Trap.unreachable
66
}
77
mutating func nop(sp: Sp) {
88
}
99

10-
mutating func br(sp: Sp, pc: Pc, offset: Int32) -> Pc {
11-
return pc.advanced(by: Int(offset))
12-
}
13-
mutating func brIf(sp: Sp, pc: Pc, brIfOperand: Instruction.BrIfOperand) -> Pc {
14-
guard sp[i32: brIfOperand.condition] != 0 else {
15-
return pc
10+
mutating func br(sp: Sp, pc: Pc, offset: Int32) -> (Pc, CodeSlot) {
11+
return pc.advanced(by: Int(offset)).next()
12+
}
13+
mutating func brIf(sp: Sp, pc: Pc, brIfOperand: Instruction.BrIfOperand) -> (Pc, CodeSlot) {
14+
// NOTE: Marked as `_fastPath` so that the compiler emits a real branch here
15+
// instead of a conditional instruction (e.g. csel), letting the CPU's branch prediction do its work. Typically
16+
// if-conversion is applied to optimize branches into conditional instructions
17+
// but it's not always the best choice for performance when the branch is
18+
// highly predictable:
19+
//
20+
// > Use branches when the condition is highly predictable. The cost of
21+
// > mispredicts will be low, and the code will be executed with optimal
22+
// > latency.
23+
// >
24+
// > Apple Silicon CPU Optimization Guide: 3.0 (Page 105)
25+
//
26+
// We prefer branch instructions over conditional instructions to provide
27+
// the best performance when guest code is highly predictable.
28+
guard _fastPath(sp[i32: brIfOperand.condition] != 0) else {
29+
return pc.next()
1630
}
17-
return pc.advanced(by: Int(brIfOperand.offset))
31+
return pc.advanced(by: Int(brIfOperand.offset)).next()
1832
}
19-
mutating func brIfNot(sp: Sp, pc: Pc, brIfOperand: Instruction.BrIfOperand) -> Pc {
20-
guard sp[i32: brIfOperand.condition] == 0 else {
21-
return pc
33+
mutating func brIfNot(sp: Sp, pc: Pc, brIfOperand: Instruction.BrIfOperand) -> (Pc, CodeSlot) {
34+
// NOTE: See `brIf` for the rationale.
35+
guard _fastPath(sp[i32: brIfOperand.condition] == 0) else {
36+
return pc.next()
2237
}
23-
return pc.advanced(by: Int(brIfOperand.offset))
38+
return pc.advanced(by: Int(brIfOperand.offset)).next()
2439
}
25-
mutating func brTable(sp: Sp, pc: Pc, brTable: Instruction.BrTable) -> Pc {
40+
mutating func brTable(sp: Sp, pc: Pc, brTable: Instruction.BrTable) -> (Pc, CodeSlot) {
2641
let index = sp[i32: brTable.index]
2742
let normalizedOffset = min(Int(index), Int(brTable.count - 1))
2843
let entry = brTable.baseAddress[normalizedOffset]
29-
return pc.advanced(by: Int(entry.offset))
44+
return pc.advanced(by: Int(entry.offset)).next()
3045
}
3146

3247
@inline(__always)
33-
mutating func _return(sp: inout Sp, pc: Pc, md: inout Md, ms: inout Ms) -> Pc {
48+
mutating func _return(sp: inout Sp, pc: Pc, md: inout Md, ms: inout Ms) -> (Pc, CodeSlot) {
3449
var pc = pc
3550
popFrame(sp: &sp, pc: &pc, md: &md, ms: &ms)
36-
return pc
51+
return pc.next()
3752
}
3853

39-
mutating func endOfExecution(sp: inout Sp, pc: Pc) throws -> Pc {
54+
mutating func endOfExecution(sp: inout Sp, pc: Pc) throws -> (Pc, CodeSlot) {
4055
throw EndOfExecution()
4156
}
4257

4358
@inline(__always)
44-
mutating func call(sp: inout Sp, pc: Pc, md: inout Md, ms: inout Ms, callOperand: Instruction.CallOperand) throws -> Pc {
59+
mutating func call(sp: inout Sp, pc: Pc, md: inout Md, ms: inout Ms, callOperand: Instruction.CallOperand) throws -> (Pc, CodeSlot) {
4560
var pc = pc
4661

4762
(pc, sp) = try invoke(
@@ -50,7 +65,7 @@ extension Execution {
5065
callLike: callOperand.callLike,
5166
sp: sp, pc: pc, md: &md, ms: &ms
5267
)
53-
return pc
68+
return pc.next()
5469
}
5570

5671
@inline(__always)
@@ -74,15 +89,15 @@ extension Execution {
7489
}
7590

7691
@inline(__always)
77-
mutating func internalCall(sp: inout Sp, pc: Pc, internalCallOperand: Instruction.InternalCallOperand) throws -> Pc {
92+
mutating func internalCall(sp: inout Sp, pc: Pc, internalCallOperand: Instruction.InternalCallOperand) throws -> (Pc, CodeSlot) {
7893
var pc = pc
7994
let callee = internalCallOperand.callee
8095
try _internalCall(sp: &sp, pc: &pc, callee: callee, internalCallOperand: internalCallOperand)
81-
return pc
96+
return pc.next()
8297
}
8398

8499
@inline(__always)
85-
mutating func compilingCall(sp: inout Sp, pc: Pc, compilingCallOperand: Instruction.CompilingCallOperand) throws -> Pc {
100+
mutating func compilingCall(sp: inout Sp, pc: Pc, compilingCallOperand: Instruction.CompilingCallOperand) throws -> (Pc, CodeSlot) {
86101
var pc = pc
87102
// NOTE: `CompilingCallOperand` consumes 2 slots, discriminator is at -3
88103
let discriminatorPc = pc.advanced(by: -3)
@@ -96,7 +111,7 @@ extension Execution {
96111
discriminatorPc.pointee = UInt64(replaced.rawIndex)
97112
}
98113
try _internalCall(sp: &sp, pc: &pc, callee: callee, internalCallOperand: compilingCallOperand)
99-
return pc
114+
return pc.next()
100115
}
101116

102117
@inline(never)
@@ -126,7 +141,7 @@ extension Execution {
126141
}
127142

128143
@inline(__always)
129-
mutating func callIndirect(sp: inout Sp, pc: Pc, md: inout Md, ms: inout Ms, callIndirectOperand: Instruction.CallIndirectOperand) throws -> Pc {
144+
mutating func callIndirect(sp: inout Sp, pc: Pc, md: inout Md, ms: inout Ms, callIndirectOperand: Instruction.CallIndirectOperand) throws -> (Pc, CodeSlot) {
130145
var pc = pc
131146
let (function, callerInstance) = try prepareForIndirectCall(
132147
sp: sp, tableIndex: callIndirectOperand.tableIndex, expectedType: callIndirectOperand.type,
@@ -138,7 +153,7 @@ extension Execution {
138153
callLike: callIndirectOperand.callLike,
139154
sp: sp, pc: pc, md: &md, ms: &ms
140155
)
141-
return pc
156+
return pc.next()
142157
}
143158

144159
mutating func onEnter(sp: Sp, onEnterOperand: Instruction.OnEnterOperand) {

Sources/WasmKit/Execution/Instructions/InstructionSupport.swift

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -390,10 +390,16 @@ extension Instruction {
390390
}
391391

392392
struct BrIfOperand: Equatable, InstructionImmediate {
393-
let offset: Int32
394393
let condition: LVReg
394+
let offset: Int32
395+
396+
init(condition: LLVReg, offset: Int64) {
397+
self.offset = Int32(offset)
398+
self.condition = LVReg(condition)
399+
}
400+
395401
static func load(from pc: inout Pc) -> Self {
396-
pc.read()
402+
return pc.read()
397403
}
398404
static func emit(to emitSlot: ((Self) -> CodeSlot) -> Void) {
399405
emitSlot { unsafeBitCast($0, to: CodeSlot.self) }

Sources/WasmKit/Translator.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1041,7 +1041,7 @@ struct InstructionTranslator<Context: TranslatorContext>: InstructionVisitor {
10411041
targetPC = endPC
10421042
}
10431043
let elseOrEnd = UInt32(targetPC.offsetFromHead - selfPC.offsetFromHead)
1044-
return .brIfNot(Instruction.BrIfOperand(offset: Int32(elseOrEnd), condition: LVReg(condition)))
1044+
return .brIfNot(Instruction.BrIfOperand(condition: LLVReg(condition), offset: Int64(elseOrEnd)))
10451045
}
10461046
}
10471047

@@ -1170,7 +1170,7 @@ struct InstructionTranslator<Context: TranslatorContext>: InstructionVisitor {
11701170
iseqBuilder.emitWithLabel(frame.continuation) { _, selfPC, continuation in
11711171
let relativeOffset = continuation.offsetFromHead - selfPC.offsetFromHead
11721172
return .brIf(Instruction.BrIfOperand(
1173-
offset: Int32(relativeOffset), condition: LVReg(condition)
1173+
condition: LLVReg(condition), offset: Int64(relativeOffset)
11741174
))
11751175
}
11761176
return
@@ -1200,7 +1200,7 @@ struct InstructionTranslator<Context: TranslatorContext>: InstructionVisitor {
12001200
let onBranchNotTaken = iseqBuilder.allocLabel()
12011201
iseqBuilder.emitWithLabel(onBranchNotTaken) { _, conditionCheckAt, continuation in
12021202
let relativeOffset = continuation.offsetFromHead - conditionCheckAt.offsetFromHead
1203-
return .brIfNot(Instruction.BrIfOperand(offset: Int32(relativeOffset), condition: LVReg(condition)))
1203+
return .brIfNot(Instruction.BrIfOperand(condition: LLVReg(condition), offset: Int64(relativeOffset)))
12041204
}
12051205
try copyOnBranch(targetFrame: frame)
12061206
try emitBranch(relativeDepth: relativeDepth) { offset, copyCount, popCount in

0 commit comments

Comments
 (0)