Skip to content

Commit d548268

Browse files
authored
Aggressively prune no-side-effect instructions during DCE. (#691)
* Aggressively prune no-side-effect instructions during DCE. Since we're walking all the instructions anyway, it's practically zero-cost. * Reverse iteration order within a function. This allows rooting more instructions per `spread_roots` invocation, which makes it zero-cost in the absence of loops. * Manually iterate over function instructions in reverse order.
1 parent cccb973 commit d548268

File tree

2 files changed

+173
-22
lines changed

2 files changed

+173
-22
lines changed

crates/rustc_codegen_spirv/src/linker/dce.rs

Lines changed: 156 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
//! *references* a rooted thing is also rooted, not the other way around - but that's the basic
88
//! concept.
99
10-
use rspirv::dr::{Function, Instruction, Module};
11-
use rspirv::spirv::{Op, Word};
10+
use rspirv::dr::{Function, Instruction, Module, Operand};
11+
use rspirv::spirv::{Op, StorageClass, Word};
1212
use rustc_data_structures::fx::FxHashSet;
1313

1414
pub fn dce(module: &mut Module) {
@@ -36,8 +36,29 @@ fn spread_roots(module: &Module, rooted: &mut FxHashSet<Word>) -> bool {
3636
}
3737
for func in &module.functions {
3838
if rooted.contains(&func.def_id().unwrap()) {
39-
for inst in func.all_inst_iter() {
40-
any |= root(inst, rooted);
39+
// NB (Mobius 2021) - since later insts are much more likely to reference
40+
// earlier insts, by reversing the iteration order, we're more likely to root the
41+
// entire relevant function at once.
42+
// See https://github.com/EmbarkStudios/rust-gpu/pull/691#discussion_r681477091
43+
for inst in func
44+
.end
45+
.iter()
46+
.chain(
47+
func.blocks
48+
.iter()
49+
.rev()
50+
.flat_map(|b| b.instructions.iter().rev().chain(b.label.iter())),
51+
)
52+
.chain(func.parameters.iter().rev())
53+
.chain(func.def.iter())
54+
{
55+
if !instruction_is_pure(inst) {
56+
any |= root(inst, rooted);
57+
} else if let Some(id) = inst.result_id {
58+
if rooted.contains(&id) {
59+
any |= root(inst, rooted);
60+
}
61+
}
4162
}
4263
}
4364
}
@@ -90,6 +111,13 @@ fn kill_unrooted(module: &mut Module, rooted: &FxHashSet<Word>) {
90111
module
91112
.functions
92113
.retain(|f| is_rooted(f.def.as_ref().unwrap(), rooted));
114+
module.functions.iter_mut().for_each(|fun| {
115+
fun.blocks.iter_mut().for_each(|block| {
116+
block
117+
.instructions
118+
.retain(|inst| !instruction_is_pure(inst) || is_rooted(inst, rooted));
119+
});
120+
});
93121
}
94122

95123
pub fn dce_phi(func: &mut Function) {
@@ -115,3 +143,127 @@ pub fn dce_phi(func: &mut Function) {
115143
.retain(|inst| inst.class.opcode != Op::Phi || used.contains(&inst.result_id.unwrap()));
116144
}
117145
}
146+
147+
fn instruction_is_pure(inst: &Instruction) -> bool {
148+
use Op::*;
149+
match inst.class.opcode {
150+
Nop
151+
| Undef
152+
| ConstantTrue
153+
| ConstantFalse
154+
| Constant
155+
| ConstantComposite
156+
| ConstantSampler
157+
| ConstantNull
158+
| AccessChain
159+
| InBoundsAccessChain
160+
| PtrAccessChain
161+
| ArrayLength
162+
| InBoundsPtrAccessChain
163+
| CompositeConstruct
164+
| CompositeExtract
165+
| CopyObject
166+
| Transpose
167+
| ConvertFToU
168+
| ConvertFToS
169+
| ConvertSToF
170+
| ConvertUToF
171+
| UConvert
172+
| SConvert
173+
| FConvert
174+
| QuantizeToF16
175+
| ConvertPtrToU
176+
| SatConvertSToU
177+
| SatConvertUToS
178+
| ConvertUToPtr
179+
| PtrCastToGeneric
180+
| GenericCastToPtr
181+
| GenericCastToPtrExplicit
182+
| Bitcast
183+
| SNegate
184+
| FNegate
185+
| IAdd
186+
| FAdd
187+
| ISub
188+
| FSub
189+
| IMul
190+
| FMul
191+
| UDiv
192+
| SDiv
193+
| FDiv
194+
| UMod
195+
| SRem
196+
| SMod
197+
| FRem
198+
| FMod
199+
| VectorTimesScalar
200+
| MatrixTimesScalar
201+
| VectorTimesMatrix
202+
| MatrixTimesVector
203+
| MatrixTimesMatrix
204+
| OuterProduct
205+
| Dot
206+
| IAddCarry
207+
| ISubBorrow
208+
| UMulExtended
209+
| SMulExtended
210+
| Any
211+
| All
212+
| IsNan
213+
| IsInf
214+
| IsFinite
215+
| IsNormal
216+
| SignBitSet
217+
| LessOrGreater
218+
| Ordered
219+
| Unordered
220+
| LogicalEqual
221+
| LogicalNotEqual
222+
| LogicalOr
223+
| LogicalAnd
224+
| LogicalNot
225+
| Select
226+
| IEqual
227+
| INotEqual
228+
| UGreaterThan
229+
| SGreaterThan
230+
| UGreaterThanEqual
231+
| SGreaterThanEqual
232+
| ULessThan
233+
| SLessThan
234+
| ULessThanEqual
235+
| SLessThanEqual
236+
| FOrdEqual
237+
| FUnordEqual
238+
| FOrdNotEqual
239+
| FUnordNotEqual
240+
| FOrdLessThan
241+
| FUnordLessThan
242+
| FOrdGreaterThan
243+
| FUnordGreaterThan
244+
| FOrdLessThanEqual
245+
| FUnordLessThanEqual
246+
| FOrdGreaterThanEqual
247+
| FUnordGreaterThanEqual
248+
| ShiftRightLogical
249+
| ShiftRightArithmetic
250+
| ShiftLeftLogical
251+
| BitwiseOr
252+
| BitwiseXor
253+
| BitwiseAnd
254+
| Not
255+
| BitFieldInsert
256+
| BitFieldSExtract
257+
| BitFieldUExtract
258+
| BitReverse
259+
| BitCount
260+
| Phi
261+
| SizeOf
262+
| CopyLogical
263+
| PtrEqual
264+
| PtrNotEqual
265+
| PtrDiff => true,
266+
Variable => inst.operands.get(0) == Some(&Operand::StorageClass(StorageClass::Function)),
267+
_ => false,
268+
}
269+
}

tests/ui/dis/index_user_dst.stderr

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,33 +5,32 @@ OpLine %5 7 12
55
%10 = OpArrayLength %11 %8 0
66
OpLine %5 7 0
77
%12 = OpCompositeInsert %13 %6 %14 0
8-
%15 = OpCompositeConstruct %13 %6 %10
98
OpLine %5 8 21
10-
%16 = OpULessThan %17 %9 %10
9+
%15 = OpULessThan %16 %9 %10
1110
OpLine %5 8 21
12-
OpSelectionMerge %18 None
13-
OpBranchConditional %16 %19 %20
14-
%19 = OpLabel
11+
OpSelectionMerge %17 None
12+
OpBranchConditional %15 %18 %19
13+
%18 = OpLabel
1514
OpLine %5 8 21
16-
%21 = OpInBoundsAccessChain %22 %6 %9
17-
%23 = OpLoad %24 %21
15+
%20 = OpInBoundsAccessChain %21 %6 %9
16+
%22 = OpLoad %23 %20
1817
OpLine %5 10 1
1918
OpReturn
20-
%20 = OpLabel
19+
%19 = OpLabel
2120
OpLine %5 8 21
21+
OpBranch %24
22+
%24 = OpLabel
2223
OpBranch %25
2324
%25 = OpLabel
24-
OpBranch %26
25-
%26 = OpLabel
26-
%27 = OpPhi %17 %28 %25 %28 %29
27-
OpLoopMerge %30 %29 None
28-
OpBranchConditional %27 %31 %30
29-
%31 = OpLabel
30-
OpBranch %29
31-
%29 = OpLabel
32-
OpBranch %26
25+
%26 = OpPhi %16 %27 %24 %27 %28
26+
OpLoopMerge %29 %28 None
27+
OpBranchConditional %26 %30 %29
3328
%30 = OpLabel
29+
OpBranch %28
30+
%28 = OpLabel
31+
OpBranch %25
32+
%29 = OpLabel
3433
OpUnreachable
35-
%18 = OpLabel
34+
%17 = OpLabel
3635
OpUnreachable
3736
OpFunctionEnd

0 commit comments

Comments
 (0)