//===-- X86EliminateRedundantZeroExtend.cpp - Eliminate Redundant ZExt ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This pass eliminates redundant zero-extension instructions where the source
/// register is a sub-register of the destination and the destination's upper
/// bits are known to be zero.
///
/// For example:
///   movzbl (%rdi), %ecx ; ECX = zero-extend byte, upper 24 bits are zero
///   ...
///   movzbl %cl, %ecx    ; Redundant! CL is part of ECX, upper bits already 0
///
/// This pattern commonly occurs in loops processing byte values.
//===----------------------------------------------------------------------===//
20+
21+ #include " X86.h"
22+ #include " X86InstrInfo.h"
23+ #include " X86Subtarget.h"
24+ #include " llvm/CodeGen/MachineFunctionPass.h"
25+ #include " llvm/CodeGen/MachineInstrBuilder.h"
26+ #include " llvm/CodeGen/MachineRegisterInfo.h"
27+ #include " llvm/CodeGen/TargetInstrInfo.h"
28+ #include " llvm/Support/Debug.h"
29+
30+ using namespace llvm ;
31+
32+ #define DEBUG_TYPE " x86-eliminate-zext"
33+ #define PASS_NAME " X86 Eliminate Redundant Zero Extension"
34+
35+ namespace {
36+ class EliminateRedundantZeroExtend : public MachineFunctionPass {
37+ public:
38+ static char ID;
39+ EliminateRedundantZeroExtend () : MachineFunctionPass(ID) {}
40+
41+ bool runOnMachineFunction (MachineFunction &MF) override ;
42+
43+ StringRef getPassName () const override { return PASS_NAME; }
44+
45+ MachineFunctionProperties getRequiredProperties () const override {
46+ return MachineFunctionProperties ().setNoVRegs ();
47+ }
48+
49+ private:
50+ const X86InstrInfo *TII = nullptr ;
51+ const TargetRegisterInfo *TRI = nullptr ;
52+
53+ // / Check if the register's upper bits are known to be zero at this point.
54+ // / This checks backward from MI to find the most recent definition of Reg.
55+ bool hasZeroUpperBits (Register Reg, const MachineInstr &MI,
56+ const MachineBasicBlock &MBB) const ;
57+
58+ // / Try to eliminate a redundant MOVZX instruction.
59+ bool tryEliminateRedundantZeroExtend (MachineInstr &MI,
60+ MachineBasicBlock &MBB) const ;
61+ };
62+
63+ char EliminateRedundantZeroExtend::ID = 0 ;
64+ } // end anonymous namespace
65+
66+ FunctionPass *llvm::createX86EliminateRedundantZeroExtend () {
67+ return new EliminateRedundantZeroExtend ();
68+ }
69+
70+ bool EliminateRedundantZeroExtend::hasZeroUpperBits (
71+ Register Reg, const MachineInstr &MI, const MachineBasicBlock &MBB) const {
72+ // Walk backward from MI to find the most recent definition of Reg
73+ MachineBasicBlock::const_reverse_iterator I = ++MI.getReverseIterator ();
74+ MachineBasicBlock::const_reverse_iterator E = MBB.rend ();
75+ for (; I != E; ++I) {
76+ const MachineInstr &Inst = *I;
77+
78+ // Check if this instruction defines Reg
79+ for (const MachineOperand &MO : Inst.operands ()) {
80+ if (!MO.isReg () || !MO.isDef ())
81+ continue ;
82+
83+ Register DefReg = MO.getReg ();
84+ if (DefReg == Reg || TRI->isSuperRegister (Reg, DefReg)) {
85+ // Found a definition - check if it zeros upper bits
86+ unsigned Opc = Inst.getOpcode ();
87+ switch (Opc) {
88+ // These instructions zero-extend to 32 bits
89+ case X86::MOVZX32rm8:
90+ case X86::MOVZX32rr8:
91+ case X86::MOVZX32rm16:
92+ case X86::MOVZX32rr16:
93+ return true ;
94+ // XOR with self zeros the register
95+ case X86::XOR32rr:
96+ if (Inst.getOperand (1 ).getReg () == Inst.getOperand (2 ).getReg ())
97+ return true ;
98+ return false ;
99+ // MOV32r0 explicitly zeros
100+ case X86::MOV32r0:
101+ return true ;
102+ // ADD, SUB on 32-bit register (implicitly zero-extends to 64-bit)
103+ case X86::ADD32rr:
104+ case X86::ADD32ri:
105+ case X86::ADD32rm:
106+ case X86::SUB32rr:
107+ case X86::SUB32ri:
108+ case X86::SUB32rm:
109+ case X86::LEA32r:
110+ return true ;
111+ default :
112+ // Any other definition might set upper bits, so not safe
113+ return false ;
114+ }
115+ }
116+
117+ // Check if this instruction modifies Reg (partial write or implicit use)
118+ if (TRI->regsOverlap (DefReg, Reg)) {
119+ // Partial register update - upper bits are unknown
120+ return false ;
121+ }
122+ }
123+
124+ // Check for implicit defs
125+ for (const MachineOperand &MO : Inst.implicit_operands ()) {
126+ if (MO.isReg () && MO.isDef () && TRI->regsOverlap (MO.getReg (), Reg)) {
127+ return false ;
128+ }
129+ }
130+ }
131+
132+ // Didn't find a definition in this block - check predecessors
133+ // If all predecessors define Reg with zero upper bits, it's safe
134+ if (MBB.pred_empty ())
135+ return false ;
136+
137+ // Check all predecessor blocks
138+ for (const MachineBasicBlock *Pred : MBB.predecessors ()) {
139+ bool FoundZeroExtend = false ;
140+
141+ // SAFETY CHECK: If the sub-register is live-in to the predecessor,
142+ // we make the CONSERVATIVE assumption that the parent register was
143+ // zero-extended in an earlier block.
144+ //
145+ // This is safe because:
146+ // 1. After register allocation, if $cl is live-in but $ecx is not,
147+ // it means only the low 8 bits are meaningful
148+ // 2. The register allocator ensures no other code modifies $ecx between
149+ // the zero-extension and this point (otherwise $ecx would be live)
150+ // 3. Any write to $ch or upper bits would show as a def of $ecx, which
151+ // would be found in our backward scan below and handled correctly
152+ //
153+ // However, this is still conservative - we should verify the actual
154+ // definition to be completely safe.
155+ Register SubReg8 = TRI->getSubReg (Reg, X86::sub_8bit);
156+ Register SubReg16 = TRI->getSubReg (Reg, X86::sub_16bit);
157+ bool SubRegLiveIn = (SubReg8 && Pred->isLiveIn (SubReg8)) ||
158+ (SubReg16 && Pred->isLiveIn (SubReg16));
159+
160+ if (SubRegLiveIn) {
161+ // Sub-register is live-in. We'll verify this is safe by checking
162+ // that no instructions in this block modify the parent register
163+ // before we reach the end (where control flows to our block).
164+ // If we find any such modification, we'll conservatively bail out.
165+ bool SafeToAssume = true ;
166+ for (const MachineInstr &Inst : *Pred) {
167+ for (const MachineOperand &MO : Inst.operands ()) {
168+ if (MO.isReg () && MO.isDef ()) {
169+ Register DefReg = MO.getReg ();
170+ // Check if this modifies Reg or overlaps with it (partial write)
171+ if ((DefReg == Reg || TRI->regsOverlap (DefReg, Reg)) &&
172+ DefReg != SubReg8 && DefReg != SubReg16) {
173+ // Found a write to the parent register or overlapping register
174+ // that's not just the sub-register we expect
175+ SafeToAssume = false ;
176+ break ;
177+ }
178+ }
179+ }
180+ if (!SafeToAssume)
181+ break ;
182+ }
183+
184+ if (SafeToAssume) {
185+ FoundZeroExtend = true ;
186+ goto next_predecessor;
187+ }
188+ }
189+
190+ // Walk backward through predecessor to find last definition of Reg
191+ for (const MachineInstr &Inst : llvm::reverse (*Pred)) {
192+ // Check if this instruction defines Reg
193+ for (const MachineOperand &MO : Inst.operands ()) {
194+ if (!MO.isReg () || !MO.isDef ())
195+ continue ;
196+
197+ Register DefReg = MO.getReg ();
198+ if (DefReg == Reg || TRI->isSuperRegister (Reg, DefReg)) {
199+ // Found a definition - check if it zeros upper bits
200+ unsigned Opc = Inst.getOpcode ();
201+ switch (Opc) {
202+ case X86::MOVZX32rm8:
203+ case X86::MOVZX32rr8:
204+ case X86::MOVZX32rm16:
205+ case X86::MOVZX32rr16:
206+ case X86::MOV32r0:
207+ case X86::ADD32rr:
208+ case X86::ADD32ri:
209+ case X86::ADD32rm:
210+ case X86::SUB32rr:
211+ case X86::SUB32ri:
212+ case X86::SUB32rm:
213+ case X86::LEA32r:
214+ FoundZeroExtend = true ;
215+ break ;
216+ case X86::XOR32rr:
217+ if (Inst.getOperand (1 ).getReg () == Inst.getOperand (2 ).getReg ())
218+ FoundZeroExtend = true ;
219+ break ;
220+ default :
221+ // Found a definition that doesn't zero upper bits
222+ return false ;
223+ }
224+ // Found the definition in this predecessor
225+ goto next_predecessor;
226+ }
227+
228+ // Check for partial register updates
229+ if (TRI->regsOverlap (DefReg, Reg)) {
230+ return false ;
231+ }
232+ }
233+ }
234+
235+ next_predecessor:
236+ // If we didn't find a zero-extending definition in this predecessor, fail
237+ if (!FoundZeroExtend)
238+ return false ;
239+ }
240+
241+ // All predecessors have zero-extending definitions
242+ return true ;
243+ }
244+
245+ bool EliminateRedundantZeroExtend::tryEliminateRedundantZeroExtend (
246+ MachineInstr &MI, MachineBasicBlock &MBB) const {
247+ unsigned Opc = MI.getOpcode ();
248+
249+ // Only handle MOVZX32rr8 for now (can extend to MOVZX32rr16 later)
250+ if (Opc != X86::MOVZX32rr8)
251+ return false ;
252+
253+ Register DstReg = MI.getOperand (0 ).getReg ();
254+ Register SrcReg = MI.getOperand (1 ).getReg ();
255+
256+ // Check if source is a sub-register of destination
257+ // e.g., CL is sub-register of ECX
258+ if (!TRI->isSubRegister (DstReg, SrcReg))
259+ return false ;
260+
261+ // Check if destination's upper bits are already zero
262+ if (!hasZeroUpperBits (DstReg, MI, MBB))
263+ return false ;
264+
265+ // The MOVZX is redundant! Since SrcReg is part of DstReg and DstReg's
266+ // upper bits are already zero, this instruction does nothing.
267+ LLVM_DEBUG (dbgs () << " Eliminating redundant zero-extend: " << MI);
268+ MI.eraseFromParent ();
269+ return true ;
270+ }
271+
272+ bool EliminateRedundantZeroExtend::runOnMachineFunction (MachineFunction &MF) {
273+ TII = MF.getSubtarget <X86Subtarget>().getInstrInfo ();
274+ TRI = MF.getSubtarget <X86Subtarget>().getRegisterInfo ();
275+
276+ bool Changed = false ;
277+
278+ for (MachineBasicBlock &MBB : MF) {
279+ // Iterate through instructions - use a worklist to handle erasures
280+ SmallVector<MachineInstr *, 4 > ToErase;
281+
282+ for (MachineInstr &MI : MBB) {
283+ if (tryEliminateRedundantZeroExtend (MI, MBB)) {
284+ Changed = true ;
285+ // Note: MI is already erased in tryEliminateRedundantZeroExtend
286+ break ; // Restart iteration for this block
287+ }
288+ }
289+ }
290+
291+ return Changed;
292+ }
0 commit comments