[ARM] Remove dead MOVs in preheader of tail-predicated loops

With t2DoLoopDec we can be left with some extra MOVs in the preheaders
of tail-predicated loops. This removes them, in the same way we remove
other dead variables.

Differential Revision: https://reviews.llvm.org/D91857
This commit is contained in:
David Green 2021-02-11 10:48:20 +00:00
parent 6c47eafb39
commit 7786ac8377
8 changed files with 14 additions and 15 deletions

View file

@ -796,6 +796,20 @@ bool LowOverheadLoop::ValidateTailPredicate() {
ToRemove.insert(ElementChain.begin(), ElementChain.end());
}
}
// If we converted the LoopStart to a t2DoLoopStartTP, we can also remove any
// extra instructions in the preheader, which often includes a now unused MOV.
if (Start->getOpcode() == ARM::t2DoLoopStartTP && Preheader &&
!Preheader->empty() &&
!RDA.hasLocalDefBefore(VCTP, VCTP->getOperand(1).getReg())) {
if (auto *Def = RDA.getUniqueReachingMIDef(
&Preheader->back(), VCTP->getOperand(1).getReg().asMCReg())) {
SmallPtrSet<MachineInstr*, 2> Ignore;
Ignore.insert(VCTPs.begin(), VCTPs.end());
TryRemove(Def, RDA, ToRemove, Ignore);
}
}
return true;
}

View file

@ -10,7 +10,6 @@ define void @foo(%struct.SpeexPreprocessState_* nocapture readonly %st, i16* %x)
; CHECK-NEXT: ldrd r12, r2, [r0]
; CHECK-NEXT: ldrd r4, r3, [r0, #8]
; CHECK-NEXT: rsb r12, r12, r2, lsl #1
; CHECK-NEXT: mov r2, r12
; CHECK-NEXT: dlstp.16 lr, r12
; CHECK-NEXT: .LBB0_1: @ %do.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1

View file

@ -7,7 +7,6 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: mov r3, r1
; CHECK-NEXT: mov r12, r0
; CHECK-NEXT: dlstp.32 lr, r1
; CHECK-NEXT: .LBB0_1: @ %do.body.i
@ -19,7 +18,6 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float
; CHECK-NEXT: vmov s4, r1
; CHECK-NEXT: vadd.f32 s0, s3, s3
; CHECK-NEXT: vcvt.f32.u32 s4, s4
; CHECK-NEXT: mov r3, r1
; CHECK-NEXT: vdiv.f32 s0, s0, s4
; CHECK-NEXT: vmov r12, s0
; CHECK-NEXT: vmov.i32 q0, #0x0

View file

@ -78,7 +78,6 @@ define void @nested(i32* nocapture readonly %x, i32* nocapture readnone %y, i32*
; CHECK-NEXT: @ %bb.5: @ %do.body.preheader
; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1
; CHECK-NEXT: bic r9, r7, #3
; CHECK-NEXT: mov r7, r5
; CHECK-NEXT: mov r4, r3
; CHECK-NEXT: add.w r8, r0, r9, lsl #2
; CHECK-NEXT: dlstp.32 lr, r5

View file

@ -447,7 +447,6 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) {
; CHECK-NEXT: movw r3, :lower16:b
; CHECK-NEXT: movt r3, :upper16:b
; CHECK-NEXT: str r1, [r3]
; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: dlstp.32 lr, r6
; CHECK-NEXT: .LBB1_10: @ %vector.body111
; CHECK-NEXT: @ Parent Loop BB1_4 Depth=1
@ -462,7 +461,6 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) {
; CHECK-NEXT: .LBB1_11: @ %vector.body.preheader
; CHECK-NEXT: @ in Loop: Header=BB1_8 Depth=2
; CHECK-NEXT: vmov q1, q4
; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: dlstp.32 lr, r6
; CHECK-NEXT: .LBB1_12: @ %vector.body
; CHECK-NEXT: @ Parent Loop BB1_4 Depth=1

View file

@ -24,7 +24,6 @@ define void @DCT_mve1(%struct.DCT_InstanceTypeDef* nocapture readonly %S, float*
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: mov r7, r3
; CHECK-NEXT: mov r4, r5
; CHECK-NEXT: dlstp.32 lr, r5
; CHECK-NEXT: .LBB0_3: @ %vector.body
; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1
@ -127,7 +126,6 @@ define void @DCT_mve2(%struct.DCT_InstanceTypeDef* nocapture readonly %S, float*
; CHECK-NEXT: mov r3, r11
; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: mov r6, r12
; CHECK-NEXT: dlstp.32 lr, r12
; CHECK-NEXT: .LBB1_3: @ %vector.body
; CHECK-NEXT: @ Parent Loop BB1_2 Depth=1
@ -272,7 +270,6 @@ define void @DCT_mve3(%struct.DCT_InstanceTypeDef* nocapture readonly %S, float*
; CHECK-NEXT: mov r4, r10
; CHECK-NEXT: vmov q2, q0
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: mov r8, r7
; CHECK-NEXT: dlstp.32 lr, r7
; CHECK-NEXT: .LBB2_3: @ %vector.body
; CHECK-NEXT: @ Parent Loop BB2_2 Depth=1
@ -448,7 +445,6 @@ define void @DCT_mve4(%struct.DCT_InstanceTypeDef* nocapture readonly %S, float*
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vmov q2, q0
; CHECK-NEXT: vmov q3, q0
; CHECK-NEXT: mov r10, r7
; CHECK-NEXT: dlstp.32 lr, r7
; CHECK-NEXT: .LBB3_3: @ %vector.body
; CHECK-NEXT: @ Parent Loop BB3_2 Depth=1
@ -645,7 +641,6 @@ define void @DCT_mve5(%struct.DCT_InstanceTypeDef* nocapture readonly %S, float*
; CHECK-NEXT: vmov q3, q1
; CHECK-NEXT: vmov q2, q1
; CHECK-NEXT: vmov q4, q1
; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: dlstp.32 lr, r7
; CHECK-NEXT: .LBB4_3: @ %vector.body
; CHECK-NEXT: @ Parent Loop BB4_2 Depth=1
@ -864,7 +859,6 @@ define void @DCT_mve6(%struct.DCT_InstanceTypeDef* nocapture readonly %S, float*
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: vmov q5, q1
; CHECK-NEXT: vmov q2, q1
; CHECK-NEXT: mov r9, r7
; CHECK-NEXT: dlstp.32 lr, r7
; CHECK-NEXT: .LBB5_3: @ %vector.body
; CHECK-NEXT: @ Parent Loop BB5_2 Depth=1

View file

@ -739,7 +739,6 @@ define i8* @signext(i8* %input_row, i8* %input_col, i16 zeroext %output_ch, i16
; CHECK-NEXT: mla r3, r9, r2, r0
; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: ldrd r7, r0, [sp] @ 8-byte Folded Reload
; CHECK-NEXT: mov r11, r2
; CHECK-NEXT: mov r8, r10
; CHECK-NEXT: dlstp.16 lr, r2
; CHECK-NEXT: .LBB5_7: @ %for.body24
@ -913,7 +912,6 @@ define i8* @signext_optsize(i8* %input_row, i8* %input_col, i16 zeroext %output_
; CHECK-NEXT: mla r3, r9, r2, r0
; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: ldrd r7, r0, [sp] @ 8-byte Folded Reload
; CHECK-NEXT: mov r11, r2
; CHECK-NEXT: mov r8, r10
; CHECK-NEXT: dlstp.16 lr, r2
; CHECK-NEXT: .LBB6_5: @ %for.body24

View file

@ -14,7 +14,6 @@ define void @arm_min_helium_f32(float* %pSrc, i32 %blockSize, float* nocapture %
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: vldrw.u32 q1, [r4]
; CHECK-NEXT: vmov.i32 q3, #0x4
; CHECK-NEXT: mov r12, r1
; CHECK-NEXT: dlstp.32 lr, r1
; CHECK-NEXT: .LBB0_1: @ %do.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1