[PowerPC] Fix use check of swap-reduction

This fixes swap-reduction in DAGISel for cases where a COPY_TO_REGCLASS
node has multiple uses.
This commit is contained in:
Qiu Chaofan 2021-04-07 15:55:52 +08:00
parent 030ac786d4
commit 033c9c2552
2 changed files with 34 additions and 5 deletions

View file

@ -6903,19 +6903,22 @@ static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
// TODO: Can we put this in a common method for DAG?
auto SkipRCCopy = [](SDValue V) {
while (V->isMachineOpcode() &&
V->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS)
V->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS) {
// All values in the chain should have single use.
if (V->use_empty() || !V->use_begin()->isOnlyUserOf(V.getNode()))
return SDValue();
V = V->getOperand(0);
return V;
}
return V.hasOneUse() ? V : SDValue();
};
SDValue VecOp = SkipRCCopy(N->getOperand(0));
if (!isLaneInsensitive(VecOp) || !VecOp.hasOneUse())
if (!VecOp || !isLaneInsensitive(VecOp))
return;
SDValue LHS = SkipRCCopy(VecOp.getOperand(0)),
RHS = SkipRCCopy(VecOp.getOperand(1));
if (!LHS.hasOneUse() || !RHS.hasOneUse() || !isVSXSwap(LHS) ||
!isVSXSwap(RHS))
if (!LHS || !RHS || !isVSXSwap(LHS) || !isVSXSwap(RHS))
return;
// These swaps may still have chain-uses here, count on dead code elimination

View file

@ -83,5 +83,31 @@ entry:
ret i16 %2
}
; Test case where a loaded vector has more than one user: %1 feeds both the
; vector multiply and the extractelement below. The expected output keeps the
; xxswapd instructions after each lxvd2x and before the stxvd2x.
define signext i32 @vecop_uses2([4 x i32]* %a, [4 x i32]* %b, [4 x i32]* %c) {
; CHECK-LABEL: vecop_uses2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvd2x 0, 0, 3
; CHECK-NEXT: lxvd2x 1, 0, 4
; CHECK-NEXT: xxswapd 34, 0
; CHECK-NEXT: xxswapd 35, 1
; CHECK-NEXT: xxsldwi 0, 34, 34, 3
; CHECK-NEXT: vmuluwm 2, 3, 2
; CHECK-NEXT: mffprwz 3, 0
; CHECK-NEXT: xxswapd 0, 34
; CHECK-NEXT: extsw 3, 3
; CHECK-NEXT: stxvd2x 0, 0, 5
; CHECK-NEXT: blr
entry:
; Load the two <4 x i32> inputs from %a and %b.
%0 = bitcast [4 x i32]* %a to <4 x i32>*
%1 = load <4 x i32>, <4 x i32>* %0, align 4
%2 = bitcast [4 x i32]* %b to <4 x i32>*
%3 = load <4 x i32>, <4 x i32>* %2, align 4
; First use of %1: element-wise multiply, with the product stored to %c.
%4 = mul <4 x i32> %3, %1
%5 = bitcast [4 x i32]* %c to <4 x i32>*
store <4 x i32> %4, <4 x i32>* %5, align 4
; Second use of %1: lane 3 is extracted and returned.
%6 = extractelement <4 x i32> %1, i32 3
ret i32 %6
}
declare <16 x i8> @llvm.ppc.altivec.vavgsb(<16 x i8>, <16 x i8>)
declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>)