[X86][SSE] Enable X86ISD::ANDNP constant folding

This commit is contained in:
Simon Pilgrim 2022-07-24 11:07:34 +01:00
parent 293899c64b
commit ce81a0df67
2 changed files with 21 additions and 8 deletions

View file

@@ -51134,6 +51134,9 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
MVT VT = N->getSimpleValueType(0);
MVT SVT = VT.getScalarType();
int NumElts = VT.getVectorNumElements();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
// ANDNP(undef, x) -> 0
// ANDNP(x, undef) -> 0
@@ -51152,6 +51155,19 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
if (SDValue Not = IsNOT(N0, DAG))
return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getBitcast(VT, Not), N1);
// Constant Folding
APInt Undefs0, Undefs1;
SmallVector<APInt> EltBits0, EltBits1;
if (getTargetConstantBitsFromNode(N0, EltSizeInBits, Undefs0, EltBits0) &&
getTargetConstantBitsFromNode(N1, EltSizeInBits, Undefs1, EltBits1)) {
SDLoc DL(N);
SmallVector<APInt> ResultBits;
for (int I = 0; I != NumElts; ++I)
ResultBits.push_back(~EltBits0[I] & EltBits1[I]);
APInt ResultUndefs = APInt::getZero(NumElts);
return getConstVector(ResultBits, ResultUndefs, VT, DAG, DL);
}
// TODO: Constant fold NOT(N0) to allow us to use AND.
// TODO: Do this in IsNOT with suitable oneuse checks?
@@ -51166,8 +51182,6 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
auto GetDemandedMasks = [&](SDValue Op, bool Invert = false) {
APInt UndefElts;
SmallVector<APInt> EltBits;
int NumElts = VT.getVectorNumElements();
int EltSizeInBits = VT.getScalarSizeInBits();
APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
APInt DemandedElts = APInt::getAllOnes(NumElts);
if (getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts,

View file

@@ -22,12 +22,11 @@ define void @test_fshl(<8 x i64> %lo, <8 x i64> %hi, <8 x i64>* %arr) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm2 = [12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12]
; CHECK-NEXT: vpandnq {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm2, %zmm2
; CHECK-NEXT: vpsrlq $1, %zmm0, %zmm0
; CHECK-NEXT: vpsrlvq %zmm2, %zmm0, %zmm0
; CHECK-NEXT: vpsllq $12, %zmm1, %zmm1
; CHECK-NEXT: vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm1
; CHECK-NEXT: vmovdqa64 %zmm1, (%eax)
; CHECK-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}, %zmm2, %zmm2
; CHECK-NEXT: vpsllvq %zmm2, %zmm1, %zmm1
; CHECK-NEXT: vpsrlq $52, %zmm0, %zmm0
; CHECK-NEXT: vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}, %zmm1, %zmm0
; CHECK-NEXT: vmovdqa64 %zmm0, (%eax)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
entry: