From a3e38b4a206b07077f964e8e0a32c4c39714ac42 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 24 Jul 2022 12:00:31 +0100 Subject: [PATCH] [DAG] SimplifyDemandedVectorElts - if every and/mul element-pair has a zero/undef then just constant fold to zero --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 7 ++++++- llvm/test/CodeGen/X86/vector-partial-undef.ll | 4 +--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 8f71b9f49ad4..cd4f0ae42bcd 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3364,6 +3364,12 @@ bool TargetLowering::SimplifyDemandedVectorElts( TLO, Depth + 1)) return true; + // If every element pair has a zero/undef then just fold to zero. + // fold (and x, undef) -> 0 / (and x, 0) -> 0 + // fold (mul x, undef) -> 0 / (mul x, 0) -> 0 + if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef)) + return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); + // If either side has a zero element, then the result element is zero, even // if the other is an UNDEF. // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros @@ -3373,7 +3379,6 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownUndef &= ~KnownZero; // Attempt to avoid multi-use ops if we don't need anything from them. - // TODO - use KnownUndef to relax the demandedelts? if (!DemandedElts.isAllOnes()) if (SimplifyDemandedVectorEltsBinOp(Op0, Op1)) return true; diff --git a/llvm/test/CodeGen/X86/vector-partial-undef.ll b/llvm/test/CodeGen/X86/vector-partial-undef.ll index cd4a77e3d471..ffd95b38dc31 100644 --- a/llvm/test/CodeGen/X86/vector-partial-undef.ll +++ b/llvm/test/CodeGen/X86/vector-partial-undef.ll @@ -85,9 +85,7 @@ define <4 x i64> @and_undef_elts(<2 x i64> %x) { ; ; AVX-LABEL: and_undef_elts: ; AVX: # %bb.0: -; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 -; AVX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,0,1,2] -; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq %extend = shufflevector <2 x i64> %x, <2 x i64> undef, <4 x i32> %bogus_bo = and <4 x i64> %extend,