[InstCombine] allow variable shift amount in bswap + shift fold

When shifting by a byte-multiple:
bswap (shl X, Y) --> lshr (bswap X), Y
bswap (lshr X, Y) --> shl (bswap X), Y

This was limited to constants as a first step in D122010 / 60820e53ec,
but issue #55327 shows a source example (and there's a test based on that here)
where a variable shift amount is used in this pattern.
Sanjay Patel 2022-05-18 14:34:48 -04:00
parent a3b30d22da
commit ebbc37391f
2 changed files with 22 additions and 17 deletions
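
For reference, a standalone sketch (not part of the patch) of the identity the fold relies on: when the shift amount is a multiple of 8, byte-swapping commutes with a logical shift once the shift direction is inverted. It assumes the GCC/Clang __builtin_bswap64 builtin as a stand-in for llvm.bswap.i64:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t X = 0x1122334455667788ULL;
  for (unsigned Y = 0; Y < 64; Y += 8) {
    // bswap (shl X, Y) --> lshr (bswap X), Y
    assert(__builtin_bswap64(X << Y) == (__builtin_bswap64(X) >> Y));
    // bswap (lshr X, Y) --> shl (bswap X), Y
    assert(__builtin_bswap64(X >> Y) == (__builtin_bswap64(X) << Y));
  }
  return 0;
}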


@@ -1433,23 +1433,25 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
   }
   case Intrinsic::bswap: {
     Value *IIOperand = II->getArgOperand(0);
-    Value *X = nullptr;
 
     // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
     // inverse-shift-of-bswap:
-    // bswap (shl X, C) --> lshr (bswap X), C
-    // bswap (lshr X, C) --> shl (bswap X), C
-    // TODO: Use knownbits to allow variable shift and non-splat vector match.
-    BinaryOperator *BO;
-    if (match(IIOperand, m_OneUse(m_BinOp(BO)))) {
+    // bswap (shl X, Y) --> lshr (bswap X), Y
+    // bswap (lshr X, Y) --> shl (bswap X), Y
+    Value *X, *Y;
+    if (match(IIOperand, m_OneUse(m_LogicalShift(m_Value(X), m_Value(Y))))) {
+      // The transform allows undef vector elements, so try a constant match
+      // first. If knownbits can handle that case, that clause could be removed.
+      unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
       const APInt *C;
-      if (match(BO, m_LogicalShift(m_Value(X), m_APIntAllowUndef(C))) &&
-          (*C & 7) == 0) {
+      if ((match(Y, m_APIntAllowUndef(C)) && (*C & 7) == 0) ||
+          MaskedValueIsZero(Y, APInt::getLowBitsSet(BitWidth, 3))) {
         Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
         BinaryOperator::BinaryOps InverseShift =
-            BO->getOpcode() == Instruction::Shl ? Instruction::LShr
-                                                : Instruction::Shl;
-        return BinaryOperator::Create(InverseShift, NewSwap, BO->getOperand(1));
+            cast<BinaryOperator>(IIOperand)->getOpcode() == Instruction::Shl
+                ? Instruction::LShr
+                : Instruction::Shl;
+        return BinaryOperator::Create(InverseShift, NewSwap, Y);
       }
     }
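
The new MaskedValueIsZero clause accepts a variable shift amount whenever the low three bits of Y are known to be zero, i.e. Y is provably a multiple of 8. In the PR 55327 pattern the amount is computed as (n << 3) & 56, so that always holds. A tiny standalone check of the arithmetic fact (my sketch, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  for (uint64_t N = 0; N < 1024; ++N) {
    uint64_t Amt = (N << 3) & 56; // shift amount as built in the PR 55327 test
    assert((Amt & 7) == 0);       // low three bits are always zero
  }
  return 0;
}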


@@ -159,11 +159,14 @@ define i64 @swap_shl16_i64(i64 %x) {
   ret i64 %r
 }
 
+; canonicalize shift after bswap if shift amount is multiple of 8-bits
+; (including non-uniform vector elements)
+
 define <2 x i32> @variable_lshr_v2i32(<2 x i32> %x, <2 x i32> %n) {
 ; CHECK-LABEL: @variable_lshr_v2i32(
 ; CHECK-NEXT:    [[SHAMT:%.*]] = and <2 x i32> [[N:%.*]], <i32 -8, i32 -16>
-; CHECK-NEXT:    [[S:%.*]] = shl <2 x i32> [[X:%.*]], [[SHAMT]]
-; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[S]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[X:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i32> [[TMP1]], [[SHAMT]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %shamt = and <2 x i32> %n, <i32 -8, i32 -16>
@@ -172,15 +175,13 @@ define <2 x i32> @variable_lshr_v2i32(<2 x i32> %x, <2 x i32> %n) {
   ret <2 x i32> %r
 }
 
-; PR55327
+; PR55327 - swaps cancel
 
 define i64 @variable_shl_i64(i64 %x, i64 %n) {
 ; CHECK-LABEL: @variable_shl_i64(
-; CHECK-NEXT:    [[B:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[X:%.*]])
 ; CHECK-NEXT:    [[N8:%.*]] = shl i64 [[N:%.*]], 3
 ; CHECK-NEXT:    [[SHAMT:%.*]] = and i64 [[N8]], 56
-; CHECK-NEXT:    [[S:%.*]] = shl i64 [[B]], [[SHAMT]]
-; CHECK-NEXT:    [[R:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[S]])
+; CHECK-NEXT:    [[R:%.*]] = lshr i64 [[X:%.*]], [[SHAMT]]
 ; CHECK-NEXT:    ret i64 [[R]]
 ;
   %b = tail call i64 @llvm.bswap.i64(i64 %x)
@@ -191,6 +192,8 @@ define i64 @variable_shl_i64(i64 %x, i64 %n) {
   ret i64 %r
 }
 
+; negative test - must have multiple of 8-bit shift amount
+
 define i64 @variable_shl_not_masked_enough_i64(i64 %x, i64 %n) {
 ; CHECK-LABEL: @variable_shl_not_masked_enough_i64(
 ; CHECK-NEXT:    [[SHAMT:%.*]] = and i64 [[N:%.*]], -4
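
A standalone sketch (mine, not from the patch) of the two behaviors the tests above pin down: with a byte-multiple shift amount the two bswaps in variable_shl_i64 cancel and the expression folds to a plain lshr, while the -4 mask in variable_shl_not_masked_enough_i64 does not force a byte-multiple amount, so no fold may fire. Again assumes the GCC/Clang __builtin_bswap64 builtin:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t X = 0x0123456789ABCDEFULL;
  for (uint64_t N = 0; N < 8; ++N) {
    uint64_t Shamt = (N << 3) & 56; // always a multiple of 8
    // bswap (shl (bswap X), Shamt) --> lshr X, Shamt
    assert(__builtin_bswap64(__builtin_bswap64(X) << Shamt) == (X >> Shamt));
  }
  // Counterexample for the negative test: N & -4 need not be a byte multiple.
  uint64_t Bad = 4 & uint64_t(-4); // == 4, low three bits not all zero
  assert((Bad & 7) != 0);
  return 0;
}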