[AArch64][GlobalISel] Fix extended shift addressing mode selection not handling sxth.

The complex pattern for extended shift offsets only allows sxtw as the extend,
not sxth. Our equivalent function was not rejecting SXTH, so we were
miscompiling. This was exposed by D81992.
This commit is contained in:
Amara Emerson 2020-06-25 17:21:37 -07:00
parent f441313464
commit 97a34b5f8d
2 changed files with 107 additions and 1 deletions

View file

@ -4880,6 +4880,17 @@ bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
[](MachineInstr &Use) { return Use.mayLoadOrStore(); });
}
/// Returns true when \p Type is one of the SXTB, SXTH or SXTW extend kinds;
/// every other value yields false.
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
  return Type == AArch64_AM::SXTB || Type == AArch64_AM::SXTH ||
         Type == AArch64_AM::SXTW;
}
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectExtendedSHL(
MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
@ -4952,7 +4963,10 @@ AArch64InstructionSelector::selectExtendedSHL(
if (Ext == AArch64_AM::InvalidShiftExtend)
return None;
SignExtend = Ext == AArch64_AM::SXTW;
SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
// We only support SXTW for signed extension here.
if (SignExtend && Ext != AArch64_AM::SXTW)
return None;
// Need a 32-bit wide register here.
MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));

View file

@ -0,0 +1,92 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
--- |
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"
@x = global i32 -32768, align 4
; Reference IR for the MIR function of the same name below.
; Spills %ptr to a stack slot and reloads it, loads the global @x, subtracts
; 32768, truncates the result to i16 and sign-extends that to i64. The
; sign-extended value is used as the element index of an i32 load from the
; reloaded pointer, and the loaded value is returned.
define i32 @check_sext_not_lost(i32* %ptr) {
entry:
%ptr.addr = alloca i32*, align 8
store i32* %ptr, i32** %ptr.addr, align 8
%0 = load i32*, i32** %ptr.addr, align 8
%1 = load i32, i32* @x, align 4
%sub = sub nsw i32 %1, 32768
%conv = trunc i32 %sub to i16
%idxprom = sext i16 %conv to i64
%arrayidx = getelementptr inbounds i32, i32* %0, i64 %idxprom
%2 = load i32, i32* %arrayidx, align 4
ret i32 %2
}
...
---
name: check_sext_not_lost
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
- { id: 2, class: gpr }
- { id: 3, class: gpr }
- { id: 4, class: gpr }
- { id: 5, class: gpr }
- { id: 6, class: gpr }
- { id: 7, class: _ }
- { id: 8, class: gpr }
- { id: 9, class: _ }
- { id: 10, class: gpr }
- { id: 11, class: gpr }
- { id: 12, class: _ }
- { id: 13, class: gpr }
- { id: 14, class: gpr }
- { id: 15, class: gpr64 }
- { id: 16, class: gpr }
liveins:
- { reg: '$x0' }
frameInfo:
maxAlignment: 8
maxCallFrameSize: 0
stack:
- { id: 0, name: ptr.addr, size: 8, alignment: 8 }
machineFunctionInfo: {}
body: |
bb.0.entry:
liveins: $x0
; CHECK-LABEL: name: check_sext_not_lost
; CHECK: liveins: $x0
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK: STRXui [[COPY]], %stack.0.ptr.addr, 0 :: (store 8 into %ir.ptr.addr)
; CHECK: [[LDRXui:%[0-9]+]]:gpr64common = LDRXui %stack.0.ptr.addr, 0 :: (dereferenceable load 8 from %ir.ptr.addr)
; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @x
; CHECK: [[LDRWui:%[0-9]+]]:gpr32common = LDRWui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @x :: (dereferenceable load 4 from @x)
; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = nsw SUBSWri [[LDRWui]], 8, 12, implicit-def $nzcv
; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF
; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[SUBSWri]], %subreg.sub_32
; CHECK: [[SBFMXri:%[0-9]+]]:gpr64 = SBFMXri [[INSERT_SUBREG]], 0, 15
; CHECK: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[LDRXui]], [[SBFMXri]], 0, 1 :: (load 4 from %ir.arrayidx)
; CHECK: $w0 = COPY [[LDRWroX]]
; CHECK: RET_ReallyLR implicit $w0
%0:gpr(p0) = COPY $x0
%1:gpr(p0) = G_FRAME_INDEX %stack.0.ptr.addr
G_STORE %0(p0), %1(p0) :: (store 8 into %ir.ptr.addr)
%2:gpr(p0) = G_LOAD %1(p0) :: (dereferenceable load 8 from %ir.ptr.addr)
%15:gpr64(p0) = ADRP target-flags(aarch64-page) @x
%4:gpr(p0) = G_ADD_LOW %15(p0), target-flags(aarch64-pageoff, aarch64-nc) @x
%3:gpr(s32) = G_LOAD %4(p0) :: (dereferenceable load 4 from @x)
%5:gpr(s32) = G_CONSTANT i32 32768
%6:gpr(s32) = nsw G_SUB %3, %5
%16:gpr(s64) = G_ANYEXT %6(s32)
%8:gpr(s64) = G_SEXT_INREG %16, 16
%14:gpr(s64) = G_CONSTANT i64 2
%10:gpr(s64) = G_SHL %8, %14(s64)
%11:gpr(p0) = G_PTR_ADD %2, %10(s64)
%13:gpr(s32) = G_LOAD %11(p0) :: (load 4 from %ir.arrayidx)
$w0 = COPY %13(s32)
RET_ReallyLR implicit $w0
...