[AArch64][GlobalISel] Handle any-extending FPR loads in manual selection code.

When we have an any-extending load on the FPR bank, none of the tablegen
patterns match and we fall back to the C++ selector. As with the truncating
stores that were fixed recently, the C++ selector wasn't able to handle this
case and ended up generating invalid copies between register classes of
different sizes.

This change adds handling for this case, splitting the load into a regular
load and a SUBREG_TO_REG to extend it into the original wide destination reg.
This commit is contained in:
Amara Emerson 2021-08-31 17:22:39 -07:00
parent f596acc74d
commit a86bbe1e31
2 changed files with 92 additions and 0 deletions

View file

@ -2764,6 +2764,30 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
.getReg(0);
RBI.constrainGenericRegister(Copy, *RC, MRI);
LdSt.getOperand(0).setReg(Copy);
} else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
// If this is an any-extending load from the FPR bank, split it into a regular
// load + extend.
if (RB.getID() == AArch64::FPRRegBankID) {
unsigned SubReg;
LLT MemTy = LdSt.getMMO().getMemoryType();
auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
if (!getSubRegForClass(RC, TRI, SubReg))
return false;
Register OldDst = LdSt.getReg(0);
Register NewDst =
MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
LdSt.getOperand(0).setReg(NewDst);
MRI.setRegBank(NewDst, RB);
// Generate a SUBREG_TO_REG to extend it.
MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
.addImm(0)
.addUse(NewDst)
.addImm(SubReg);
auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB, RBI);
RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
MIB.setInstr(LdSt);
}
}
// Helper lambda for partially selecting I. Either returns the original

View file

@ -38,6 +38,8 @@
define void @load_4xi32(<4 x i32>* %ptr) { ret void }
define void @load_8xi16(<8 x i16>* %ptr) { ret void }
define void @load_16xi8(<16 x i8>* %ptr) { ret void }
define void @anyext_on_fpr() { ret void }
define void @anyext_on_fpr8() { ret void }
...
@ -638,3 +640,69 @@ body: |
RET_ReallyLR implicit $q0
...
---
# Selection test for a G_LOAD whose result (s32, FPR bank) is wider than the
# memory access (s16). The expected output is a 16-bit FPR load (LDRHui into
# fpr16) followed by a SUBREG_TO_REG through hsub that produces the fpr32
# value, which is then copied to a GPR and returned in $w0.
name: anyext_on_fpr
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
# NOTE(review): only $x0 is read by the body below; $x1, $x2 and $w3 are
# declared live-in but never referenced in this function.
liveins:
- { reg: '$x0' }
- { reg: '$x1' }
- { reg: '$x2' }
- { reg: '$w3' }
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
body: |
bb.1:
liveins: $w3, $x0, $x1, $x2
; CHECK-LABEL: name: anyext_on_fpr
; CHECK: liveins: $w3, $x0, $x1, $x2
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[LDRHui:%[0-9]+]]:fpr16 = LDRHui [[COPY]], 0 :: (load (s16))
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[LDRHui]], %subreg.hsub
; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
; CHECK: $w0 = COPY [[COPY1]]
; CHECK: RET_ReallyLR
; Input: the G_LOAD reads s16 from memory but defines an s32 value on the FPR bank.
%0:gpr(p0) = COPY $x0
%16:fpr(s32) = G_LOAD %0(p0) :: (load (s16))
%24:gpr(s32) = COPY %16(s32)
$w0 = COPY %24(s32)
RET_ReallyLR
...
---
# Selection test for a G_LOAD whose result (s32, FPR bank) is wider than the
# memory access (s8). The expected output is an 8-bit FPR load (LDRBui into
# fpr8) followed by a SUBREG_TO_REG through bsub that produces the fpr32
# value, which is then copied to a GPR and returned in $w0.
name: anyext_on_fpr8
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
# NOTE(review): only $x0 is read by the body below; $x1, $x2 and $w3 are
# declared live-in but never referenced in this function.
liveins:
- { reg: '$x0' }
- { reg: '$x1' }
- { reg: '$x2' }
- { reg: '$w3' }
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
body: |
bb.1:
liveins: $w3, $x0, $x1, $x2
; CHECK-LABEL: name: anyext_on_fpr8
; CHECK: liveins: $w3, $x0, $x1, $x2
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load (s8))
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[LDRBui]], %subreg.bsub
; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
; CHECK: $w0 = COPY [[COPY1]]
; CHECK: RET_ReallyLR
; Input: the G_LOAD reads s8 from memory but defines an s32 value on the FPR bank.
%0:gpr(p0) = COPY $x0
%16:fpr(s32) = G_LOAD %0(p0) :: (load (s8))
%24:gpr(s32) = COPY %16(s32)
$w0 = COPY %24(s32)
RET_ReallyLR
...