[AArch64][GlobalISel] Handle any-extending FPR loads in manual selection code.
When we have an any-extending load on the FPR bank, none of the TableGen patterns match and we fall back to the C++ selector. As with the truncating stores that were fixed recently, the C++ code wasn't able to handle this case and ended up generating invalid copies between register classes of different sizes. This change adds handling for this case by splitting the load into a regular load and a SUBREG_TO_REG that extends it into the original wide destination register.
This commit is contained in:
parent
f596acc74d
commit
a86bbe1e31
|
@ -2764,6 +2764,30 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
|
|||
.getReg(0);
|
||||
RBI.constrainGenericRegister(Copy, *RC, MRI);
|
||||
LdSt.getOperand(0).setReg(Copy);
|
||||
} else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
|
||||
// If this is an any-extending load from the FPR bank, split it into a regular
|
||||
// load + extend.
|
||||
if (RB.getID() == AArch64::FPRRegBankID) {
|
||||
unsigned SubReg;
|
||||
LLT MemTy = LdSt.getMMO().getMemoryType();
|
||||
auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
|
||||
if (!getSubRegForClass(RC, TRI, SubReg))
|
||||
return false;
|
||||
Register OldDst = LdSt.getReg(0);
|
||||
Register NewDst =
|
||||
MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
|
||||
LdSt.getOperand(0).setReg(NewDst);
|
||||
MRI.setRegBank(NewDst, RB);
|
||||
// Generate a SUBREG_TO_REG to extend it.
|
||||
MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
|
||||
MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
|
||||
.addImm(0)
|
||||
.addUse(NewDst)
|
||||
.addImm(SubReg);
|
||||
auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB, RBI);
|
||||
RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
|
||||
MIB.setInstr(LdSt);
|
||||
}
|
||||
}
|
||||
|
||||
// Helper lambda for partially selecting I. Either returns the original
|
||||
|
|
|
@ -38,6 +38,8 @@
|
|||
define void @load_4xi32(<4 x i32>* %ptr) { ret void }
|
||||
define void @load_8xi16(<8 x i16>* %ptr) { ret void }
|
||||
define void @load_16xi8(<16 x i8>* %ptr) { ret void }
|
||||
define void @anyext_on_fpr() { ret void }
|
||||
define void @anyext_on_fpr8() { ret void }
|
||||
|
||||
...
|
||||
|
||||
|
@ -638,3 +640,69 @@ body: |
|
|||
RET_ReallyLR implicit $q0
|
||||
|
||||
...
|
||||
---
|
||||
name: anyext_on_fpr
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$x0' }
|
||||
- { reg: '$x1' }
|
||||
- { reg: '$x2' }
|
||||
- { reg: '$w3' }
|
||||
frameInfo:
|
||||
maxAlignment: 1
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $w3, $x0, $x1, $x2
|
||||
|
||||
; CHECK-LABEL: name: anyext_on_fpr
|
||||
; CHECK: liveins: $w3, $x0, $x1, $x2
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
|
||||
; CHECK: [[LDRHui:%[0-9]+]]:fpr16 = LDRHui [[COPY]], 0 :: (load (s16))
|
||||
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[LDRHui]], %subreg.hsub
|
||||
; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
|
||||
; CHECK: $w0 = COPY [[COPY1]]
|
||||
; CHECK: RET_ReallyLR
|
||||
%0:gpr(p0) = COPY $x0
|
||||
%16:fpr(s32) = G_LOAD %0(p0) :: (load (s16))
|
||||
%24:gpr(s32) = COPY %16(s32)
|
||||
$w0 = COPY %24(s32)
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
||||
---
|
||||
name: anyext_on_fpr8
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$x0' }
|
||||
- { reg: '$x1' }
|
||||
- { reg: '$x2' }
|
||||
- { reg: '$w3' }
|
||||
frameInfo:
|
||||
maxAlignment: 1
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $w3, $x0, $x1, $x2
|
||||
|
||||
; CHECK-LABEL: name: anyext_on_fpr8
|
||||
; CHECK: liveins: $w3, $x0, $x1, $x2
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
|
||||
; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load (s8))
|
||||
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[LDRBui]], %subreg.bsub
|
||||
; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
|
||||
; CHECK: $w0 = COPY [[COPY1]]
|
||||
; CHECK: RET_ReallyLR
|
||||
%0:gpr(p0) = COPY $x0
|
||||
%16:fpr(s32) = G_LOAD %0(p0) :: (load (s8))
|
||||
%24:gpr(s32) = COPY %16(s32)
|
||||
$w0 = COPY %24(s32)
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
||||
|
|
Loading…
Reference in a new issue