[AMDGPU] Handle legacy multiply-accumulate opcodes in convertToThreeAddress
Handle V_MAC_LEGACY_F32 and V_FMAC_LEGACY_F32 in convertToThreeAddress, to avoid the need for an extra mov instruction in some cases.
Differential Revision: https://reviews.llvm.org/D120704
This commit is contained in:
parent
9ac3a85047
commit
289339140e
|
@ -3246,9 +3246,15 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
|
||||||
bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
|
bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
|
||||||
Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64;
|
Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64;
|
||||||
bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
|
bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
|
||||||
|
Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
|
||||||
|
Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
|
||||||
Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
|
Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
|
||||||
Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
|
Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
|
||||||
bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
|
bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
|
||||||
|
bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
|
||||||
|
Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
|
||||||
|
Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
|
||||||
|
Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
|
||||||
|
|
||||||
switch (Opc) {
|
switch (Opc) {
|
||||||
default:
|
default:
|
||||||
|
@ -3256,13 +3262,17 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
|
||||||
case AMDGPU::V_MAC_F16_e64:
|
case AMDGPU::V_MAC_F16_e64:
|
||||||
case AMDGPU::V_FMAC_F16_e64:
|
case AMDGPU::V_FMAC_F16_e64:
|
||||||
case AMDGPU::V_MAC_F32_e64:
|
case AMDGPU::V_MAC_F32_e64:
|
||||||
|
case AMDGPU::V_MAC_LEGACY_F32_e64:
|
||||||
case AMDGPU::V_FMAC_F32_e64:
|
case AMDGPU::V_FMAC_F32_e64:
|
||||||
|
case AMDGPU::V_FMAC_LEGACY_F32_e64:
|
||||||
case AMDGPU::V_FMAC_F64_e64:
|
case AMDGPU::V_FMAC_F64_e64:
|
||||||
break;
|
break;
|
||||||
case AMDGPU::V_MAC_F16_e32:
|
case AMDGPU::V_MAC_F16_e32:
|
||||||
case AMDGPU::V_FMAC_F16_e32:
|
case AMDGPU::V_FMAC_F16_e32:
|
||||||
case AMDGPU::V_MAC_F32_e32:
|
case AMDGPU::V_MAC_F32_e32:
|
||||||
|
case AMDGPU::V_MAC_LEGACY_F32_e32:
|
||||||
case AMDGPU::V_FMAC_F32_e32:
|
case AMDGPU::V_FMAC_F32_e32:
|
||||||
|
case AMDGPU::V_FMAC_LEGACY_F32_e32:
|
||||||
case AMDGPU::V_FMAC_F64_e32: {
|
case AMDGPU::V_FMAC_F64_e32: {
|
||||||
int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
|
int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
|
||||||
AMDGPU::OpName::src0);
|
AMDGPU::OpName::src0);
|
||||||
|
@ -3292,6 +3302,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
|
||||||
const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
|
const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
|
||||||
|
|
||||||
if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 &&
|
if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 &&
|
||||||
|
!IsLegacy &&
|
||||||
// If we have an SGPR input, we will violate the constant bus restriction.
|
// If we have an SGPR input, we will violate the constant bus restriction.
|
||||||
(ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
|
(ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
|
||||||
!RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
|
!RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
|
||||||
|
@ -3361,10 +3372,14 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned NewOpc = IsFMA ? (IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
|
unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
|
||||||
: IsF64 ? AMDGPU::V_FMA_F64_e64
|
: IsF64 ? AMDGPU::V_FMA_F64_e64
|
||||||
: AMDGPU::V_FMA_F32_e64)
|
: IsLegacy
|
||||||
: (IsF16 ? AMDGPU::V_MAD_F16_e64 : AMDGPU::V_MAD_F32_e64);
|
? AMDGPU::V_FMA_LEGACY_F32_e64
|
||||||
|
: AMDGPU::V_FMA_F32_e64
|
||||||
|
: IsF16 ? AMDGPU::V_MAD_F16_e64
|
||||||
|
: IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64
|
||||||
|
: AMDGPU::V_MAD_F32_e64;
|
||||||
if (pseudoToMCOpcode(NewOpc) == -1)
|
if (pseudoToMCOpcode(NewOpc) == -1)
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
|
|
|
@ -272,8 +272,7 @@ define float @v_mad_legacy_f32(float %a, float %b, float %c) #2 {
|
||||||
; GFX6-LABEL: v_mad_legacy_f32:
|
; GFX6-LABEL: v_mad_legacy_f32:
|
||||||
; GFX6: ; %bb.0:
|
; GFX6: ; %bb.0:
|
||||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX6-NEXT: v_mac_legacy_f32_e32 v2, v0, v1
|
; GFX6-NEXT: v_mad_legacy_f32 v0, v0, v1, v2
|
||||||
; GFX6-NEXT: v_mov_b32_e32 v0, v2
|
|
||||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX8-LABEL: v_mad_legacy_f32:
|
; GFX8-LABEL: v_mad_legacy_f32:
|
||||||
|
@ -292,8 +291,7 @@ define float @v_mad_legacy_f32(float %a, float %b, float %c) #2 {
|
||||||
; GFX101: ; %bb.0:
|
; GFX101: ; %bb.0:
|
||||||
; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
|
; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
|
||||||
; GFX101-NEXT: v_mac_legacy_f32_e32 v2, v0, v1
|
; GFX101-NEXT: v_mad_legacy_f32 v0, v0, v1, v2
|
||||||
; GFX101-NEXT: v_mov_b32_e32 v0, v2
|
|
||||||
; GFX101-NEXT: s_setpc_b64 s[30:31]
|
; GFX101-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX103-LABEL: v_mad_legacy_f32:
|
; GFX103-LABEL: v_mad_legacy_f32:
|
||||||
|
|
|
@ -7,8 +7,7 @@ define float @v_fma(float %a, float %b, float %c) {
|
||||||
; GCN: ; %bb.0:
|
; GCN: ; %bb.0:
|
||||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GCN-NEXT: s_waitcnt_vscnt null, 0x0
|
; GCN-NEXT: s_waitcnt_vscnt null, 0x0
|
||||||
; GCN-NEXT: v_fmac_legacy_f32_e32 v2, v0, v1
|
; GCN-NEXT: v_fma_legacy_f32 v0, v0, v1, v2
|
||||||
; GCN-NEXT: v_mov_b32_e32 v0, v2
|
|
||||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||||
%fma = call float @llvm.amdgcn.fma.legacy(float %a, float %b, float %c)
|
%fma = call float @llvm.amdgcn.fma.legacy(float %a, float %b, float %c)
|
||||||
ret float %fma
|
ret float %fma
|
||||||
|
|
|
@ -65,10 +65,10 @@ define amdgpu_kernel void @test_mad_legacy_f32(float addrspace(1)* %out, float %
|
||||||
}
|
}
|
||||||
|
|
||||||
; GCN-LABEL: {{^}}test_mad_legacy_f32_imm:
|
; GCN-LABEL: {{^}}test_mad_legacy_f32_imm:
|
||||||
; GFX6: v_mac_legacy_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
; GFX6: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
||||||
; GFX8: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
; GFX8: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
||||||
; GFX9: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
; GFX9: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
||||||
; GFX101: v_mac_legacy_f32_e64 v{{[0-9]+}}, 0x41200000, s{{[0-9]+}}
|
; GFX101: v_mad_legacy_f32 v{{[0-9]+}}, 0x41200000, s{{[0-9]+}}
|
||||||
; GFX103: v_mul_legacy_f32_e64 v{{[0-9]+}}, 0x41200000, s{{[0-9]+}}
|
; GFX103: v_mul_legacy_f32_e64 v{{[0-9]+}}, 0x41200000, s{{[0-9]+}}
|
||||||
; GFX103: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
; GFX103: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
||||||
define amdgpu_kernel void @test_mad_legacy_f32_imm(float addrspace(1)* %out, float %a, float %c) #2 {
|
define amdgpu_kernel void @test_mad_legacy_f32_imm(float addrspace(1)* %out, float %a, float %c) #2 {
|
||||||
|
|
Loading…
Reference in a new issue