AMDGPU: Use isLiteralConstantLike to check whether the operand could ever be literal
Summary: To compute the size of a VALU/SALU instruction, we need to check whether an operand could ever be literal. Previously isLiteralConstant was used, which missed cases like global variables or external symbols. These misses lead to under-estimation of the instruction size and branch offset, and thus incorrectly skip the necessary branch relaxation when the branch offset is actually greater than what the branch bits can hold. In this work, we use isLiteralConstantLike to check the operands. It maybe conservative, but it is safe. Reviewers: arsenm Differential Revision: https://reviews.llvm.org/D122778
This commit is contained in:
parent
0a460416e6
commit
1711020c37
|
@ -7461,7 +7461,9 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
|
|||
return DescSize;
|
||||
bool HasLiteral = false;
|
||||
for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
|
||||
if (isLiteralConstant(MI, I)) {
|
||||
const MachineOperand &Op = MI.getOperand(I);
|
||||
const MCOperandInfo &OpInfo = Desc.OpInfo[I];
|
||||
if (isLiteralConstantLike(Op, OpInfo)) {
|
||||
HasLiteral = true;
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -520,6 +520,7 @@ def : GCNPat<
|
|||
def SI_CALL : SPseudoInstSI <
|
||||
(outs SReg_64:$dst), (ins SSrc_b64:$src0, unknown:$callee)> {
|
||||
let Size = 4;
|
||||
let FixedSize = 1;
|
||||
let isCall = 1;
|
||||
let UseNamedOperandTable = 1;
|
||||
let SchedRW = [WriteBranch];
|
||||
|
@ -532,6 +533,7 @@ def SI_TCRETURN : SPseudoInstSI <(outs),
|
|||
(ins SReg_64:$src0, unknown:$callee, i32imm:$fpdiff),
|
||||
[(AMDGPUtc_return i64:$src0, tglobaladdr:$callee, i32:$fpdiff)]> {
|
||||
let Size = 4;
|
||||
let FixedSize = 1;
|
||||
let isCall = 1;
|
||||
let isTerminator = 1;
|
||||
let isReturn = 1;
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs -amdgpu-s-branch-bits=6 < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
|
||||
; Restrict maximum branch to between +31 and -32 dwords
|
||||
declare void @llvm.amdgcn.s.sleep(i32) #0
|
||||
|
||||
@name1 = external addrspace(1) global i32
|
||||
@name2 = external addrspace(1) global i32
|
||||
@name3 = external addrspace(1) global i32
|
||||
|
||||
; GCN-LABEL: {{^}}branch_offset_test:
|
||||
; GCN: s_cmp_eq_u32 s{{[0-9]+}}, 0
|
||||
; GCN-NEXT: s_cbranch_scc0 [[BB2:.LBB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: .LBB{{[0-9]+}}_{{[0-9]+}}: ; %bb
|
||||
; GCN-NEXT: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
|
||||
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
|
||||
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[BB3:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
|
||||
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[BB3]]-[[POST_GETPC]])>>32
|
||||
; GCN-NEXT: s_setpc_b64 s[[[PC_LO]]:[[PC_HI]]]
|
||||
; GCN-NEXT: [[BB2]]: ; %bb2
|
||||
; GCN-NEXT: s_getpc_b64 s[[[PC_LO]]:[[PC_HI]]]
|
||||
|
||||
; GCN: [[BB3]]: ; %bb3
|
||||
define amdgpu_kernel void @branch_offset_test(i32 addrspace(1)* %arg, i32 %cnd) #0 {
|
||||
bb:
|
||||
%cmp = icmp eq i32 %cnd, 0
|
||||
br i1 %cmp, label %bb3, label %bb2 ; +8 dword branch
|
||||
|
||||
bb2:
|
||||
store i32 1, i32 addrspace(1)* @name1
|
||||
store i32 2, i32 addrspace(1)* @name2
|
||||
store i32 3, i32 addrspace(1)* @name3
|
||||
call void @llvm.amdgcn.s.sleep(i32 0)
|
||||
br label %bb3
|
||||
|
||||
bb3:
|
||||
store volatile i32 %cnd, i32 addrspace(1)* %arg
|
||||
ret void
|
||||
}
|
Loading…
Reference in a new issue