AMDGPU: Use isLiteralConstantLike to check whether the operand could ever be literal

Summary:
  To compute the size of a VALU/SALU instruction, we need to check whether an operand
could ever be a literal. Previously isLiteralConstant was used, which missed cases
such as global variables and external symbols. Those misses led to underestimating
the instruction size and the branch offset, and thus to incorrectly skipping the
necessary branch relaxation when the branch offset is actually greater than what the
branch bits can hold. This patch uses isLiteralConstantLike to check the operands.
It may be conservative, but it is safe.
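
For context, the size computation walks the explicit operands and adds one extra
dword when any of them can only be encoded as a 32-bit literal. Below is a minimal
sketch of that logic, paraphrased from SIInstrInfo::getInstSizeInBytes; the wrapper
function name sizeWithPossibleLiteral is made up for illustration only.

  #include "SIInstrInfo.h"
  #include "llvm/CodeGen/MachineInstr.h"
  #include "llvm/MC/MCInstrDesc.h"
  using namespace llvm;

  // Hypothetical helper mirroring the relevant part of getInstSizeInBytes:
  // if any explicit operand may need a literal encoding, the instruction
  // occupies DescSize + 4 bytes; otherwise just DescSize.
  static unsigned sizeWithPossibleLiteral(const MachineInstr &MI,
                                          const MCInstrDesc &Desc,
                                          const SIInstrInfo &TII) {
    unsigned DescSize = Desc.getSize();
    for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
      const MachineOperand &Op = MI.getOperand(I);
      const MCOperandInfo &OpInfo = Desc.OpInfo[I];
      if (TII.isLiteralConstantLike(Op, OpInfo))
        return DescSize + 4; // one extra dword for the literal
    }
    return DescSize;
  }

The old isLiteralConstant check only caught plain immediates, so an operand that is a
global address or external symbol contributed nothing to the estimate even though it
is ultimately emitted as a 32-bit literal/relocation.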

Reviewers: arsenm

Differential Revision: https://reviews.llvm.org/D122778
Changpeng Fang  2022-03-31 08:06:31 -07:00
parent 0a460416e6
commit 1711020c37
3 changed files with 44 additions and 1 deletion


@@ -7461,7 +7461,9 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
       return DescSize;
     bool HasLiteral = false;
     for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
-      if (isLiteralConstant(MI, I)) {
+      const MachineOperand &Op = MI.getOperand(I);
+      const MCOperandInfo &OpInfo = Desc.OpInfo[I];
+      if (isLiteralConstantLike(Op, OpInfo)) {
         HasLiteral = true;
         break;
       }
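
The key difference is what counts as a potential literal: isLiteralConstant only
looks at immediate operands, while isLiteralConstantLike also flags symbolic
operands that must be encoded as a 32-bit literal. The classification is roughly
the following; this is a paraphrase of the existing helper in SIInstrInfo, not a
verbatim copy, and the wrapper name mayNeedLiteral is hypothetical.

  #include "SIInstrInfo.h"
  #include "llvm/CodeGen/MachineOperand.h"
  #include "llvm/MC/MCInstrDesc.h"
  using namespace llvm;

  // Registers never need a literal; immediates need one unless they are
  // inline constants; addresses and symbols always need one, so they now
  // count toward the size estimate.
  static bool mayNeedLiteral(const SIInstrInfo &TII, const MachineOperand &MO,
                             const MCOperandInfo &OpInfo) {
    switch (MO.getType()) {
    case MachineOperand::MO_Register:
      return false;
    case MachineOperand::MO_Immediate:
      return !TII.isInlineConstant(MO, OpInfo);
    case MachineOperand::MO_FrameIndex:
    case MachineOperand::MO_MachineBasicBlock:
    case MachineOperand::MO_ExternalSymbol:
    case MachineOperand::MO_GlobalAddress:
    case MachineOperand::MO_MCSymbol:
      return true;
    default:
      return true; // stay conservative for anything unexpected
    }
  }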


@@ -520,6 +520,7 @@ def : GCNPat<
 def SI_CALL : SPseudoInstSI <
   (outs SReg_64:$dst), (ins SSrc_b64:$src0, unknown:$callee)> {
   let Size = 4;
+  let FixedSize = 1;
   let isCall = 1;
   let UseNamedOperandTable = 1;
   let SchedRW = [WriteBranch];
@@ -532,6 +533,7 @@ def SI_TCRETURN : SPseudoInstSI <(outs),
   (ins SReg_64:$src0, unknown:$callee, i32imm:$fpdiff),
   [(AMDGPUtc_return i64:$src0, tglobaladdr:$callee, i32:$fpdiff)]> {
   let Size = 4;
+  let FixedSize = 1;
   let isCall = 1;
   let isTerminator = 1;
   let isReturn = 1;
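
SI_CALL and SI_TCRETURN need FixedSize because their $callee operand is a global
address, which the new check classifies as literal-like; without the bit, the size
estimate for these pseudos (which always lower to a single 4-byte instruction) would
grow by an extra dword. With FixedSize set, getInstSizeInBytes trusts the declared
Size and never reaches the operand scan. A rough sketch of that early-out, assuming
the existing isFixedSize helper backed by the FIXED_SIZE TSFlag:

  // Sketch: a fixed-size pseudo reports its declared Size directly, so the
  // literal scan shown above never runs for SI_CALL / SI_TCRETURN.
  if (SIInstrInfo::isFixedSize(MI))
    return DescSize;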


@@ -0,0 +1,39 @@
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs -amdgpu-s-branch-bits=6 < %s | FileCheck -check-prefix=GCN %s
; Restrict maximum branch to between +31 and -32 dwords
declare void @llvm.amdgcn.s.sleep(i32) #0
@name1 = external addrspace(1) global i32
@name2 = external addrspace(1) global i32
@name3 = external addrspace(1) global i32
; GCN-LABEL: {{^}}branch_offset_test:
; GCN: s_cmp_eq_u32 s{{[0-9]+}}, 0
; GCN-NEXT: s_cbranch_scc0 [[BB2:.LBB[0-9]+_[0-9]+]]
; GCN-NEXT: .LBB{{[0-9]+}}_{{[0-9]+}}: ; %bb
; GCN-NEXT: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[BB3:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[BB3]]-[[POST_GETPC]])>>32
; GCN-NEXT: s_setpc_b64 s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: [[BB2]]: ; %bb2
; GCN-NEXT: s_getpc_b64 s[[[PC_LO]]:[[PC_HI]]]
; GCN: [[BB3]]: ; %bb3
define amdgpu_kernel void @branch_offset_test(i32 addrspace(1)* %arg, i32 %cnd) #0 {
bb:
%cmp = icmp eq i32 %cnd, 0
br i1 %cmp, label %bb3, label %bb2 ; +8 dword branch
bb2:
store i32 1, i32 addrspace(1)* @name1
store i32 2, i32 addrspace(1)* @name2
store i32 3, i32 addrspace(1)* @name3
call void @llvm.amdgcn.s.sleep(i32 0)
br label %bb3
bb3:
store volatile i32 %cnd, i32 addrspace(1)* %arg
ret void
}