[Diagnostics] Allow emitting analysis and missed remarks on functions

Summary:
Currently, only `OptimizationRemarks` can be emitted using a Function.
Add constructors to allow this for `OptimizationRemarksAnalysis` and
`OptimizationRemarkMissed` as well.

Reviewed By: jdoerfert, thegameg

Differential Revision: https://reviews.llvm.org/D102784
This commit is contained in:
Joseph Huber 2021-05-19 12:19:50 -04:00
parent 9b59a61cfc
commit 2db182ff8d
8 changed files with 107 additions and 86 deletions

View file

@ -1,6 +1,6 @@
// RUN: %clang_cc1 -verify=host -Rpass=openmp -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify=all,safe -Rpass=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out
// RUN: %clang_cc1 -fexperimental-new-pass-manager -verify=all,safe -Rpass=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out
// RUN: %clang_cc1 -verify=host -Rpass=openmp-opt -Rpass-analysis=openmp -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify=all,safe -Rpass=openmp-opt -Rpass-analysis=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out
// RUN: %clang_cc1 -fexperimental-new-pass-manager -verify=all,safe -Rpass=openmp-opt -Rpass-analysis=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out
// host-no-diagnostics
@ -96,5 +96,5 @@ void spmd(void) {
}
}
// all-remark@* 5 {{OpenMP runtime call __kmpc_global_thread_num moved to}}
// all-remark@* 5 {{OpenMP runtime call __kmpc_global_thread_num moved to beginning of OpenMP region}}
// all-remark@* 12 {{OpenMP runtime call __kmpc_global_thread_num deduplicated}}

View file

@ -1,6 +1,6 @@
// RUN: %clang_cc1 -verify=host -Rpass=openmp -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -Rpass=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out
// RUN: %clang_cc1 -fexperimental-new-pass-manager -verify -Rpass=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out
// RUN: %clang_cc1 -verify=host -Rpass=openmp -Rpass-analysis=openmp-opt -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -Rpass=openmp -Rpass-analysis=openmp-opt -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out
// RUN: %clang_cc1 -fexperimental-new-pass-manager -verify -Rpass=openmp -Rpass-analysis=openmp-opt -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out
// host-no-diagnostics
@ -43,5 +43,5 @@ void spmd(void) {
}
}
// expected-remark@* {{OpenMP runtime call __kmpc_global_thread_num moved to}}
// expected-remark@* {{OpenMP runtime call __kmpc_global_thread_num moved to beginning of OpenMP region}}
// expected-remark@* 2 {{OpenMP runtime call __kmpc_global_thread_num deduplicated}}

View file

@ -743,6 +743,11 @@ public:
OptimizationRemarkMissed(const char *PassName, StringRef RemarkName,
const Instruction *Inst);
/// Same as above but \p F is used to derive code region and debug
/// location.
OptimizationRemarkMissed(const char *PassName, StringRef RemarkName,
const Function *F);
static bool classof(const DiagnosticInfo *DI) {
return DI->getKind() == DK_OptimizationRemarkMissed;
}
@ -795,6 +800,11 @@ public:
OptimizationRemarkAnalysis(const char *PassName, StringRef RemarkName,
const Instruction *Inst);
/// Same as above but \p F is used to derive code region and debug
/// location.
OptimizationRemarkAnalysis(const char *PassName, StringRef RemarkName,
const Function *F);
static bool classof(const DiagnosticInfo *DI) {
return DI->getKind() == DK_OptimizationRemarkAnalysis;
}

View file

@ -291,6 +291,13 @@ OptimizationRemarkMissed::OptimizationRemarkMissed(const char *PassName,
*Inst->getParent()->getParent(),
Inst->getDebugLoc(), Inst->getParent()) {}
// Function-based variant of the missed-optimization remark: instead of an
// Instruction, the whole Function supplies the context. The debug location
// is taken from the function's DISubprogram, and the code region comes from
// getFirstFunctionBlock(Func) — presumably the function's entry block;
// TODO(review): confirm, the helper is not visible in this hunk.
OptimizationRemarkMissed::OptimizationRemarkMissed(const char *PassName,
StringRef RemarkName,
const Function *Func)
: DiagnosticInfoIROptimization(
DK_OptimizationRemarkMissed, DS_Remark, PassName, RemarkName, *Func,
Func->getSubprogram(), getFirstFunctionBlock(Func)) {}
bool OptimizationRemarkMissed::isEnabled() const {
const Function &Fn = getFunction();
LLVMContext &Ctx = Fn.getContext();
@ -319,6 +326,13 @@ OptimizationRemarkAnalysis::OptimizationRemarkAnalysis(
*cast<BasicBlock>(CodeRegion)->getParent(),
Loc, CodeRegion) {}
// Function-based variant of the analysis remark, parallel to the
// OptimizationRemarkMissed(Function*) constructor above: debug location is
// derived from the function's DISubprogram and the code region from
// getFirstFunctionBlock(Func) — presumably the entry block; TODO(review):
// confirm, the helper's definition is outside this hunk.
OptimizationRemarkAnalysis::OptimizationRemarkAnalysis(const char *PassName,
StringRef RemarkName,
const Function *Func)
: DiagnosticInfoIROptimization(
DK_OptimizationRemarkAnalysis, DS_Remark, PassName, RemarkName, *Func,
Func->getSubprogram(), getFirstFunctionBlock(Func)) {}
bool OptimizationRemarkAnalysis::isEnabled() const {
const Function &Fn = getFunction();
LLVMContext &Ctx = Fn.getContext();

View file

@ -581,15 +581,15 @@ struct OpenMPOpt {
for (Function *F : OMPInfoCache.ModuleSlice) {
for (auto ICV : ICVs) {
auto ICVInfo = OMPInfoCache.ICVs[ICV];
auto Remark = [&](OptimizationRemark OR) {
return OR << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name)
<< " Value: "
<< (ICVInfo.InitValue
? ICVInfo.InitValue->getValue().toString(10, true)
: "IMPLEMENTATION_DEFINED");
auto Remark = [&](OptimizationRemarkAnalysis ORA) {
return ORA << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name)
<< " Value: "
<< (ICVInfo.InitValue
? ICVInfo.InitValue->getValue().toString(10, true)
: "IMPLEMENTATION_DEFINED");
};
emitRemarkOnFunction(F, "OpenMPICVTracker", Remark);
emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPICVTracker", Remark);
}
}
}
@ -600,12 +600,12 @@ struct OpenMPOpt {
if (!OMPInfoCache.Kernels.count(F))
continue;
auto Remark = [&](OptimizationRemark OR) {
return OR << "OpenMP GPU kernel "
<< ore::NV("OpenMPGPUKernel", F->getName()) << "\n";
auto Remark = [&](OptimizationRemarkAnalysis ORA) {
return ORA << "OpenMP GPU kernel "
<< ore::NV("OpenMPGPUKernel", F->getName()) << "\n";
};
emitRemarkOnFunction(F, "OpenMPGPU", Remark);
emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPGPU", Remark);
}
}
@ -1419,12 +1419,11 @@ private:
continue;
auto Remark = [&](OptimizationRemark OR) {
auto newLoc = &*F.getEntryBlock().getFirstInsertionPt();
return OR << "OpenMP runtime call "
<< ore::NV("OpenMPOptRuntime", RFI.Name) << " moved to "
<< ore::NV("OpenMPRuntimeMoves", newLoc->getDebugLoc());
<< ore::NV("OpenMPOptRuntime", RFI.Name)
<< " moved to beginning of OpenMP region";
};
emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeCodeMotion", Remark);
emitRemark<OptimizationRemark>(&F, "OpenMPRuntimeCodeMotion", Remark);
CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
ReplVal = CI;
@ -1457,7 +1456,7 @@ private:
return OR << "OpenMP runtime call "
<< ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated";
};
emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeDeduplicated", Remark);
emitRemark<OptimizationRemark>(&F, "OpenMPRuntimeDeduplicated", Remark);
CGUpdater.removeCallSite(*CI);
CI->replaceAllUsesWith(ReplVal);
@ -1558,28 +1557,22 @@ private:
///
/// The remark is built using a callback function provided by the caller that
/// takes a RemarkKind as input and returns a RemarkKind.
template <typename RemarkKind,
typename RemarkCallBack = function_ref<RemarkKind(RemarkKind &&)>>
void emitRemark(Instruction *Inst, StringRef RemarkName,
template <typename RemarkKind, typename RemarkCallBack>
void emitRemark(Instruction *I, StringRef RemarkName,
RemarkCallBack &&RemarkCB) const {
Function *F = Inst->getParent()->getParent();
Function *F = I->getParent()->getParent();
auto &ORE = OREGetter(F);
ORE.emit(
[&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, Inst)); });
ORE.emit([&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I)); });
}
/// Emit a remark on a function. Since only OptimizationRemark is supporting
/// this, it can't be made generic.
void
emitRemarkOnFunction(Function *F, StringRef RemarkName,
function_ref<OptimizationRemark(OptimizationRemark &&)>
&&RemarkCB) const {
/// Emit a remark on a function.
template <typename RemarkKind, typename RemarkCallBack>
void emitRemark(Function *F, StringRef RemarkName,
RemarkCallBack &&RemarkCB) const {
auto &ORE = OREGetter(F);
ORE.emit([&]() {
return RemarkCB(OptimizationRemark(DEBUG_TYPE, RemarkName, F));
});
ORE.emit([&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F)); });
}
/// The underlying module.
@ -1672,10 +1665,11 @@ Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
if (!F.hasLocalLinkage()) {
// See https://openmp.llvm.org/remarks/OptimizationRemarks.html
auto Remark = [&](OptimizationRemark OR) {
return OR << "[OMP100] Potentially unknown OpenMP target region caller";
auto Remark = [&](OptimizationRemarkAnalysis ORA) {
return ORA
<< "[OMP100] Potentially unknown OpenMP target region caller";
};
emitRemarkOnFunction(&F, "OMP100", Remark);
emitRemark<OptimizationRemarkAnalysis>(&F, "OMP100", Remark);
return nullptr;
}
@ -1768,15 +1762,16 @@ bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
continue;
{
auto Remark = [&](OptimizationRemark OR) {
return OR << "Found a parallel region that is called in a target "
"region but not part of a combined target construct nor "
"nested inside a target construct without intermediate "
"code. This can lead to excessive register usage for "
"unrelated target regions in the same translation unit "
"due to spurious call edges assumed by ptxas.";
auto Remark = [&](OptimizationRemarkAnalysis ORA) {
return ORA << "Found a parallel region that is called in a target "
"region but not part of a combined target construct nor "
"nested inside a target construct without intermediate "
"code. This can lead to excessive register usage for "
"unrelated target regions in the same translation unit "
"due to spurious call edges assumed by ptxas.";
};
emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", Remark);
emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPParallelRegionInNonSPMD",
Remark);
}
// If this ever hits, we should investigate.
@ -1785,12 +1780,13 @@ bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
if (UnknownUse || NumDirectCalls != 1 ||
ToBeReplacedStateMachineUses.size() != 2) {
{
auto Remark = [&](OptimizationRemark OR) {
return OR << "Parallel region is used in "
<< (UnknownUse ? "unknown" : "unexpected")
<< " ways; will not attempt to rewrite the state machine.";
auto Remark = [&](OptimizationRemarkAnalysis ORA) {
return ORA << "Parallel region is used in "
<< (UnknownUse ? "unknown" : "unexpected")
<< " ways; will not attempt to rewrite the state machine.";
};
emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", Remark);
emitRemark<OptimizationRemarkAnalysis>(
F, "OpenMPParallelRegionInNonSPMD", Remark);
}
continue;
}
@ -1800,14 +1796,14 @@ bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
Kernel K = getUniqueKernelFor(*F);
if (!K) {
{
auto Remark = [&](OptimizationRemark OR) {
return OR << "Parallel region is not known to be called from a "
"unique single target region, maybe the surrounding "
"function has external linkage?; will not attempt to "
"rewrite the state machine use.";
auto Remark = [&](OptimizationRemarkAnalysis ORA) {
return ORA << "Parallel region is not known to be called from a "
"unique single target region, maybe the surrounding "
"function has external linkage?; will not attempt to "
"rewrite the state machine use.";
};
emitRemarkOnFunction(F, "OpenMPParallelRegionInMultipleKernesl",
Remark);
emitRemark<OptimizationRemarkAnalysis>(
F, "OpenMPParallelRegionInMultipleKernesl", Remark);
}
continue;
}
@ -1818,25 +1814,26 @@ bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
// ensures only direct calls to the function are left.
{
auto RemarkParalleRegion = [&](OptimizationRemark OR) {
return OR << "Specialize parallel region that is only reached from a "
"single target region to avoid spurious call edges and "
"excessive register usage in other target regions. "
"(parallel region ID: "
<< ore::NV("OpenMPParallelRegion", F->getName())
<< ", kernel ID: "
<< ore::NV("OpenMPTargetRegion", K->getName()) << ")";
auto RemarkParalleRegion = [&](OptimizationRemarkAnalysis ORA) {
return ORA << "Specialize parallel region that is only reached from a "
"single target region to avoid spurious call edges and "
"excessive register usage in other target regions. "
"(parallel region ID: "
<< ore::NV("OpenMPParallelRegion", F->getName())
<< ", kernel ID: "
<< ore::NV("OpenMPTargetRegion", K->getName()) << ")";
};
emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD",
RemarkParalleRegion);
auto RemarkKernel = [&](OptimizationRemark OR) {
return OR << "Target region containing the parallel region that is "
"specialized. (parallel region ID: "
<< ore::NV("OpenMPParallelRegion", F->getName())
<< ", kernel ID: "
<< ore::NV("OpenMPTargetRegion", K->getName()) << ")";
emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPParallelRegionInNonSPMD",
RemarkParalleRegion);
auto RemarkKernel = [&](OptimizationRemarkAnalysis ORA) {
return ORA << "Target region containing the parallel region that is "
"specialized. (parallel region ID: "
<< ore::NV("OpenMPParallelRegion", F->getName())
<< ", kernel ID: "
<< ore::NV("OpenMPTargetRegion", K->getName()) << ")";
};
emitRemarkOnFunction(K, "OpenMPParallelRegionInNonSPMD", RemarkKernel);
emitRemark<OptimizationRemarkAnalysis>(K, "OpenMPParallelRegionInNonSPMD",
RemarkKernel);
}
Module &M = *F->getParent();

View file

@ -10,9 +10,9 @@ target triple = "x86_64-pc-linux-gnu"
@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8
@.str0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
; CHECK: remark: deduplication_remarks.c:9:10: OpenMP runtime call __kmpc_global_thread_num moved to deduplication_remarks.c:5:10
; CHECK: remark: deduplication_remarks.c:7:10: OpenMP runtime call __kmpc_global_thread_num deduplicated
; CHECK: remark: deduplication_remarks.c:5:10: OpenMP runtime call __kmpc_global_thread_num deduplicated
; CHECK: remark: deduplication_remarks.c:4:0: OpenMP runtime call __kmpc_global_thread_num moved to beginning of OpenMP region
; CHECK: remark: deduplication_remarks.c:4:0: OpenMP runtime call __kmpc_global_thread_num deduplicated
; CHECK: remark: deduplication_remarks.c:4:0: OpenMP runtime call __kmpc_global_thread_num deduplicated
define dso_local void @deduplicate() local_unnamed_addr !dbg !14 {
%1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0), !dbg !21
call void @useI32(i32 %1), !dbg !23

View file

@ -1,5 +1,5 @@
; RUN: opt -passes=openmp-opt-cgscc -pass-remarks=openmp-opt -openmp-print-gpu-kernels -disable-output < %s 2>&1 | FileCheck %s --implicit-check-not=non_kernel
; RUN: opt -openmp-opt-cgscc -pass-remarks=openmp-opt -openmp-print-gpu-kernels -disable-output < %s 2>&1 | FileCheck %s --implicit-check-not=non_kernel
; RUN: opt -passes=openmp-opt-cgscc -pass-remarks-analysis=openmp-opt -openmp-print-gpu-kernels -disable-output < %s 2>&1 | FileCheck %s --implicit-check-not=non_kernel
; RUN: opt -openmp-opt-cgscc -pass-remarks-analysis=openmp-opt -openmp-print-gpu-kernels -disable-output < %s 2>&1 | FileCheck %s --implicit-check-not=non_kernel
; CHECK-DAG: remark: <unknown>:0:0: OpenMP GPU kernel kernel1
; CHECK-DAG: remark: <unknown>:0:0: OpenMP GPU kernel kernel2

View file

@ -1,5 +1,5 @@
; RUN: opt -passes=openmp-opt-cgscc -pass-remarks=openmp-opt -openmp-print-icv-values -disable-output < %s 2>&1 | FileCheck %s
; RUN: opt -openmp-opt-cgscc -pass-remarks=openmp-opt -openmp-print-icv-values -disable-output < %s 2>&1 | FileCheck %s
; RUN: opt -passes=openmp-opt-cgscc -pass-remarks-analysis=openmp-opt -openmp-print-icv-values -disable-output < %s 2>&1 | FileCheck %s
; RUN: opt -openmp-opt-cgscc -pass-remarks-analysis=openmp-opt -openmp-print-icv-values -disable-output < %s 2>&1 | FileCheck %s
; ModuleID = 'icv_remarks.c'
source_filename = "icv_remarks.c"