[OpenMP][AMDGCN] Support OpenMP offloading for AMDGCN architecture - Part 1
Summary: Allow AMDGCN as a GPU offloading target for OpenMP during compiler invocation and allow setting CUDAMode for it. Originally authored by Greg Rodgers (@gregrodgers). Reviewers: ronlieb, yaxunl, b-sumner, scchan, JonChesterfield, jdoerfert, sameerds, msearles, hliao, arsenm Reviewed By: sameerds Subscribers: sstefan1, jvesely, wdng, arsenm, guansong, dexonsmith, cfe-commits, llvm-commits, gregrodgers Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D79754
This commit is contained in:
parent
0b5d81e6bb
commit
602d9b0afc
|
@ -3224,6 +3224,15 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const {
|
|||
!(BuiltinID == Builtin::BIprintf || BuiltinID == Builtin::BImalloc))
|
||||
return 0;
|
||||
|
||||
// As AMDGCN implementation of OpenMP does not have a device-side standard
|
||||
// library, none of the predefined library functions except printf and malloc
|
||||
// should be treated as a builtin i.e. 0 should be returned for them.
|
||||
if (Context.getTargetInfo().getTriple().isAMDGCN() &&
|
||||
Context.getLangOpts().OpenMPIsDevice &&
|
||||
Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID) &&
|
||||
!(BuiltinID == Builtin::BIprintf || BuiltinID == Builtin::BImalloc))
|
||||
return 0;
|
||||
|
||||
return BuiltinID;
|
||||
}
|
||||
|
||||
|
|
|
@ -3109,7 +3109,8 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
|
|||
|
||||
// Set the flag to prevent the implementation from emitting device exception
|
||||
// handling code for those requiring so.
|
||||
if ((Opts.OpenMPIsDevice && T.isNVPTX()) || Opts.OpenCLCPlusPlus) {
|
||||
if ((Opts.OpenMPIsDevice && (T.isNVPTX() || T.isAMDGCN())) ||
|
||||
Opts.OpenCLCPlusPlus) {
|
||||
Opts.Exceptions = 0;
|
||||
Opts.CXXExceptions = 0;
|
||||
}
|
||||
|
@ -3143,6 +3144,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
|
|||
TT.getArch() == llvm::Triple::ppc64le ||
|
||||
TT.getArch() == llvm::Triple::nvptx ||
|
||||
TT.getArch() == llvm::Triple::nvptx64 ||
|
||||
TT.getArch() == llvm::Triple::amdgcn ||
|
||||
TT.getArch() == llvm::Triple::x86 ||
|
||||
TT.getArch() == llvm::Triple::x86_64))
|
||||
Diags.Report(diag::err_drv_invalid_omp_target) << A->getValue(i);
|
||||
|
@ -3160,13 +3162,13 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
|
|||
<< Opts.OMPHostIRFile;
|
||||
}
|
||||
|
||||
// Set CUDA mode for OpenMP target NVPTX if specified in options
|
||||
Opts.OpenMPCUDAMode = Opts.OpenMPIsDevice && T.isNVPTX() &&
|
||||
// Set CUDA mode for OpenMP target NVPTX/AMDGCN if specified in options
|
||||
Opts.OpenMPCUDAMode = Opts.OpenMPIsDevice && (T.isNVPTX() || T.isAMDGCN()) &&
|
||||
Args.hasArg(options::OPT_fopenmp_cuda_mode);
|
||||
|
||||
// Set CUDA mode for OpenMP target NVPTX if specified in options
|
||||
// Set CUDA mode for OpenMP target NVPTX/AMDGCN if specified in options
|
||||
Opts.OpenMPCUDAForceFullRuntime =
|
||||
Opts.OpenMPIsDevice && T.isNVPTX() &&
|
||||
Opts.OpenMPIsDevice && (T.isNVPTX() || T.isAMDGCN()) &&
|
||||
Args.hasArg(options::OPT_fopenmp_cuda_force_full_runtime);
|
||||
|
||||
// Record whether the __DEPRECATED define was requested.
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
// REQUIRES: x86-registered-target
|
||||
// REQUIRES: powerpc-registered-target
|
||||
// REQUIRES: nvptx-registered-target
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
|
||||
/// ###########################################################################
|
||||
|
||||
|
@ -254,24 +255,40 @@
|
|||
// RUN: | FileCheck -check-prefix=CUDA_MODE %s
|
||||
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode -fopenmp-cuda-mode 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CUDA_MODE %s
|
||||
// CUDA_MODE: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"
|
||||
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-mode 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CUDA_MODE %s
|
||||
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-mode -fopenmp-cuda-mode 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CUDA_MODE %s
|
||||
// CUDA_MODE: clang{{.*}}"-cc1"{{.*}}"-triple" "{{nvptx64-nvidia-cuda|amdgcn-amd-amdhsa}}"
|
||||
// CUDA_MODE-SAME: "-fopenmp-cuda-mode"
|
||||
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s
|
||||
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode -fno-openmp-cuda-mode 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s
|
||||
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-mode 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s
|
||||
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-mode -fno-openmp-cuda-mode 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s
|
||||
// NO_CUDA_MODE-NOT: "-{{fno-|f}}openmp-cuda-mode"
|
||||
|
||||
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-force-full-runtime 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=FULL_RUNTIME %s
|
||||
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-force-full-runtime -fopenmp-cuda-force-full-runtime 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=FULL_RUNTIME %s
|
||||
// FULL_RUNTIME: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"
|
||||
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-force-full-runtime 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=FULL_RUNTIME %s
|
||||
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-force-full-runtime -fopenmp-cuda-force-full-runtime 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=FULL_RUNTIME %s
|
||||
// FULL_RUNTIME: clang{{.*}}"-cc1"{{.*}}"-triple" "{{nvptx64-nvidia-cuda|amdgcn-amd-amdhsa}}"
|
||||
// FULL_RUNTIME-SAME: "-fopenmp-cuda-force-full-runtime"
|
||||
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-force-full-runtime 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=NO_FULL_RUNTIME %s
|
||||
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-force-full-runtime -fno-openmp-cuda-force-full-runtime 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=NO_FULL_RUNTIME %s
|
||||
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-force-full-runtime 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=NO_FULL_RUNTIME %s
|
||||
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-force-full-runtime -fno-openmp-cuda-force-full-runtime 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=NO_FULL_RUNTIME %s
|
||||
// NO_FULL_RUNTIME-NOT: "-{{fno-|f}}openmp-cuda-force-full-runtime"
|
||||
|
||||
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-teams-reduction-recs-num=2048 2>&1 \
|
||||
|
|
27
clang/test/OpenMP/amdgcn_device_function_call.cpp
Normal file
27
clang/test/OpenMP/amdgcn_device_function_call.cpp
Normal file
|
@ -0,0 +1,27 @@
|
|||
// REQUIRES: amdgpu-registered-target
|
||||
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
|
||||
// RUN: llvm-dis < %t-ppc-host.bc | FileCheck %s -check-prefix=HOST
|
||||
|
||||
// device side declarations
|
||||
#pragma omp declare target
|
||||
extern "C" float cosf(float __x);
|
||||
#pragma omp end declare target
|
||||
|
||||
// host side declaration
|
||||
extern "C" float cosf(float __x);
|
||||
|
||||
void test_amdgcn_openmp_device(float __x) {
|
||||
// the default case where predefined library functions are treated as
|
||||
// builtins on the host
|
||||
// HOST: call float @llvm.cos.f32(float
|
||||
__x = cosf(__x);
|
||||
|
||||
#pragma omp target
|
||||
{
|
||||
// cosf should not be treated as builtin on device
|
||||
// CHECK-NOT: call float @llvm.cos.f32(float
|
||||
__x = cosf(__x);
|
||||
}
|
||||
}
|
|
@ -1,6 +1,7 @@
|
|||
/// Make sure no exception messages are inclided in the llvm output.
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHK-EXCEPTION
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHK-EXCEPTION
|
||||
|
||||
void test_increment() {
|
||||
#pragma omp target
|
||||
|
|
|
@ -692,6 +692,9 @@ public:
|
|||
return getArch() == Triple::nvptx || getArch() == Triple::nvptx64;
|
||||
}
|
||||
|
||||
/// Tests whether the target is AMDGCN
|
||||
bool isAMDGCN() const { return getArch() == Triple::amdgcn; }
|
||||
|
||||
bool isAMDGPU() const {
|
||||
return getArch() == Triple::r600 || getArch() == Triple::amdgcn;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue