[OpenMP][AMDGCN] Enable complex functions
This patch enables basic complex-number functionality (complex multiplication and division) for OpenMP target offloading to AMDGCN, using the OCML builtins.
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D108552
This commit is contained in:
parent
b7795eb646
commit
07e85823aa
|
@ -16,7 +16,7 @@
|
|||
// to work with CUDA and OpenMP target offloading [in C and C++ mode].)
|
||||
|
||||
#pragma push_macro("__DEVICE__")
|
||||
#ifdef __OPENMP_NVPTX__
|
||||
#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__)
|
||||
#pragma omp declare target
|
||||
#define __DEVICE__ __attribute__((noinline, nothrow, cold, weak))
|
||||
#else
|
||||
|
@ -26,7 +26,7 @@
|
|||
// To make the algorithms available for C and C++ in CUDA and OpenMP we select
|
||||
// different but equivalent function versions. TODO: For OpenMP we currently
|
||||
// select the native builtins as the overload support for templates is lacking.
|
||||
#if !defined(__OPENMP_NVPTX__)
|
||||
#if !defined(__OPENMP_NVPTX__) && !defined(__OPENMP_AMDGCN__)
|
||||
#define _ISNANd std::isnan
|
||||
#define _ISNANf std::isnan
|
||||
#define _ISINFd std::isinf
|
||||
|
@ -276,7 +276,7 @@ __DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) {
|
|||
#undef _fmaxd
|
||||
#undef _fmaxf
|
||||
|
||||
#ifdef __OPENMP_NVPTX__
|
||||
#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__)
|
||||
#pragma omp end declare target
|
||||
#endif
|
||||
|
||||
|
|
|
@ -17,9 +17,18 @@
|
|||
// We require std::math functions in the complex builtins below.
|
||||
#include <cmath>
|
||||
|
||||
#ifdef __NVPTX__
|
||||
#define __OPENMP_NVPTX__
|
||||
#include <__clang_cuda_complex_builtins.h>
|
||||
#undef __OPENMP_NVPTX__
|
||||
#endif // __NVPTX__
|
||||
|
||||
#ifdef __AMDGCN__
|
||||
#define __OPENMP_AMDGCN__
|
||||
#include <__clang_cuda_complex_builtins.h>
|
||||
#undef __OPENMP_AMDGCN__
|
||||
#endif // __AMDGCN__
|
||||
|
||||
#endif
|
||||
|
||||
// Grab the host header too.
|
||||
|
@ -43,4 +52,4 @@
|
|||
|
||||
#pragma omp end declare variant
|
||||
|
||||
#endif
|
||||
#endif // _LIBCPP_STD_VER
|
||||
|
|
|
@ -17,10 +17,19 @@
|
|||
// We require math functions in the complex builtins below.
|
||||
#include <math.h>
|
||||
|
||||
#ifdef __NVPTX__
|
||||
#define __OPENMP_NVPTX__
|
||||
#include <__clang_cuda_complex_builtins.h>
|
||||
#undef __OPENMP_NVPTX__
|
||||
#endif
|
||||
|
||||
#ifdef __AMDGCN__
|
||||
#define __OPENMP_AMDGCN__
|
||||
#include <__clang_cuda_complex_builtins.h>
|
||||
#undef __OPENMP_AMDGCN__
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
// Grab the host header too.
|
||||
#include_next <complex.h>
|
||||
|
|
50
clang/test/Headers/amdgcn-openmp-device-math-complex.c
Normal file
50
clang/test/Headers/amdgcn-openmp-device-math-complex.c
Normal file
|
@ -0,0 +1,50 @@
|
|||
// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
|
||||
// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK
|
||||
|
||||
#include <complex.h>
|
||||
|
||||
// Exercises double-precision complex multiply and divide inside an OpenMP
// target region. The FileCheck directives in the body expect the emitted
// device IR to contain a call to __divdc3 followed by a call to __muldc3,
// each returning { double, double }.
void test_complex_f64(double _Complex a) {
|
||||
// CHECK-LABEL: define {{.*}}test_complex_f64
|
||||
#pragma omp target
|
||||
{
|
||||
// CHECK: call { double, double } @__divdc3
|
||||
// CHECK: call { double, double } @__muldc3
|
||||
// One complex division (a / a) and one complex multiplication; the result is
// discarded, so the expression exists only to make the compiler emit the
// runtime helper calls matched above.
(void)(a * (a / a));
|
||||
}
|
||||
}
|
||||
|
||||
// CHECK: define weak {{.*}} @__divdc3
|
||||
// CHECK-DAG: call double @__ocml_fabs_f64(
|
||||
// CHECK-DAG: call i32 @__ocml_isnan_f64(
|
||||
// CHECK-DAG: call i32 @__ocml_isfinite_f64(
|
||||
// CHECK-DAG: call double @__ocml_copysign_f64(
|
||||
// CHECK-DAG: call double @__ocml_scalbn_f64(
|
||||
// CHECK-DAG: call double @__ocml_logb_f64(
|
||||
|
||||
// CHECK: define weak {{.*}} @__muldc3
|
||||
// CHECK-DAG: call i32 @__ocml_isnan_f64(
|
||||
// CHECK-DAG: call i32 @__ocml_isinf_f64(
|
||||
// CHECK-DAG: call double @__ocml_copysign_f64(
|
||||
|
||||
// Exercises single-precision complex multiply and divide inside an OpenMP
// target region. The FileCheck directives in the body expect the emitted
// device IR to contain a call to __divsc3 followed by a call to __mulsc3,
// each with the float _Complex result typed as [2 x i32].
void test_complex_f32(float _Complex a) {
|
||||
// CHECK-LABEL: define {{.*}}test_complex_f32
|
||||
#pragma omp target
|
||||
{
|
||||
// CHECK: call [2 x i32] @__divsc3
|
||||
// CHECK: call [2 x i32] @__mulsc3
|
||||
// One complex division (a / a) and one complex multiplication; the result is
// discarded, so the expression exists only to make the compiler emit the
// runtime helper calls matched above.
(void)(a * (a / a));
|
||||
}
|
||||
}
|
||||
|
||||
// CHECK: define weak {{.*}} @__divsc3
|
||||
// CHECK-DAG: call float @__ocml_fabs_f32(
|
||||
// CHECK-DAG: call i32 @__ocml_isnan_f32(
|
||||
// CHECK-DAG: call i32 @__ocml_isfinite_f32(
|
||||
// CHECK-DAG: call float @__ocml_copysign_f32(
|
||||
// CHECK-DAG: call float @__ocml_scalbn_f32(
|
||||
// CHECK-DAG: call float @__ocml_logb_f32(
|
||||
|
||||
// CHECK: define weak {{.*}} @__mulsc3
|
||||
// CHECK-DAG: call i32 @__ocml_isnan_f32(
|
||||
// CHECK-DAG: call i32 @__ocml_isinf_f32(
|
||||
// CHECK-DAG: call float @__ocml_copysign_f32(
|
Loading…
Reference in a new issue