[OpenMP] Add lower and upper bound in num_teams clause
This patch adds lower-bound and upper-bound to num_teams clause according to OpenMP 5.1 specification. The initial number of teams created is implementation defined, but it will be greater than or equal to lower-bound and less than or equal to upper-bound. If num_teams clause is not specified, the number of teams created is implementation defined, but it will be greater or equal to 1. Differential Revision: https://reviews.llvm.org/D95820
This commit is contained in:
parent
544cebd619
commit
4692bb4a8a
|
@ -360,6 +360,7 @@ kmpc_set_defaults 224
|
|||
__kmpc_team_static_init_4u 256
|
||||
__kmpc_team_static_init_8 257
|
||||
__kmpc_team_static_init_8u 258
|
||||
__kmpc_push_num_teams_51 284
|
||||
%endif
|
||||
|
||||
%ifndef stub
|
||||
|
|
|
@ -455,6 +455,7 @@ AffHWSubsetManyDies "KMP_HW_SUBSET ignored: too many Dies requested."
|
|||
AffUseGlobCpuidL "%1$s: Affinity capable, using global cpuid leaf %2$d info"
|
||||
AffNotCapableUseLocCpuidL "%1$s: Affinity not capable, using local cpuid leaf %2$d info"
|
||||
AffNotUsingHwloc "%1$s: Affinity not capable, using hwloc."
|
||||
FailedToCreateTeam "Failed to create teams between lower bound (%1$d) and upper bound (%2$d)."
|
||||
|
||||
# --------------------------------------------------------------------------------------------------
|
||||
-*- HINTS -*-
|
||||
|
@ -512,7 +513,7 @@ BadExeFormat "System error #193 is \"Bad format of EXE or DLL fi
|
|||
"a file for another architecture. "
|
||||
"Check whether \"%1$s\" is a file for %2$s architecture."
|
||||
SystemLimitOnThreads "System-related limit on the number of threads."
|
||||
|
||||
SetNewBound "Try setting new bounds (preferably less than or equal to %1$d) for num_teams clause."
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------------------------------
|
||||
|
|
|
@ -3363,6 +3363,8 @@ extern void __kmp_push_proc_bind(ident_t *loc, int gtid,
|
|||
kmp_proc_bind_t proc_bind);
|
||||
extern void __kmp_push_num_teams(ident_t *loc, int gtid, int num_teams,
|
||||
int num_threads);
|
||||
extern void __kmp_push_num_teams_51(ident_t *loc, int gtid, int num_teams_lb,
|
||||
int num_teams_ub, int num_threads);
|
||||
|
||||
extern void __kmp_yield();
|
||||
|
||||
|
@ -3921,6 +3923,11 @@ KMP_EXPORT void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
|
|||
KMP_EXPORT void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
|
||||
kmp_int32 num_teams,
|
||||
kmp_int32 num_threads);
|
||||
/* Function for OpenMP 5.1 num_teams clause */
|
||||
KMP_EXPORT void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
|
||||
kmp_int32 num_teams_lb,
|
||||
kmp_int32 num_teams_ub,
|
||||
kmp_int32 num_threads);
|
||||
KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,
|
||||
kmpc_micro microtask, ...);
|
||||
struct kmp_dim { // loop bounds info casted to kmp_int64
|
||||
|
|
|
@ -351,6 +351,33 @@ void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
|
|||
__kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
|
||||
}
|
||||
|
||||
/*!
|
||||
@ingroup PARALLEL
|
||||
@param loc source location information
|
||||
@param global_tid global thread number
|
||||
@param num_teams_lo lower bound on number of teams requested for the teams
|
||||
construct
|
||||
@param num_teams_up upper bound on number of teams requested for the teams
|
||||
construct
|
||||
@param num_threads number of threads per team requested for the teams construct
|
||||
|
||||
Set the number of teams to be used by the teams construct. The number of initial
|
||||
teams cretaed will be greater than or equal to the lower bound and less than or
|
||||
equal to the upper bound.
|
||||
This call is only required if the teams construct has a `num_teams` clause
|
||||
or a `thread_limit` clause (or both).
|
||||
*/
|
||||
void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
|
||||
kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
|
||||
kmp_int32 num_threads) {
|
||||
KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
|
||||
" num_teams_ub=%d num_threads=%d\n",
|
||||
global_tid, num_teams_lb, num_teams_ub, num_threads));
|
||||
__kmp_assert_valid_gtid(global_tid);
|
||||
__kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
|
||||
num_threads);
|
||||
}
|
||||
|
||||
/*!
|
||||
@ingroup PARALLEL
|
||||
@param loc source location information
|
||||
|
|
|
@ -7425,6 +7425,63 @@ void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
|
|||
thr->th.th_set_nproc = num_threads;
|
||||
}
|
||||
|
||||
static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
|
||||
int num_threads) {
|
||||
KMP_DEBUG_ASSERT(thr);
|
||||
// Remember the number of threads for inner parallel regions
|
||||
if (!TCR_4(__kmp_init_middle))
|
||||
__kmp_middle_initialize(); // get internal globals calculated
|
||||
KMP_DEBUG_ASSERT(__kmp_avail_proc);
|
||||
KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
|
||||
|
||||
if (num_threads == 0) {
|
||||
if (__kmp_teams_thread_limit > 0) {
|
||||
num_threads = __kmp_teams_thread_limit;
|
||||
} else {
|
||||
num_threads = __kmp_avail_proc / num_teams;
|
||||
}
|
||||
// adjust num_threads w/o warning as it is not user setting
|
||||
// num_threads = min(num_threads, nthreads-var, thread-limit-var)
|
||||
// no thread_limit clause specified - do not change thread-limit-var ICV
|
||||
if (num_threads > __kmp_dflt_team_nth) {
|
||||
num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
|
||||
}
|
||||
if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
|
||||
num_threads = thr->th.th_current_task->td_icvs.thread_limit;
|
||||
} // prevent team size to exceed thread-limit-var
|
||||
if (num_teams * num_threads > __kmp_teams_max_nth) {
|
||||
num_threads = __kmp_teams_max_nth / num_teams;
|
||||
}
|
||||
if (num_threads == 0) {
|
||||
num_threads = 1;
|
||||
}
|
||||
} else {
|
||||
// This thread will be the master of the league masters
|
||||
// Store new thread limit; old limit is saved in th_cg_roots list
|
||||
thr->th.th_current_task->td_icvs.thread_limit = num_threads;
|
||||
// num_threads = min(num_threads, nthreads-var)
|
||||
if (num_threads > __kmp_dflt_team_nth) {
|
||||
num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
|
||||
}
|
||||
if (num_teams * num_threads > __kmp_teams_max_nth) {
|
||||
int new_threads = __kmp_teams_max_nth / num_teams;
|
||||
if (new_threads == 0) {
|
||||
new_threads = 1;
|
||||
}
|
||||
if (new_threads != num_threads) {
|
||||
if (!__kmp_reserve_warn) { // user asked for too many threads
|
||||
__kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
|
||||
__kmp_msg(kmp_ms_warning,
|
||||
KMP_MSG(CantFormThrTeam, num_threads, new_threads),
|
||||
KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
|
||||
}
|
||||
}
|
||||
num_threads = new_threads;
|
||||
}
|
||||
}
|
||||
thr->th.th_teams_size.nth = num_threads;
|
||||
}
|
||||
|
||||
/* this sets the requested number of teams for the teams region and/or
|
||||
the number of threads for the next parallel region encountered */
|
||||
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
|
||||
|
@ -7453,49 +7510,64 @@ void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
|
|||
// teams)
|
||||
thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
|
||||
|
||||
// Remember the number of threads for inner parallel regions
|
||||
if (!TCR_4(__kmp_init_middle))
|
||||
__kmp_middle_initialize(); // get internal globals calculated
|
||||
KMP_DEBUG_ASSERT(__kmp_avail_proc);
|
||||
KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
|
||||
if (num_threads == 0) {
|
||||
if (__kmp_teams_thread_limit > 0) {
|
||||
num_threads = __kmp_teams_thread_limit;
|
||||
} else {
|
||||
num_threads = __kmp_avail_proc / num_teams;
|
||||
}
|
||||
// adjust num_threads w/o warning as it is not user setting
|
||||
// num_threads = min(num_threads, nthreads-var, thread-limit-var)
|
||||
// no thread_limit clause specified - do not change thread-limit-var ICV
|
||||
if (num_threads > __kmp_dflt_team_nth) {
|
||||
num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
|
||||
}
|
||||
if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
|
||||
num_threads = thr->th.th_current_task->td_icvs.thread_limit;
|
||||
} // prevent team size to exceed thread-limit-var
|
||||
if (num_teams * num_threads > __kmp_teams_max_nth) {
|
||||
num_threads = __kmp_teams_max_nth / num_teams;
|
||||
}
|
||||
} else {
|
||||
// This thread will be the master of the league masters
|
||||
// Store new thread limit; old limit is saved in th_cg_roots list
|
||||
thr->th.th_current_task->td_icvs.thread_limit = num_threads;
|
||||
// num_threads = min(num_threads, nthreads-var)
|
||||
if (num_threads > __kmp_dflt_team_nth) {
|
||||
num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
|
||||
}
|
||||
if (num_teams * num_threads > __kmp_teams_max_nth) {
|
||||
int new_threads = __kmp_teams_max_nth / num_teams;
|
||||
if (!__kmp_reserve_warn) { // user asked for too many threads
|
||||
__kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
|
||||
__kmp_push_thread_limit(thr, num_teams, num_threads);
|
||||
}
|
||||
|
||||
/* This sets the requested number of teams for the teams region and/or
|
||||
the number of threads for the next parallel region encountered */
|
||||
void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
|
||||
int num_teams_ub, int num_threads) {
|
||||
kmp_info_t *thr = __kmp_threads[gtid];
|
||||
KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
|
||||
KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
|
||||
KMP_DEBUG_ASSERT(num_threads >= 0);
|
||||
|
||||
if (num_teams_lb > num_teams_ub) {
|
||||
__kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
|
||||
KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
|
||||
}
|
||||
|
||||
int num_teams = 1; // defalt number of teams is 1.
|
||||
|
||||
if (num_teams_lb == 0 && num_teams_ub > 0)
|
||||
num_teams_lb = num_teams_ub;
|
||||
|
||||
if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause
|
||||
num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
|
||||
if (num_teams > __kmp_teams_max_nth) {
|
||||
if (!__kmp_reserve_warn) {
|
||||
__kmp_reserve_warn = 1;
|
||||
__kmp_msg(kmp_ms_warning,
|
||||
KMP_MSG(CantFormThrTeam, num_threads, new_threads),
|
||||
KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
|
||||
KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
|
||||
}
|
||||
num_threads = new_threads;
|
||||
num_teams = __kmp_teams_max_nth;
|
||||
}
|
||||
} else if (num_teams_lb == num_teams_ub) { // requires exact number of teams
|
||||
num_teams = num_teams_ub;
|
||||
} else { // num_teams_lb <= num_teams <= num_teams_ub
|
||||
if (num_threads == 0) {
|
||||
if (num_teams_ub > __kmp_teams_max_nth) {
|
||||
num_teams = num_teams_lb;
|
||||
} else {
|
||||
num_teams = num_teams_ub;
|
||||
}
|
||||
} else {
|
||||
num_teams = (num_threads > __kmp_teams_max_nth)
|
||||
? num_teams
|
||||
: __kmp_teams_max_nth / num_threads;
|
||||
if (num_teams < num_teams_lb) {
|
||||
num_teams = num_teams_lb;
|
||||
} else if (num_teams > num_teams_ub) {
|
||||
num_teams = num_teams_ub;
|
||||
}
|
||||
}
|
||||
}
|
||||
thr->th.th_teams_size.nth = num_threads;
|
||||
// Set number of teams (number of threads in the outer "parallel" of the
|
||||
// teams)
|
||||
thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
|
||||
|
||||
__kmp_push_thread_limit(thr, num_teams, num_threads);
|
||||
}
|
||||
|
||||
// Set the proc_bind var to use in the following parallel region.
|
||||
|
|
93
openmp/runtime/test/teams/kmp_num_teams.c
Normal file
93
openmp/runtime/test/teams/kmp_num_teams.c
Normal file
|
@ -0,0 +1,93 @@
|
|||
// RUN: %libomp-compile-and-run
|
||||
// UNSUPPORTED: gcc
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <omp.h>
|
||||
|
||||
#define NT 8
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
typedef int kmp_int32;
|
||||
typedef struct ident {
|
||||
kmp_int32 reserved_1;
|
||||
kmp_int32 flags;
|
||||
kmp_int32 reserved_2;
|
||||
kmp_int32 reserved_3;
|
||||
char const *psource;
|
||||
} ident_t;
|
||||
extern int __kmpc_global_thread_num(ident_t *);
|
||||
extern void __kmpc_push_num_teams_51(ident_t *, kmp_int32, kmp_int32, kmp_int32,
|
||||
kmp_int32);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
void check_num_teams(int num_teams_lb, int num_teams_ub, int thread_limit) {
|
||||
int nteams, nthreads;
|
||||
int a = 0;
|
||||
|
||||
int gtid = __kmpc_global_thread_num(NULL);
|
||||
__kmpc_push_num_teams_51(NULL, gtid, num_teams_lb, num_teams_ub,
|
||||
thread_limit);
|
||||
|
||||
#pragma omp target teams
|
||||
{
|
||||
int priv_nteams;
|
||||
int team_num = omp_get_team_num();
|
||||
if (team_num == 0)
|
||||
nteams = omp_get_num_teams();
|
||||
priv_nteams = omp_get_num_teams();
|
||||
#pragma omp parallel
|
||||
{
|
||||
int priv_nthreads;
|
||||
int thread_num = omp_get_thread_num();
|
||||
int teams_ub, teams_lb, thr_limit;
|
||||
if (team_num == 0 && thread_num == 0)
|
||||
nthreads = omp_get_num_threads();
|
||||
priv_nthreads = omp_get_num_threads();
|
||||
|
||||
teams_ub = (num_teams_ub ? num_teams_ub : priv_nteams);
|
||||
teams_lb = (num_teams_lb ? num_teams_lb : teams_ub);
|
||||
thr_limit = (thread_limit ? thread_limit : priv_nthreads);
|
||||
|
||||
if (priv_nteams < teams_lb || priv_nteams > teams_ub) {
|
||||
fprintf(stderr, "error: invalid number of teams=%d\n", priv_nteams);
|
||||
exit(1);
|
||||
}
|
||||
if (priv_nthreads > thr_limit) {
|
||||
fprintf(stderr, "error: invalid number of threads=%d\n", priv_nthreads);
|
||||
exit(1);
|
||||
}
|
||||
#pragma omp atomic
|
||||
a++;
|
||||
}
|
||||
}
|
||||
if (a != nteams * nthreads) {
|
||||
fprintf(stderr, "error: a (%d) != nteams * nthreads (%d)\n", a,
|
||||
nteams * nthreads);
|
||||
exit(1);
|
||||
} else {
|
||||
printf("#teams %d, #threads %d: Hello!\n", nteams, nthreads);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
omp_set_num_threads(NT);
|
||||
|
||||
check_num_teams(1, 8, 2);
|
||||
check_num_teams(2, 2, 2);
|
||||
check_num_teams(2, 2, 0);
|
||||
check_num_teams(8, 16, 2);
|
||||
check_num_teams(9, 16, 0);
|
||||
check_num_teams(9, 16, 2);
|
||||
check_num_teams(2, 3, 0);
|
||||
check_num_teams(0, 0, 2);
|
||||
check_num_teams(0, 4, 0);
|
||||
check_num_teams(0, 2, 2);
|
||||
|
||||
printf("Test Passed\n");
|
||||
return 0;
|
||||
}
|
Loading…
Reference in a new issue