[OpenMP] Add lower and upper bound in num_teams clause

This patch adds lower-bound and upper-bound to num_teams clause
according to OpenMP 5.1 specification. The initial number of teams
created is implementation defined, but it will be greater than or
equal to lower-bound and less than or equal to upper-bound. If
num_teams clause is not specified, the number of teams created is
implementation defined, but it will be greater or equal to 1.

Differential Revision: https://reviews.llvm.org/D95820
This commit is contained in:
Nawrin Sultana 2020-10-30 13:04:25 -05:00
parent 544cebd619
commit 4692bb4a8a
6 changed files with 240 additions and 39 deletions

View file

@ -360,6 +360,7 @@ kmpc_set_defaults 224
__kmpc_team_static_init_4u 256
__kmpc_team_static_init_8 257
__kmpc_team_static_init_8u 258
__kmpc_push_num_teams_51 284
%endif
%ifndef stub

View file

@ -455,6 +455,7 @@ AffHWSubsetManyDies "KMP_HW_SUBSET ignored: too many Dies requested."
AffUseGlobCpuidL "%1$s: Affinity capable, using global cpuid leaf %2$d info"
AffNotCapableUseLocCpuidL "%1$s: Affinity not capable, using local cpuid leaf %2$d info"
AffNotUsingHwloc "%1$s: Affinity not capable, using hwloc."
FailedToCreateTeam "Failed to create teams between lower bound (%1$d) and upper bound (%2$d)."
# --------------------------------------------------------------------------------------------------
-*- HINTS -*-
@ -512,7 +513,7 @@ BadExeFormat "System error #193 is \"Bad format of EXE or DLL fi
"a file for another architecture. "
"Check whether \"%1$s\" is a file for %2$s architecture."
SystemLimitOnThreads "System-related limit on the number of threads."
SetNewBound "Try setting new bounds (preferably less than or equal to %1$d) for num_teams clause."
# --------------------------------------------------------------------------------------------------

View file

@ -3363,6 +3363,8 @@ extern void __kmp_push_proc_bind(ident_t *loc, int gtid,
kmp_proc_bind_t proc_bind);
extern void __kmp_push_num_teams(ident_t *loc, int gtid, int num_teams,
int num_threads);
extern void __kmp_push_num_teams_51(ident_t *loc, int gtid, int num_teams_lb,
int num_teams_ub, int num_threads);
extern void __kmp_yield();
@ -3921,6 +3923,11 @@ KMP_EXPORT void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
KMP_EXPORT void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
kmp_int32 num_teams,
kmp_int32 num_threads);
/* Function for OpenMP 5.1 num_teams clause */
KMP_EXPORT void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
kmp_int32 num_teams_lb,
kmp_int32 num_teams_ub,
kmp_int32 num_threads);
KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,
kmpc_micro microtask, ...);
struct kmp_dim { // loop bounds info casted to kmp_int64

View file

@ -351,6 +351,33 @@ void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
__kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams_lo lower bound on number of teams requested for the teams
construct
@param num_teams_up upper bound on number of teams requested for the teams
construct
@param num_threads number of threads per team requested for the teams construct
Set the number of teams to be used by the teams construct. The number of initial
teams cretaed will be greater than or equal to the lower bound and less than or
equal to the upper bound.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
kmp_int32 num_threads) {
KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
" num_teams_ub=%d num_threads=%d\n",
global_tid, num_teams_lb, num_teams_ub, num_threads));
__kmp_assert_valid_gtid(global_tid);
__kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
num_threads);
}
/*!
@ingroup PARALLEL
@param loc source location information

View file

@ -7425,6 +7425,63 @@ void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
thr->th.th_set_nproc = num_threads;
}
static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
int num_threads) {
KMP_DEBUG_ASSERT(thr);
// Remember the number of threads for inner parallel regions
if (!TCR_4(__kmp_init_middle))
__kmp_middle_initialize(); // get internal globals calculated
KMP_DEBUG_ASSERT(__kmp_avail_proc);
KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
if (num_threads == 0) {
if (__kmp_teams_thread_limit > 0) {
num_threads = __kmp_teams_thread_limit;
} else {
num_threads = __kmp_avail_proc / num_teams;
}
// adjust num_threads w/o warning as it is not user setting
// num_threads = min(num_threads, nthreads-var, thread-limit-var)
// no thread_limit clause specified - do not change thread-limit-var ICV
if (num_threads > __kmp_dflt_team_nth) {
num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
}
if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
num_threads = thr->th.th_current_task->td_icvs.thread_limit;
} // prevent team size to exceed thread-limit-var
if (num_teams * num_threads > __kmp_teams_max_nth) {
num_threads = __kmp_teams_max_nth / num_teams;
}
if (num_threads == 0) {
num_threads = 1;
}
} else {
// This thread will be the master of the league masters
// Store new thread limit; old limit is saved in th_cg_roots list
thr->th.th_current_task->td_icvs.thread_limit = num_threads;
// num_threads = min(num_threads, nthreads-var)
if (num_threads > __kmp_dflt_team_nth) {
num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
}
if (num_teams * num_threads > __kmp_teams_max_nth) {
int new_threads = __kmp_teams_max_nth / num_teams;
if (new_threads == 0) {
new_threads = 1;
}
if (new_threads != num_threads) {
if (!__kmp_reserve_warn) { // user asked for too many threads
__kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
__kmp_msg(kmp_ms_warning,
KMP_MSG(CantFormThrTeam, num_threads, new_threads),
KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
}
}
num_threads = new_threads;
}
}
thr->th.th_teams_size.nth = num_threads;
}
/* this sets the requested number of teams for the teams region and/or
the number of threads for the next parallel region encountered */
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
@ -7453,49 +7510,64 @@ void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
// teams)
thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
// Remember the number of threads for inner parallel regions
if (!TCR_4(__kmp_init_middle))
__kmp_middle_initialize(); // get internal globals calculated
KMP_DEBUG_ASSERT(__kmp_avail_proc);
KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
if (num_threads == 0) {
if (__kmp_teams_thread_limit > 0) {
num_threads = __kmp_teams_thread_limit;
} else {
num_threads = __kmp_avail_proc / num_teams;
}
// adjust num_threads w/o warning as it is not user setting
// num_threads = min(num_threads, nthreads-var, thread-limit-var)
// no thread_limit clause specified - do not change thread-limit-var ICV
if (num_threads > __kmp_dflt_team_nth) {
num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
}
if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
num_threads = thr->th.th_current_task->td_icvs.thread_limit;
} // prevent team size to exceed thread-limit-var
if (num_teams * num_threads > __kmp_teams_max_nth) {
num_threads = __kmp_teams_max_nth / num_teams;
}
} else {
// This thread will be the master of the league masters
// Store new thread limit; old limit is saved in th_cg_roots list
thr->th.th_current_task->td_icvs.thread_limit = num_threads;
// num_threads = min(num_threads, nthreads-var)
if (num_threads > __kmp_dflt_team_nth) {
num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
}
if (num_teams * num_threads > __kmp_teams_max_nth) {
int new_threads = __kmp_teams_max_nth / num_teams;
if (!__kmp_reserve_warn) { // user asked for too many threads
__kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
__kmp_push_thread_limit(thr, num_teams, num_threads);
}
/* This sets the requested number of teams for the teams region and/or
the number of threads for the next parallel region encountered */
void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
int num_teams_ub, int num_threads) {
kmp_info_t *thr = __kmp_threads[gtid];
KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
KMP_DEBUG_ASSERT(num_threads >= 0);
if (num_teams_lb > num_teams_ub) {
__kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
}
int num_teams = 1; // defalt number of teams is 1.
if (num_teams_lb == 0 && num_teams_ub > 0)
num_teams_lb = num_teams_ub;
if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause
num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
if (num_teams > __kmp_teams_max_nth) {
if (!__kmp_reserve_warn) {
__kmp_reserve_warn = 1;
__kmp_msg(kmp_ms_warning,
KMP_MSG(CantFormThrTeam, num_threads, new_threads),
KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
}
num_threads = new_threads;
num_teams = __kmp_teams_max_nth;
}
} else if (num_teams_lb == num_teams_ub) { // requires exact number of teams
num_teams = num_teams_ub;
} else { // num_teams_lb <= num_teams <= num_teams_ub
if (num_threads == 0) {
if (num_teams_ub > __kmp_teams_max_nth) {
num_teams = num_teams_lb;
} else {
num_teams = num_teams_ub;
}
} else {
num_teams = (num_threads > __kmp_teams_max_nth)
? num_teams
: __kmp_teams_max_nth / num_threads;
if (num_teams < num_teams_lb) {
num_teams = num_teams_lb;
} else if (num_teams > num_teams_ub) {
num_teams = num_teams_ub;
}
}
}
thr->th.th_teams_size.nth = num_threads;
// Set number of teams (number of threads in the outer "parallel" of the
// teams)
thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
__kmp_push_thread_limit(thr, num_teams, num_threads);
}
// Set the proc_bind var to use in the following parallel region.

View file

@ -0,0 +1,93 @@
// RUN: %libomp-compile-and-run
// UNSUPPORTED: gcc
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#define NT 8
#ifdef __cplusplus
extern "C" {
#endif
typedef int kmp_int32;
typedef struct ident {
kmp_int32 reserved_1;
kmp_int32 flags;
kmp_int32 reserved_2;
kmp_int32 reserved_3;
char const *psource;
} ident_t;
extern int __kmpc_global_thread_num(ident_t *);
extern void __kmpc_push_num_teams_51(ident_t *, kmp_int32, kmp_int32, kmp_int32,
kmp_int32);
#ifdef __cplusplus
}
#endif
void check_num_teams(int num_teams_lb, int num_teams_ub, int thread_limit) {
int nteams, nthreads;
int a = 0;
int gtid = __kmpc_global_thread_num(NULL);
__kmpc_push_num_teams_51(NULL, gtid, num_teams_lb, num_teams_ub,
thread_limit);
#pragma omp target teams
{
int priv_nteams;
int team_num = omp_get_team_num();
if (team_num == 0)
nteams = omp_get_num_teams();
priv_nteams = omp_get_num_teams();
#pragma omp parallel
{
int priv_nthreads;
int thread_num = omp_get_thread_num();
int teams_ub, teams_lb, thr_limit;
if (team_num == 0 && thread_num == 0)
nthreads = omp_get_num_threads();
priv_nthreads = omp_get_num_threads();
teams_ub = (num_teams_ub ? num_teams_ub : priv_nteams);
teams_lb = (num_teams_lb ? num_teams_lb : teams_ub);
thr_limit = (thread_limit ? thread_limit : priv_nthreads);
if (priv_nteams < teams_lb || priv_nteams > teams_ub) {
fprintf(stderr, "error: invalid number of teams=%d\n", priv_nteams);
exit(1);
}
if (priv_nthreads > thr_limit) {
fprintf(stderr, "error: invalid number of threads=%d\n", priv_nthreads);
exit(1);
}
#pragma omp atomic
a++;
}
}
if (a != nteams * nthreads) {
fprintf(stderr, "error: a (%d) != nteams * nthreads (%d)\n", a,
nteams * nthreads);
exit(1);
} else {
printf("#teams %d, #threads %d: Hello!\n", nteams, nthreads);
}
}
int main(int argc, char *argv[]) {
omp_set_num_threads(NT);
check_num_teams(1, 8, 2);
check_num_teams(2, 2, 2);
check_num_teams(2, 2, 0);
check_num_teams(8, 16, 2);
check_num_teams(9, 16, 0);
check_num_teams(9, 16, 2);
check_num_teams(2, 3, 0);
check_num_teams(0, 0, 2);
check_num_teams(0, 4, 0);
check_num_teams(0, 2, 2);
printf("Test Passed\n");
return 0;
}