[Flang][OpenMP] Upstream the lowering of the parallel do combined construct
When parallel is used in a combined construct, use a separate function to create the parallel operation. That function handles the parallel-specific clauses and leaves the remaining clauses to be handled when the inner operations are created. Reviewed By: peixin, shraiysh Differential Revision: https://reviews.llvm.org/D125465 Co-authored-by: Sourabh Singh Tomar <SourabhSingh.Tomar@amd.com> Co-authored-by: Eric Schweitz <eschweitz@nvidia.com> Co-authored-by: Valentin Clement <clementval@gmail.com> Co-authored-by: Nimish Mishra <neelam.nimish@gmail.com>
This commit is contained in:
parent
c153c61fad
commit
4202d69d9e
|
@ -278,6 +278,80 @@ genOMP(Fortran::lower::AbstractConverter &converter,
|
||||||
standaloneConstruct.u);
|
standaloneConstruct.u);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Maps the proc_bind kind recorded by the parser for a PROC_BIND clause onto
 * the corresponding OpenMP dialect enumerator and wraps it in an attribute
 * created in the builder's context.
 */
static omp::ClauseProcBindKindAttr genProcBindKindAttr(
    fir::FirOpBuilder &firOpBuilder,
    const Fortran::parser::OmpClause::ProcBind *procBindClause) {
  using ParsedKind = Fortran::parser::OmpProcBindClause::Type;

  omp::ClauseProcBindKind dialectKind;
  switch (procBindClause->v.v) {
  case ParsedKind::Master:
    dialectKind = omp::ClauseProcBindKind::Master;
    break;
  case ParsedKind::Close:
    dialectKind = omp::ClauseProcBindKind::Close;
    break;
  case ParsedKind::Spread:
    dialectKind = omp::ClauseProcBindKind::Spread;
    break;
  case ParsedKind::Primary:
    dialectKind = omp::ClauseProcBindKind::Primary;
    break;
  }

  mlir::MLIRContext *context = firOpBuilder.getContext();
  return omp::ClauseProcBindKindAttr::get(context, dialectKind);
}
|
||||||
|
|
||||||
|
/* When parallel is used in a combined construct, then use this function to
|
||||||
|
* create the parallel operation. It handles the parallel specific clauses
|
||||||
|
* and leaves the rest for handling at the inner operations.
|
||||||
|
* TODO: Refactor clause handling
|
||||||
|
*/
|
||||||
|
template <typename Directive>
|
||||||
|
static void
|
||||||
|
createCombinedParallelOp(Fortran::lower::AbstractConverter &converter,
|
||||||
|
Fortran::lower::pft::Evaluation &eval,
|
||||||
|
const Directive &directive) {
|
||||||
|
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
|
||||||
|
mlir::Location currentLocation = converter.getCurrentLocation();
|
||||||
|
Fortran::lower::StatementContext stmtCtx;
|
||||||
|
llvm::ArrayRef<mlir::Type> argTy;
|
||||||
|
mlir::Value ifClauseOperand, numThreadsClauseOperand;
|
||||||
|
SmallVector<Value> allocatorOperands, allocateOperands;
|
||||||
|
mlir::omp::ClauseProcBindKindAttr procBindKindAttr;
|
||||||
|
const auto &opClauseList =
|
||||||
|
std::get<Fortran::parser::OmpClauseList>(directive.t);
|
||||||
|
// TODO: Handle the following clauses
|
||||||
|
// 1. default
|
||||||
|
// 2. copyin
|
||||||
|
// Note: rest of the clauses are handled when the inner operation is created
|
||||||
|
for (const Fortran::parser::OmpClause &clause : opClauseList.v) {
|
||||||
|
if (const auto &ifClause =
|
||||||
|
std::get_if<Fortran::parser::OmpClause::If>(&clause.u)) {
|
||||||
|
auto &expr = std::get<Fortran::parser::ScalarLogicalExpr>(ifClause->v.t);
|
||||||
|
mlir::Value ifVal = fir::getBase(
|
||||||
|
converter.genExprValue(*Fortran::semantics::GetExpr(expr), stmtCtx));
|
||||||
|
ifClauseOperand = firOpBuilder.createConvert(
|
||||||
|
currentLocation, firOpBuilder.getI1Type(), ifVal);
|
||||||
|
} else if (const auto &numThreadsClause =
|
||||||
|
std::get_if<Fortran::parser::OmpClause::NumThreads>(
|
||||||
|
&clause.u)) {
|
||||||
|
numThreadsClauseOperand = fir::getBase(converter.genExprValue(
|
||||||
|
*Fortran::semantics::GetExpr(numThreadsClause->v), stmtCtx));
|
||||||
|
} else if (const auto &procBindClause =
|
||||||
|
std::get_if<Fortran::parser::OmpClause::ProcBind>(
|
||||||
|
&clause.u)) {
|
||||||
|
procBindKindAttr = genProcBindKindAttr(firOpBuilder, procBindClause);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Create and insert the operation.
|
||||||
|
auto parallelOp = firOpBuilder.create<mlir::omp::ParallelOp>(
|
||||||
|
currentLocation, argTy, ifClauseOperand, numThreadsClauseOperand,
|
||||||
|
allocateOperands, allocatorOperands, /*reduction_vars=*/ValueRange(),
|
||||||
|
/*reductions=*/nullptr, procBindKindAttr);
|
||||||
|
|
||||||
|
createBodyOfOp<omp::ParallelOp>(parallelOp, converter, currentLocation,
|
||||||
|
&opClauseList, /*iv=*/{},
|
||||||
|
/*isCombined=*/true);
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
genOMP(Fortran::lower::AbstractConverter &converter,
|
genOMP(Fortran::lower::AbstractConverter &converter,
|
||||||
Fortran::lower::pft::Evaluation &eval,
|
Fortran::lower::pft::Evaluation &eval,
|
||||||
|
@ -318,23 +392,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
|
||||||
} else if (const auto &procBindClause =
|
} else if (const auto &procBindClause =
|
||||||
std::get_if<Fortran::parser::OmpClause::ProcBind>(
|
std::get_if<Fortran::parser::OmpClause::ProcBind>(
|
||||||
&clause.u)) {
|
&clause.u)) {
|
||||||
omp::ClauseProcBindKind pbKind;
|
procBindKindAttr = genProcBindKindAttr(firOpBuilder, procBindClause);
|
||||||
switch (procBindClause->v.v) {
|
|
||||||
case Fortran::parser::OmpProcBindClause::Type::Master:
|
|
||||||
pbKind = omp::ClauseProcBindKind::Master;
|
|
||||||
break;
|
|
||||||
case Fortran::parser::OmpProcBindClause::Type::Close:
|
|
||||||
pbKind = omp::ClauseProcBindKind::Close;
|
|
||||||
break;
|
|
||||||
case Fortran::parser::OmpProcBindClause::Type::Spread:
|
|
||||||
pbKind = omp::ClauseProcBindKind::Spread;
|
|
||||||
break;
|
|
||||||
case Fortran::parser::OmpProcBindClause::Type::Primary:
|
|
||||||
pbKind = omp::ClauseProcBindKind::Primary;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
procBindKindAttr =
|
|
||||||
omp::ClauseProcBindKindAttr::get(firOpBuilder.getContext(), pbKind);
|
|
||||||
} else if (const auto &allocateClause =
|
} else if (const auto &allocateClause =
|
||||||
std::get_if<Fortran::parser::OmpClause::Allocate>(
|
std::get_if<Fortran::parser::OmpClause::Allocate>(
|
||||||
&clause.u)) {
|
&clause.u)) {
|
||||||
|
@ -419,11 +477,17 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
|
||||||
noWaitClauseOperand, orderedClauseOperand, orderClauseOperand;
|
noWaitClauseOperand, orderedClauseOperand, orderClauseOperand;
|
||||||
const auto &wsLoopOpClauseList = std::get<Fortran::parser::OmpClauseList>(
|
const auto &wsLoopOpClauseList = std::get<Fortran::parser::OmpClauseList>(
|
||||||
std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t).t);
|
std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t).t);
|
||||||
if (llvm::omp::OMPD_do !=
|
|
||||||
|
const auto ompDirective =
|
||||||
std::get<Fortran::parser::OmpLoopDirective>(
|
std::get<Fortran::parser::OmpLoopDirective>(
|
||||||
std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t).t)
|
std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t).t)
|
||||||
.v) {
|
.v;
|
||||||
TODO(converter.getCurrentLocation(), "Combined worksharing loop construct");
|
if (llvm::omp::OMPD_parallel_do == ompDirective) {
|
||||||
|
createCombinedParallelOp<Fortran::parser::OmpBeginLoopDirective>(
|
||||||
|
converter, eval,
|
||||||
|
std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t));
|
||||||
|
} else if (llvm::omp::OMPD_do != ompDirective) {
|
||||||
|
TODO(converter.getCurrentLocation(), "Construct enclosing do loop");
|
||||||
}
|
}
|
||||||
|
|
||||||
int64_t collapseValue = Fortran::lower::getCollapseValue(wsLoopOpClauseList);
|
int64_t collapseValue = Fortran::lower::getCollapseValue(wsLoopOpClauseList);
|
||||||
|
@ -648,15 +712,14 @@ genOMP(Fortran::lower::AbstractConverter &converter,
|
||||||
|
|
||||||
// Parallel Sections Construct
|
// Parallel Sections Construct
|
||||||
if (dir == llvm::omp::Directive::OMPD_parallel_sections) {
|
if (dir == llvm::omp::Directive::OMPD_parallel_sections) {
|
||||||
auto parallelOp = firOpBuilder.create<mlir::omp::ParallelOp>(
|
createCombinedParallelOp<Fortran::parser::OmpBeginSectionsDirective>(
|
||||||
currentLocation, /*if_expr_var*/ nullptr, /*num_threads_var*/ nullptr,
|
converter, eval,
|
||||||
allocateOperands, allocatorOperands, /*reduction_vars=*/ValueRange(),
|
std::get<Fortran::parser::OmpBeginSectionsDirective>(
|
||||||
/*reductions=*/nullptr, /*proc_bind_val*/ nullptr);
|
sectionsConstruct.t));
|
||||||
createBodyOfOp(parallelOp, converter, currentLocation);
|
|
||||||
auto sectionsOp = firOpBuilder.create<mlir::omp::SectionsOp>(
|
auto sectionsOp = firOpBuilder.create<mlir::omp::SectionsOp>(
|
||||||
currentLocation, /*reduction_vars*/ ValueRange(),
|
currentLocation, /*reduction_vars*/ ValueRange(),
|
||||||
/*reductions=*/nullptr, /*allocate_vars*/ ValueRange(),
|
/*reductions=*/nullptr, allocateOperands, allocatorOperands,
|
||||||
/*allocators_vars*/ ValueRange(), /*nowait=*/nullptr);
|
/*nowait=*/nullptr);
|
||||||
createBodyOfOp(sectionsOp, converter, currentLocation);
|
createBodyOfOp(sectionsOp, converter, currentLocation);
|
||||||
|
|
||||||
// Sections Construct
|
// Sections Construct
|
||||||
|
|
|
@ -71,3 +71,36 @@ func.func @_QPsb2(%arg0: !fir.ref<i32> {fir.bindc_name = "x"}, %arg1: !fir.ref<i
|
||||||
// CHECK: }
|
// CHECK: }
|
||||||
// CHECK: llvm.return
|
// CHECK: llvm.return
|
||||||
// CHECK: }
|
// CHECK: }
|
||||||
|
|
||||||
|
|
||||||
|
// -----
|
||||||
|
|
||||||
|
func.func @_QPsb(%arr: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "arr"}) {
|
||||||
|
%0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsbEi"}
|
||||||
|
omp.parallel {
|
||||||
|
%c1 = arith.constant 1 : i32
|
||||||
|
%c50 = arith.constant 50 : i32
|
||||||
|
omp.wsloop for (%indx) : i32 = (%c1) to (%c50) inclusive step (%c1) {
|
||||||
|
%1 = fir.convert %indx : (i32) -> i64
|
||||||
|
%c1_i64 = arith.constant 1 : i64
|
||||||
|
%2 = arith.subi %1, %c1_i64 : i64
|
||||||
|
%3 = fir.coordinate_of %arr, %2 : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
|
||||||
|
fir.store %indx to %3 : !fir.ref<i32>
|
||||||
|
omp.yield
|
||||||
|
}
|
||||||
|
omp.terminator
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check only for the structure of the OpenMP portion and the feasibility of the conversion
|
||||||
|
// CHECK-LABEL: @_QPsb
|
||||||
|
// CHECK-SAME: %{{.*}}: !llvm.ptr<struct<({{.*}})>> {fir.bindc_name = "arr"}
|
||||||
|
// CHECK: omp.parallel {
|
||||||
|
// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i32) : i32
|
||||||
|
// CHECK: %[[C50:.*]] = llvm.mlir.constant(50 : i32) : i32
|
||||||
|
// CHECK: omp.wsloop for (%[[INDX:.*]]) : i32 = (%[[C1]]) to (%[[C50]]) inclusive step (%[[C1]]) {
|
||||||
|
// CHECK: llvm.store %[[INDX]], %{{.*}} : !llvm.ptr<i32>
|
||||||
|
// CHECK: omp.yield
|
||||||
|
// CHECK: omp.terminator
|
||||||
|
// CHECK: llvm.return
|
||||||
|
|
96
flang/test/Lower/OpenMP/omp-parallel-wsloop.f90
Normal file
96
flang/test/Lower/OpenMP/omp-parallel-wsloop.f90
Normal file
|
@ -0,0 +1,96 @@
|
||||||
|
! This test checks lowering of the OpenMP PARALLEL DO combined construct (parallel + worksharing loop).
|
||||||
|
|
||||||
|
! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s
|
||||||
|
|
||||||
|
! CHECK-LABEL: func @_QPsimple_parallel_do()
|
||||||
|
subroutine simple_parallel_do
|
||||||
|
integer :: i
|
||||||
|
! CHECK: omp.parallel
|
||||||
|
! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
|
||||||
|
! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
|
||||||
|
! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
|
||||||
|
! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
|
||||||
|
!$OMP PARALLEL DO
|
||||||
|
do i=1, 9
|
||||||
|
! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
|
||||||
|
print*, i
|
||||||
|
end do
|
||||||
|
! CHECK: omp.yield
|
||||||
|
! CHECK: omp.terminator
|
||||||
|
!$OMP END PARALLEL DO
|
||||||
|
end subroutine
|
||||||
|
|
||||||
|
! CHECK-LABEL: func @_QPparallel_do_with_parallel_clauses
|
||||||
|
! CHECK-SAME: %[[COND_REF:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}, %[[NT_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}
|
||||||
|
subroutine parallel_do_with_parallel_clauses(cond, nt)
|
||||||
|
logical :: cond
|
||||||
|
integer :: nt
|
||||||
|
integer :: i
|
||||||
|
! CHECK: %[[COND:.*]] = fir.load %[[COND_REF]] : !fir.ref<!fir.logical<4>>
|
||||||
|
! CHECK: %[[COND_CVT:.*]] = fir.convert %[[COND]] : (!fir.logical<4>) -> i1
|
||||||
|
! CHECK: %[[NT:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
|
||||||
|
! CHECK: omp.parallel if(%[[COND_CVT]] : i1) num_threads(%[[NT]] : i32) proc_bind(close)
|
||||||
|
! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
|
||||||
|
! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
|
||||||
|
! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
|
||||||
|
! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
|
||||||
|
!$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close)
|
||||||
|
do i=1, 9
|
||||||
|
! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
|
||||||
|
print*, i
|
||||||
|
end do
|
||||||
|
! CHECK: omp.yield
|
||||||
|
! CHECK: omp.terminator
|
||||||
|
!$OMP END PARALLEL DO
|
||||||
|
end subroutine
|
||||||
|
|
||||||
|
! CHECK-LABEL: func @_QPparallel_do_with_clauses
|
||||||
|
! CHECK-SAME: %[[NT_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}
|
||||||
|
subroutine parallel_do_with_clauses(nt)
|
||||||
|
integer :: nt
|
||||||
|
integer :: i
|
||||||
|
! CHECK: %[[NT:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
|
||||||
|
! CHECK: omp.parallel num_threads(%[[NT]] : i32)
|
||||||
|
! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
|
||||||
|
! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
|
||||||
|
! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
|
||||||
|
! CHECK: omp.wsloop schedule(dynamic) for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
|
||||||
|
!$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic)
|
||||||
|
do i=1, 9
|
||||||
|
! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
|
||||||
|
print*, i
|
||||||
|
end do
|
||||||
|
! CHECK: omp.yield
|
||||||
|
! CHECK: omp.terminator
|
||||||
|
!$OMP END PARALLEL DO
|
||||||
|
end subroutine
|
||||||
|
|
||||||
|
! CHECK-LABEL: func @_QPparallel_do_with_privatisation_clauses
|
||||||
|
! CHECK-SAME: %[[COND_REF:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}, %[[NT_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}
|
||||||
|
subroutine parallel_do_with_privatisation_clauses(cond,nt)
|
||||||
|
logical :: cond
|
||||||
|
integer :: nt
|
||||||
|
integer :: i
|
||||||
|
! CHECK: omp.parallel
|
||||||
|
! CHECK: %[[PRIVATE_COND_REF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEcond"}
|
||||||
|
! CHECK: %[[PRIVATE_NT_REF:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEnt"}
|
||||||
|
! CHECK: %[[NT_VAL:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
|
||||||
|
! CHECK: fir.store %[[NT_VAL]] to %[[PRIVATE_NT_REF]] : !fir.ref<i32>
|
||||||
|
! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
|
||||||
|
! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
|
||||||
|
! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
|
||||||
|
! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
|
||||||
|
!$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt)
|
||||||
|
do i=1, 9
|
||||||
|
! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
|
||||||
|
! CHECK: %[[PRIVATE_COND_VAL:.*]] = fir.load %[[PRIVATE_COND_REF]] : !fir.ref<!fir.logical<4>>
|
||||||
|
! CHECK: %[[PRIVATE_COND_VAL_CVT:.*]] = fir.convert %[[PRIVATE_COND_VAL]] : (!fir.logical<4>) -> i1
|
||||||
|
! CHECK: fir.call @_FortranAioOutputLogical({{.*}}, %[[PRIVATE_COND_VAL_CVT]]) : (!fir.ref<i8>, i1) -> i1
|
||||||
|
! CHECK: %[[PRIVATE_NT_VAL:.*]] = fir.load %[[PRIVATE_NT_REF]] : !fir.ref<i32>
|
||||||
|
! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[PRIVATE_NT_VAL]]) : (!fir.ref<i8>, i32) -> i1
|
||||||
|
print*, i, cond, nt
|
||||||
|
end do
|
||||||
|
! CHECK: omp.yield
|
||||||
|
! CHECK: omp.terminator
|
||||||
|
!$OMP END PARALLEL DO
|
||||||
|
end subroutine
|
|
@ -40,8 +40,8 @@ subroutine omp_parallel_sections_allocate(x, y)
|
||||||
integer, intent(inout) :: x, y
|
integer, intent(inout) :: x, y
|
||||||
!FIRDialect: %[[allocator:.*]] = arith.constant 1 : i32
|
!FIRDialect: %[[allocator:.*]] = arith.constant 1 : i32
|
||||||
!LLVMDialect: %[[allocator:.*]] = llvm.mlir.constant(1 : i32) : i32
|
!LLVMDialect: %[[allocator:.*]] = llvm.mlir.constant(1 : i32) : i32
|
||||||
!OMPDialect: omp.parallel allocate(%[[allocator]] : i32 -> %{{.*}} : !fir.ref<i32>) {
|
!OMPDialect: omp.parallel {
|
||||||
!OMPDialect: omp.sections {
|
!OMPDialect: omp.sections allocate(%[[allocator]] : i32 -> %{{.*}} : !fir.ref<i32>) {
|
||||||
!$omp parallel sections allocate(omp_high_bw_mem_alloc: x)
|
!$omp parallel sections allocate(omp_high_bw_mem_alloc: x)
|
||||||
!OMPDialect: omp.section {
|
!OMPDialect: omp.section {
|
||||||
!$omp section
|
!$omp section
|
||||||
|
|
Loading…
Reference in a new issue