[Flang][OpenMP] Upstream the lowering of the parallel do combined construct

When parallel is used in a combined construct, use a separate function
to create the parallel operation. This function handles the
parallel-specific clauses and leaves the rest to be handled when the
inner operations are created.
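
A minimal sketch of the resulting IR shape (SSA names illustrative, not
taken from the patch): a combined `!$omp parallel do` lowers to an
omp.parallel region wrapping an omp.wsloop, with the parallel-specific
clauses attached to the outer operation:

    omp.parallel if(%cond : i1) num_threads(%nt : i32) proc_bind(close) {
      omp.wsloop for (%i) : i32 = (%lb) to (%ub) inclusive step (%step) {
        // ... loop body ...
        omp.yield
      }
      omp.terminator
    }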

Reviewed By: peixin, shraiysh

Differential Revision: https://reviews.llvm.org/D125465

Co-authored-by: Sourabh Singh Tomar <SourabhSingh.Tomar@amd.com>
Co-authored-by: Eric Schweitz <eschweitz@nvidia.com>
Co-authored-by: Valentin Clement <clementval@gmail.com>
Co-authored-by: Nimish Mishra <neelam.nimish@gmail.com>
Author: Kiran Chandramohan
Date:   2022-05-19 20:23:04 +00:00
parent c153c61fad
commit 4202d69d9e
4 changed files with 221 additions and 29 deletions


@@ -278,6 +278,80 @@ genOMP(Fortran::lower::AbstractConverter &converter,
       standaloneConstruct.u);
 }
 
+static omp::ClauseProcBindKindAttr genProcBindKindAttr(
+    fir::FirOpBuilder &firOpBuilder,
+    const Fortran::parser::OmpClause::ProcBind *procBindClause) {
+  omp::ClauseProcBindKind pbKind;
+  switch (procBindClause->v.v) {
+  case Fortran::parser::OmpProcBindClause::Type::Master:
+    pbKind = omp::ClauseProcBindKind::Master;
+    break;
+  case Fortran::parser::OmpProcBindClause::Type::Close:
+    pbKind = omp::ClauseProcBindKind::Close;
+    break;
+  case Fortran::parser::OmpProcBindClause::Type::Spread:
+    pbKind = omp::ClauseProcBindKind::Spread;
+    break;
+  case Fortran::parser::OmpProcBindClause::Type::Primary:
+    pbKind = omp::ClauseProcBindKind::Primary;
+    break;
+  }
+  return omp::ClauseProcBindKindAttr::get(firOpBuilder.getContext(), pbKind);
+}
+
+/* When parallel is used in a combined construct, then use this function to
+ * create the parallel operation. It handles the parallel specific clauses
+ * and leaves the rest for handling at the inner operations.
+ * TODO: Refactor clause handling
+ */
+template <typename Directive>
+static void
+createCombinedParallelOp(Fortran::lower::AbstractConverter &converter,
+                         Fortran::lower::pft::Evaluation &eval,
+                         const Directive &directive) {
+  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+  mlir::Location currentLocation = converter.getCurrentLocation();
+  Fortran::lower::StatementContext stmtCtx;
+  llvm::ArrayRef<mlir::Type> argTy;
+  mlir::Value ifClauseOperand, numThreadsClauseOperand;
+  SmallVector<Value> allocatorOperands, allocateOperands;
+  mlir::omp::ClauseProcBindKindAttr procBindKindAttr;
+  const auto &opClauseList =
+      std::get<Fortran::parser::OmpClauseList>(directive.t);
+  // TODO: Handle the following clauses
+  // 1. default
+  // 2. copyin
+  // Note: rest of the clauses are handled when the inner operation is created
+  for (const Fortran::parser::OmpClause &clause : opClauseList.v) {
+    if (const auto &ifClause =
+            std::get_if<Fortran::parser::OmpClause::If>(&clause.u)) {
+      auto &expr = std::get<Fortran::parser::ScalarLogicalExpr>(ifClause->v.t);
+      mlir::Value ifVal = fir::getBase(
+          converter.genExprValue(*Fortran::semantics::GetExpr(expr), stmtCtx));
+      ifClauseOperand = firOpBuilder.createConvert(
+          currentLocation, firOpBuilder.getI1Type(), ifVal);
+    } else if (const auto &numThreadsClause =
+                   std::get_if<Fortran::parser::OmpClause::NumThreads>(
+                       &clause.u)) {
+      numThreadsClauseOperand = fir::getBase(converter.genExprValue(
+          *Fortran::semantics::GetExpr(numThreadsClause->v), stmtCtx));
+    } else if (const auto &procBindClause =
+                   std::get_if<Fortran::parser::OmpClause::ProcBind>(
+                       &clause.u)) {
+      procBindKindAttr = genProcBindKindAttr(firOpBuilder, procBindClause);
+    }
+  }
+  // Create and insert the operation.
+  auto parallelOp = firOpBuilder.create<mlir::omp::ParallelOp>(
+      currentLocation, argTy, ifClauseOperand, numThreadsClauseOperand,
+      allocateOperands, allocatorOperands, /*reduction_vars=*/ValueRange(),
+      /*reductions=*/nullptr, procBindKindAttr);
+
+  createBodyOfOp<omp::ParallelOp>(parallelOp, converter, currentLocation,
+                                  &opClauseList, /*iv=*/{},
+                                  /*isCombined=*/true);
+}
+
 static void
 genOMP(Fortran::lower::AbstractConverter &converter,
        Fortran::lower::pft::Evaluation &eval,
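
Note on the clause split implemented by createCombinedParallelOp, as
exercised by the tests in this commit: if, num_threads, and proc_bind are
materialized on omp.parallel, while worksharing clauses such as schedule
stay on the inner omp.wsloop created afterwards. A sketch (SSA names
illustrative):

    omp.parallel num_threads(%nt : i32) {
      omp.wsloop schedule(dynamic) for (%i) : i32 = (%lb) to (%ub) inclusive step (%step) {
        // ...
        omp.yield
      }
      omp.terminator
    }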
@@ -318,23 +392,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
     } else if (const auto &procBindClause =
                    std::get_if<Fortran::parser::OmpClause::ProcBind>(
                        &clause.u)) {
-      omp::ClauseProcBindKind pbKind;
-      switch (procBindClause->v.v) {
-      case Fortran::parser::OmpProcBindClause::Type::Master:
-        pbKind = omp::ClauseProcBindKind::Master;
-        break;
-      case Fortran::parser::OmpProcBindClause::Type::Close:
-        pbKind = omp::ClauseProcBindKind::Close;
-        break;
-      case Fortran::parser::OmpProcBindClause::Type::Spread:
-        pbKind = omp::ClauseProcBindKind::Spread;
-        break;
-      case Fortran::parser::OmpProcBindClause::Type::Primary:
-        pbKind = omp::ClauseProcBindKind::Primary;
-        break;
-      }
-      procBindKindAttr =
-          omp::ClauseProcBindKindAttr::get(firOpBuilder.getContext(), pbKind);
+      procBindKindAttr = genProcBindKindAttr(firOpBuilder, procBindClause);
     } else if (const auto &allocateClause =
                    std::get_if<Fortran::parser::OmpClause::Allocate>(
                        &clause.u)) {
@@ -419,11 +477,17 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
       noWaitClauseOperand, orderedClauseOperand, orderClauseOperand;
   const auto &wsLoopOpClauseList = std::get<Fortran::parser::OmpClauseList>(
       std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t).t);
-  if (llvm::omp::OMPD_do !=
-      std::get<Fortran::parser::OmpLoopDirective>(
-          std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t).t)
-          .v) {
-    TODO(converter.getCurrentLocation(), "Combined worksharing loop construct");
+
+  const auto ompDirective =
+      std::get<Fortran::parser::OmpLoopDirective>(
+          std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t).t)
+          .v;
+  if (llvm::omp::OMPD_parallel_do == ompDirective) {
+    createCombinedParallelOp<Fortran::parser::OmpBeginLoopDirective>(
+        converter, eval,
+        std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t));
+  } else if (llvm::omp::OMPD_do != ompDirective) {
+    TODO(converter.getCurrentLocation(), "Construct enclosing do loop");
   }
 
   int64_t collapseValue = Fortran::lower::getCollapseValue(wsLoopOpClauseList);
@@ -648,15 +712,14 @@ genOMP(Fortran::lower::AbstractConverter &converter,
   // Parallel Sections Construct
   if (dir == llvm::omp::Directive::OMPD_parallel_sections) {
-    auto parallelOp = firOpBuilder.create<mlir::omp::ParallelOp>(
-        currentLocation, /*if_expr_var*/ nullptr, /*num_threads_var*/ nullptr,
-        allocateOperands, allocatorOperands, /*reduction_vars=*/ValueRange(),
-        /*reductions=*/nullptr, /*proc_bind_val*/ nullptr);
-    createBodyOfOp(parallelOp, converter, currentLocation);
+    createCombinedParallelOp<Fortran::parser::OmpBeginSectionsDirective>(
+        converter, eval,
+        std::get<Fortran::parser::OmpBeginSectionsDirective>(
+            sectionsConstruct.t));
     auto sectionsOp = firOpBuilder.create<mlir::omp::SectionsOp>(
         currentLocation, /*reduction_vars*/ ValueRange(),
-        /*reductions=*/nullptr, /*allocate_vars*/ ValueRange(),
-        /*allocators_vars*/ ValueRange(), /*nowait=*/nullptr);
+        /*reductions=*/nullptr, allocateOperands, allocatorOperands,
+        /*nowait=*/nullptr);
     createBodyOfOp(sectionsOp, converter, currentLocation);
 
   // Sections Construct
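
One observable consequence for parallel sections, confirmed by the updated
test at the end of this commit: because createCombinedParallelOp leaves
non-parallel clauses to the inner operation, the allocate clause now lands
on omp.sections instead of omp.parallel. A sketch (SSA names illustrative):

    omp.parallel {
      omp.sections allocate(%allocator : i32 -> %x : !fir.ref<i32>) {
        omp.section {
          // ...
        }
        omp.terminator
      }
      omp.terminator
    }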


@@ -71,3 +71,36 @@ func.func @_QPsb2(%arg0: !fir.ref<i32> {fir.bindc_name = "x"}, %arg1: !fir.ref<i
 // CHECK:    }
 // CHECK:    llvm.return
 // CHECK:  }
+
+// -----
+
+func.func @_QPsb(%arr: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "arr"}) {
+  %0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsbEi"}
+  omp.parallel {
+    %c1 = arith.constant 1 : i32
+    %c50 = arith.constant 50 : i32
+    omp.wsloop for (%indx) : i32 = (%c1) to (%c50) inclusive step (%c1) {
+      %1 = fir.convert %indx : (i32) -> i64
+      %c1_i64 = arith.constant 1 : i64
+      %2 = arith.subi %1, %c1_i64 : i64
+      %3 = fir.coordinate_of %arr, %2 : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+      fir.store %indx to %3 : !fir.ref<i32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}
+
+// Check only for the structure of the OpenMP portion and the feasibility of the conversion
+// CHECK-LABEL: @_QPsb
+// CHECK-SAME: %{{.*}}: !llvm.ptr<struct<({{.*}})>> {fir.bindc_name = "arr"}
+// CHECK:    omp.parallel {
+// CHECK:      %[[C1:.*]] = llvm.mlir.constant(1 : i32) : i32
+// CHECK:      %[[C50:.*]] = llvm.mlir.constant(50 : i32) : i32
+// CHECK:      omp.wsloop for (%[[INDX:.*]]) : i32 = (%[[C1]]) to (%[[C50]]) inclusive step (%[[C1]]) {
+// CHECK:        llvm.store %[[INDX]], %{{.*}} : !llvm.ptr<i32>
+// CHECK:        omp.yield
+// CHECK:      omp.terminator
+// CHECK:    llvm.return


@@ -0,0 +1,96 @@
+! This test checks lowering of the OpenMP PARALLEL DO combined construct (worksharing loop).
+
+! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s
+
+! CHECK-LABEL: func @_QPsimple_parallel_do()
+subroutine simple_parallel_do
+  integer :: i
+  ! CHECK:  omp.parallel
+  ! CHECK:  %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! CHECK:  %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! CHECK:  %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! CHECK:  omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  !$OMP PARALLEL DO
+  do i=1, 9
+    ! CHECK:  fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
+    print*, i
+  end do
+  ! CHECK:  omp.yield
+  ! CHECK:  omp.terminator
+  !$OMP END PARALLEL DO
+end subroutine
+
+! CHECK-LABEL: func @_QPparallel_do_with_parallel_clauses
+! CHECK-SAME: %[[COND_REF:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}, %[[NT_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}
+subroutine parallel_do_with_parallel_clauses(cond, nt)
+  logical :: cond
+  integer :: nt
+  integer :: i
+  ! CHECK:  %[[COND:.*]] = fir.load %[[COND_REF]] : !fir.ref<!fir.logical<4>>
+  ! CHECK:  %[[COND_CVT:.*]] = fir.convert %[[COND]] : (!fir.logical<4>) -> i1
+  ! CHECK:  %[[NT:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
+  ! CHECK:  omp.parallel if(%[[COND_CVT]] : i1) num_threads(%[[NT]] : i32) proc_bind(close)
+  ! CHECK:  %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! CHECK:  %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! CHECK:  %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! CHECK:  omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  !$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close)
+  do i=1, 9
+    ! CHECK:  fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
+    print*, i
+  end do
+  ! CHECK:  omp.yield
+  ! CHECK:  omp.terminator
+  !$OMP END PARALLEL DO
+end subroutine
+
+! CHECK-LABEL: func @_QPparallel_do_with_clauses
+! CHECK-SAME: %[[NT_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}
+subroutine parallel_do_with_clauses(nt)
+  integer :: nt
+  integer :: i
+  ! CHECK:  %[[NT:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
+  ! CHECK:  omp.parallel num_threads(%[[NT]] : i32)
+  ! CHECK:  %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! CHECK:  %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! CHECK:  %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! CHECK:  omp.wsloop schedule(dynamic) for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  !$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic)
+  do i=1, 9
+    ! CHECK:  fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
+    print*, i
+  end do
+  ! CHECK:  omp.yield
+  ! CHECK:  omp.terminator
+  !$OMP END PARALLEL DO
+end subroutine
+
+! CHECK-LABEL: func @_QPparallel_do_with_privatisation_clauses
+! CHECK-SAME: %[[COND_REF:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}, %[[NT_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}
+subroutine parallel_do_with_privatisation_clauses(cond,nt)
+  logical :: cond
+  integer :: nt
+  integer :: i
+  ! CHECK:  omp.parallel
+  ! CHECK:  %[[PRIVATE_COND_REF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEcond"}
+  ! CHECK:  %[[PRIVATE_NT_REF:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEnt"}
+  ! CHECK:  %[[NT_VAL:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
+  ! CHECK:  fir.store %[[NT_VAL]] to %[[PRIVATE_NT_REF]] : !fir.ref<i32>
+  ! CHECK:  %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! CHECK:  %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! CHECK:  %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! CHECK:  omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  !$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt)
+  do i=1, 9
+    ! CHECK:  fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
+    ! CHECK:  %[[PRIVATE_COND_VAL:.*]] = fir.load %[[PRIVATE_COND_REF]] : !fir.ref<!fir.logical<4>>
+    ! CHECK:  %[[PRIVATE_COND_VAL_CVT:.*]] = fir.convert %[[PRIVATE_COND_VAL]] : (!fir.logical<4>) -> i1
+    ! CHECK:  fir.call @_FortranAioOutputLogical({{.*}}, %[[PRIVATE_COND_VAL_CVT]]) : (!fir.ref<i8>, i1) -> i1
+    ! CHECK:  %[[PRIVATE_NT_VAL:.*]] = fir.load %[[PRIVATE_NT_REF]] : !fir.ref<i32>
+    ! CHECK:  fir.call @_FortranAioOutputInteger32({{.*}}, %[[PRIVATE_NT_VAL]]) : (!fir.ref<i8>, i32) -> i1
+    print*, i, cond, nt
+  end do
+  ! CHECK:  omp.yield
+  ! CHECK:  omp.terminator
+  !$OMP END PARALLEL DO
+end subroutine


@@ -40,8 +40,8 @@ subroutine omp_parallel_sections_allocate(x, y)
   integer, intent(inout) :: x, y
   !FIRDialect: %[[allocator:.*]] = arith.constant 1 : i32
   !LLVMDialect: %[[allocator:.*]] = llvm.mlir.constant(1 : i32) : i32
-  !OMPDialect: omp.parallel allocate(%[[allocator]] : i32 -> %{{.*}} : !fir.ref<i32>) {
-  !OMPDialect: omp.sections {
+  !OMPDialect: omp.parallel {
+  !OMPDialect: omp.sections allocate(%[[allocator]] : i32 -> %{{.*}} : !fir.ref<i32>) {
   !$omp parallel sections allocate(omp_high_bw_mem_alloc: x)
   !OMPDialect: omp.section {
   !$omp section