[Flang][OpenMP] Upstream the lowering of the parallel do combined construct

When parallel is used in a combined construct, then use a separate function to create the parallel operation. It handles the parallel specific clauses and leaves the rest for handling at the inner operations. Reviewed By: peixin, shraiysh Differential Revision: https://reviews.llvm.org/D125465 Co-authored-by: Sourabh Singh Tomar <SourabhSingh.Tomar@amd.com> Co-authored-by: Eric Schweitz <eschweitz@nvidia.com> Co-authored-by: Valentin Clement <clementval@gmail.com> Co-authored-by: Nimish Mishra <neelam.nimish@gmail.com>
2022-05-19 20:23:04 +00:00 · 2022-05-19 20:23:04 +00:00 · 4202d69d9e
parent c153c61fad
commit 4202d69d9e
4 changed files with 221 additions and 29 deletions
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@ -278,6 +278,80 @@ genOMP(Fortran::lower::AbstractConverter &converter,
      standaloneConstruct.u);
 }
 /// Translate a parsed PROC_BIND clause into the matching OpenMP dialect
 /// attribute.
 ///
 /// \param firOpBuilder builder supplying the MLIR context in which the
 ///        attribute is created.
 /// \param procBindClause parsed clause; the binding policy is read from its
 ///        `v.v` member.
 /// \return the attribute holding the omp::ClauseProcBindKind that corresponds
 ///         to the parsed policy.
 static omp::ClauseProcBindKindAttr genProcBindKindAttr(
    fir::FirOpBuilder &firOpBuilder,
    const Fortran::parser::OmpClause::ProcBind *procBindClause) {
  using ParsedKind = Fortran::parser::OmpProcBindClause::Type;
  using DialectKind = omp::ClauseProcBindKind;

  // Each parser policy is listed explicitly and maps to the dialect
  // enumerator of the same name; the switch has no default label.
  DialectKind dialectKind;
  switch (procBindClause->v.v) {
  case ParsedKind::Master:
    dialectKind = DialectKind::Master;
    break;
  case ParsedKind::Close:
    dialectKind = DialectKind::Close;
    break;
  case ParsedKind::Spread:
    dialectKind = DialectKind::Spread;
    break;
  case ParsedKind::Primary:
    dialectKind = DialectKind::Primary;
    break;
  }

  auto *context = firOpBuilder.getContext();
  return omp::ClauseProcBindKindAttr::get(context, dialectKind);
 }
 /// Create the `omp.parallel` operation for a combined construct (one where
 /// `parallel` is fused with another directive) and then create its body.
 ///
 /// Only the clauses lowered in the loop below are handled here: `if`,
 /// `num_threads` and `proc_bind`. All other clauses are ignored by this
 /// function and left for handling when the inner operation is created.
 /// The allocate/allocator operand lists passed to the op are always empty.
 ///
 /// TODO: Handle the `default` and `copyin` clauses.
 /// TODO: Refactor clause handling.
 ///
 /// \tparam Directive parsed begin-directive type whose tuple member `t`
 ///         contains a Fortran::parser::OmpClauseList.
 /// \param converter lowering bridge providing the builder, the current
 ///        location and expression lowering.
 /// \param eval not referenced by this function.
 /// \param directive parsed directive whose clause list is inspected.
 template <typename Directive>
 static void
 createCombinedParallelOp(Fortran::lower::AbstractConverter &converter,
                         Fortran::lower::pft::Evaluation &eval,
                         const Directive &directive) {
  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
  mlir::Location loc = converter.getCurrentLocation();
  Fortran::lower::StatementContext stmtCtx;
  const auto &clauseList =
      std::get<Fortran::parser::OmpClauseList>(directive.t);

  // These stay null unless the corresponding clause is present.
  mlir::Value ifOperand;
  mlir::Value numThreadsOperand;
  mlir::omp::ClauseProcBindKindAttr procBindAttr;

  // Clauses are visited in source order, so any IR produced while lowering
  // their expressions is emitted in that same order.
  for (const Fortran::parser::OmpClause &clause : clauseList.v) {
    if (const auto *ifClause =
            std::get_if<Fortran::parser::OmpClause::If>(&clause.u)) {
      const auto &condition =
          std::get<Fortran::parser::ScalarLogicalExpr>(ifClause->v.t);
      mlir::Value conditionValue = fir::getBase(converter.genExprValue(
          *Fortran::semantics::GetExpr(condition), stmtCtx));
      // The op takes an i1 condition, so convert the lowered value.
      ifOperand =
          builder.createConvert(loc, builder.getI1Type(), conditionValue);
      continue;
    }
    if (const auto *numThreadsClause =
            std::get_if<Fortran::parser::OmpClause::NumThreads>(&clause.u)) {
      numThreadsOperand = fir::getBase(converter.genExprValue(
          *Fortran::semantics::GetExpr(numThreadsClause->v), stmtCtx));
      continue;
    }
    if (const auto *procBindClause =
            std::get_if<Fortran::parser::OmpClause::ProcBind>(&clause.u)) {
      procBindAttr = genProcBindKindAttr(builder, procBindClause);
      continue;
    }
    // Any other clause is left for the inner operation.
  }

  // Create and insert the parallel operation, then populate its region.
  llvm::ArrayRef<mlir::Type> resultTypes;
  SmallVector<Value> allocateOperands, allocatorOperands;
  auto parallelOp = builder.create<mlir::omp::ParallelOp>(
      loc, resultTypes, ifOperand, numThreadsOperand, allocateOperands,
      allocatorOperands, /*reduction_vars=*/ValueRange(),
      /*reductions=*/nullptr, procBindAttr);
  createBodyOfOp<omp::ParallelOp>(parallelOp, converter, loc, &clauseList,
                                  /*iv=*/{}, /*isCombined=*/true);
 }
 static void
 genOMP(Fortran::lower::AbstractConverter &converter,
       Fortran::lower::pft::Evaluation &eval,
@ -318,23 +392,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
    } else if (const auto &procBindClause =
                   std::get_if<Fortran::parser::OmpClause::ProcBind>(
                       &clause.u)) {
-      omp::ClauseProcBindKind pbKind;
+      procBindKindAttr = genProcBindKindAttr(firOpBuilder, procBindClause);
      switch (procBindClause->v.v) {
      case Fortran::parser::OmpProcBindClause::Type::Master:
        pbKind = omp::ClauseProcBindKind::Master;
        break;
      case Fortran::parser::OmpProcBindClause::Type::Close:
        pbKind = omp::ClauseProcBindKind::Close;
        break;
      case Fortran::parser::OmpProcBindClause::Type::Spread:
        pbKind = omp::ClauseProcBindKind::Spread;
        break;
      case Fortran::parser::OmpProcBindClause::Type::Primary:
        pbKind = omp::ClauseProcBindKind::Primary;
        break;
      }
      procBindKindAttr =
          omp::ClauseProcBindKindAttr::get(firOpBuilder.getContext(), pbKind);
    } else if (const auto &allocateClause =
                   std::get_if<Fortran::parser::OmpClause::Allocate>(
                       &clause.u)) {
@ -419,11 +477,17 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
      noWaitClauseOperand, orderedClauseOperand, orderClauseOperand;
  const auto &wsLoopOpClauseList = std::get<Fortran::parser::OmpClauseList>(
      std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t).t);
-  if (llvm::omp::OMPD_do !=
+
  const auto ompDirective =
      std::get<Fortran::parser::OmpLoopDirective>(
          std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t).t)
-          .v) {
+          .v;
-    TODO(converter.getCurrentLocation(), "Combined worksharing loop construct");
+  if (llvm::omp::OMPD_parallel_do == ompDirective) {
    createCombinedParallelOp<Fortran::parser::OmpBeginLoopDirective>(
        converter, eval,
        std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t));
  } else if (llvm::omp::OMPD_do != ompDirective) {
    TODO(converter.getCurrentLocation(), "Construct enclosing do loop");
  }
  int64_t collapseValue = Fortran::lower::getCollapseValue(wsLoopOpClauseList);
@ -648,15 +712,14 @@ genOMP(Fortran::lower::AbstractConverter &converter,
  // Parallel Sections Construct
  if (dir == llvm::omp::Directive::OMPD_parallel_sections) {
-    auto parallelOp = firOpBuilder.create<mlir::omp::ParallelOp>(
+    createCombinedParallelOp<Fortran::parser::OmpBeginSectionsDirective>(
-        currentLocation, /*if_expr_var*/ nullptr, /*num_threads_var*/ nullptr,
+        converter, eval,
-        allocateOperands, allocatorOperands, /*reduction_vars=*/ValueRange(),
+        std::get<Fortran::parser::OmpBeginSectionsDirective>(
-        /*reductions=*/nullptr, /*proc_bind_val*/ nullptr);
+            sectionsConstruct.t));
    createBodyOfOp(parallelOp, converter, currentLocation);
    auto sectionsOp = firOpBuilder.create<mlir::omp::SectionsOp>(
        currentLocation, /*reduction_vars*/ ValueRange(),
-        /*reductions=*/nullptr, /*allocate_vars*/ ValueRange(),
+        /*reductions=*/nullptr, allocateOperands, allocatorOperands,
-        /*allocators_vars*/ ValueRange(), /*nowait=*/nullptr);
+        /*nowait=*/nullptr);
    createBodyOfOp(sectionsOp, converter, currentLocation);
    // Sections Construct
--- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
+++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
@ -71,3 +71,36 @@ func.func @_QPsb2(%arg0: !fir.ref<i32> {fir.bindc_name = "x"}, %arg1: !fir.ref<i
 // CHECK: }
 // CHECK: llvm.return
 // CHECK: }
 // -----
 // Conversion input: an omp.parallel region wrapping an omp.wsloop that runs
 // from 1 to 50 inclusive with step 1. The loop body stores the induction
 // variable into the boxed array %arr at coordinate (index - 1).
 func.func @_QPsb(%arr: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "arr"}) {
  %0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsbEi"}
  omp.parallel   {
    %c1 = arith.constant 1 : i32
    %c50 = arith.constant 50 : i32
    omp.wsloop   for  (%indx) : i32 = (%c1) to (%c50) inclusive step (%c1) {
      %1 = fir.convert %indx : (i32) -> i64
      %c1_i64 = arith.constant 1 : i64
      %2 = arith.subi %1, %c1_i64 : i64
      %3 = fir.coordinate_of %arr, %2 : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
      fir.store %indx to %3 : !fir.ref<i32>
      omp.yield
    }
    omp.terminator
  }
  return
 }
 // Check only for the structure of the OpenMP portion and the feasibility of the conversion
 // CHECK-LABEL: @_QPsb
 // CHECK-SAME: %{{.*}}: !llvm.ptr<struct<({{.*}})>> {fir.bindc_name = "arr"}
 // CHECK:    omp.parallel   {
 // CHECK:      %[[C1:.*]] = llvm.mlir.constant(1 : i32) : i32
 // CHECK:      %[[C50:.*]] = llvm.mlir.constant(50 : i32) : i32
 // CHECK:      omp.wsloop   for  (%[[INDX:.*]]) : i32 = (%[[C1]]) to (%[[C50]]) inclusive step (%[[C1]]) {
 // CHECK:        llvm.store %[[INDX]], %{{.*}} : !llvm.ptr<i32>
 // CHECK:        omp.yield
 // CHECK:      omp.terminator
 // CHECK:    llvm.return
--- a/flang/test/Lower/OpenMP/omp-parallel-wsloop.f90
+++ b/flang/test/Lower/OpenMP/omp-parallel-wsloop.f90
@ -0,0 +1,96 @@
 ! This test checks lowering of the OpenMP PARALLEL DO combined construct.
 ! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s
 ! CHECK-LABEL: func @_QPsimple_parallel_do()
 ! PARALLEL DO without clauses: expects an omp.parallel containing an
 ! omp.wsloop over constant bounds 1 to 9 with step 1, whose body prints the
 ! loop index.
 subroutine simple_parallel_do
  integer :: i
  ! CHECK:  omp.parallel
  ! CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
  ! CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
  ! CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
  ! CHECK:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
  !$OMP PARALLEL DO
  do i=1, 9
  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
    print*, i
  end do
  ! CHECK:       omp.yield
  ! CHECK:       omp.terminator
  !$OMP END PARALLEL DO
 end subroutine
 ! CHECK-LABEL: func @_QPparallel_do_with_parallel_clauses
 ! CHECK-SAME: %[[COND_REF:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}, %[[NT_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}
 ! PARALLEL DO carrying only parallel-specific clauses: expects the IF
 ! condition (converted to i1), the NUM_THREADS value and PROC_BIND(close) to
 ! appear on omp.parallel, with the clause operands loaded before that op and
 ! a plain omp.wsloop inside it.
 subroutine parallel_do_with_parallel_clauses(cond, nt)
  logical :: cond
  integer :: nt
  integer :: i
  ! CHECK:  %[[COND:.*]] = fir.load %[[COND_REF]] : !fir.ref<!fir.logical<4>>
  ! CHECK:  %[[COND_CVT:.*]] = fir.convert %[[COND]] : (!fir.logical<4>) -> i1
  ! CHECK:  %[[NT:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
  ! CHECK:  omp.parallel if(%[[COND_CVT]] : i1) num_threads(%[[NT]] : i32) proc_bind(close)
  ! CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
  ! CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
  ! CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
  ! CHECK:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
  !$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close)
  do i=1, 9
  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
    print*, i
  end do
  ! CHECK:       omp.yield
  ! CHECK:       omp.terminator
  !$OMP END PARALLEL DO
 end subroutine
 ! CHECK-LABEL: func @_QPparallel_do_with_clauses
 ! CHECK-SAME: %[[NT_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}
 ! PARALLEL DO mixing a parallel clause with a loop clause: expects
 ! NUM_THREADS to be attached to omp.parallel while SCHEDULE(dynamic) is
 ! attached to the inner omp.wsloop.
 subroutine parallel_do_with_clauses(nt)
  integer :: nt
  integer :: i
  ! CHECK:  %[[NT:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
  ! CHECK:  omp.parallel num_threads(%[[NT]] : i32)
  ! CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
  ! CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
  ! CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
  ! CHECK:     omp.wsloop schedule(dynamic) for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
  !$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic)
  do i=1, 9
  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
    print*, i
  end do
  ! CHECK:       omp.yield
  ! CHECK:       omp.terminator
  !$OMP END PARALLEL DO
 end subroutine
 ! CHECK-LABEL: func @_QPparallel_do_with_privatisation_clauses
 ! CHECK-SAME: %[[COND_REF:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}, %[[NT_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}
 ! PARALLEL DO with PRIVATE(cond) and FIRSTPRIVATE(nt): expects private
 ! allocas for both variables inside omp.parallel, the original value of nt
 ! copied into its private copy before the omp.wsloop, and the loop body
 ! reading cond and nt through the private copies.
 subroutine parallel_do_with_privatisation_clauses(cond,nt)
  logical :: cond
  integer :: nt
  integer :: i
  ! CHECK:  omp.parallel
  ! CHECK:    %[[PRIVATE_COND_REF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEcond"}
  ! CHECK:    %[[PRIVATE_NT_REF:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEnt"}
  ! CHECK:    %[[NT_VAL:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
  ! CHECK:    fir.store %[[NT_VAL]] to %[[PRIVATE_NT_REF]] : !fir.ref<i32>
  ! CHECK:    %[[WS_LB:.*]] = arith.constant 1 : i32
  ! CHECK:    %[[WS_UB:.*]] = arith.constant 9 : i32
  ! CHECK:    %[[WS_STEP:.*]] = arith.constant 1 : i32
  ! CHECK:    omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
  !$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt)
  do i=1, 9
  ! CHECK:      fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
  ! CHECK:      %[[PRIVATE_COND_VAL:.*]] = fir.load %[[PRIVATE_COND_REF]] : !fir.ref<!fir.logical<4>>
  ! CHECK:      %[[PRIVATE_COND_VAL_CVT:.*]] = fir.convert %[[PRIVATE_COND_VAL]] : (!fir.logical<4>) -> i1
  ! CHECK:      fir.call @_FortranAioOutputLogical({{.*}}, %[[PRIVATE_COND_VAL_CVT]]) : (!fir.ref<i8>, i1) -> i1
  ! CHECK:      %[[PRIVATE_NT_VAL:.*]] = fir.load %[[PRIVATE_NT_REF]] : !fir.ref<i32>
  ! CHECK:      fir.call @_FortranAioOutputInteger32({{.*}}, %[[PRIVATE_NT_VAL]]) : (!fir.ref<i8>, i32) -> i1
    print*, i, cond, nt
  end do
  ! CHECK:      omp.yield
  ! CHECK:    omp.terminator
  !$OMP END PARALLEL DO
 end subroutine
--- a/flang/test/Lower/OpenMP/parallel-sections.f90
+++ b/flang/test/Lower/OpenMP/parallel-sections.f90
@ -40,8 +40,8 @@ subroutine omp_parallel_sections_allocate(x, y)
  integer, intent(inout) :: x, y
  !FIRDialect: %[[allocator:.*]] = arith.constant 1 : i32
  !LLVMDialect: %[[allocator:.*]] = llvm.mlir.constant(1 : i32) : i32
-  !OMPDialect: omp.parallel allocate(%[[allocator]] : i32 -> %{{.*}} : !fir.ref<i32>) {
+  !OMPDialect: omp.parallel {
-  !OMPDialect: omp.sections {
+  !OMPDialect: omp.sections allocate(%[[allocator]] : i32 -> %{{.*}} : !fir.ref<i32>) {
  !$omp parallel sections allocate(omp_high_bw_mem_alloc: x)
    !OMPDialect: omp.section {
    !$omp section