llvm/clang/test/OpenMP/unroll_codegen_heuristic.cpp
Michael Kruse a22236120f [OpenMP] Implement '#pragma omp unroll'.
Implementation of the unroll directive introduced in OpenMP 5.1. Follows the approach from D76342 for the tile directive (i.e. AST-based, not using the OpenMPIRBuilder). Tries to use `llvm.loop.unroll.*` metadata where possible, but has to fall back to an AST representation of the outer loop if the partially unrolled generated loop is associated with another directive (because it needs to compute the number of iterations).

Reviewed By: ABataev

Differential Revision: https://reviews.llvm.org/D99459
2021-06-10 14:30:17 -05:00

65 lines
2.7 KiB
C++

// Check code generation
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
// Check same results after serialization round-trip
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// Placeholder standing in for arbitrary loop-body code. It is variadic, so a
// test can hand it any set of values; its own body is intentionally empty.
extern "C" void body(...) {}
// IR-LABEL: @func(
// IR-NEXT: [[ENTRY:.*]]:
// IR-NEXT: %[[START_ADDR:.+]] = alloca i32, align 4
// IR-NEXT: %[[END_ADDR:.+]] = alloca i32, align 4
// IR-NEXT: %[[STEP_ADDR:.+]] = alloca i32, align 4
// IR-NEXT: %[[I:.+]] = alloca i32, align 4
// IR-NEXT: store i32 %[[START:.+]], i32* %[[START_ADDR]], align 4
// IR-NEXT: store i32 %[[END:.+]], i32* %[[END_ADDR]], align 4
// IR-NEXT: store i32 %[[STEP:.+]], i32* %[[STEP_ADDR]], align 4
// IR-NEXT: %[[TMP0:.+]] = load i32, i32* %[[START_ADDR]], align 4
// IR-NEXT: store i32 %[[TMP0]], i32* %[[I]], align 4
// IR-NEXT: br label %[[FOR_COND:.+]]
// IR-EMPTY:
// IR-NEXT: [[FOR_COND]]:
// IR-NEXT: %[[TMP1:.+]] = load i32, i32* %[[I]], align 4
// IR-NEXT: %[[TMP2:.+]] = load i32, i32* %[[END_ADDR]], align 4
// IR-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP1]], %[[TMP2]]
// IR-NEXT: br i1 %[[CMP]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]]
// IR-EMPTY:
// IR-NEXT: [[FOR_BODY]]:
// IR-NEXT: %[[TMP3:.+]] = load i32, i32* %[[START_ADDR]], align 4
// IR-NEXT: %[[TMP4:.+]] = load i32, i32* %[[END_ADDR]], align 4
// IR-NEXT: %[[TMP5:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
// IR-NEXT: %[[TMP6:.+]] = load i32, i32* %[[I]], align 4
// IR-NEXT: call void (...) @body(i32 %[[TMP3]], i32 %[[TMP4]], i32 %[[TMP5]], i32 %[[TMP6]])
// IR-NEXT: br label %[[FOR_INC:.+]]
// IR-EMPTY:
// IR-NEXT: [[FOR_INC]]:
// IR-NEXT: %[[TMP7:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
// IR-NEXT: %[[TMP8:.+]] = load i32, i32* %[[I]], align 4
// IR-NEXT: %[[ADD:.+]] = add nsw i32 %[[TMP8]], %[[TMP7]]
// IR-NEXT: store i32 %[[ADD]], i32* %[[I]], align 4
// IR-NEXT: br label %[[FOR_COND]], !llvm.loop ![[LOOP2:[0-9]+]]
// IR-EMPTY:
// IR-NEXT: [[FOR_END]]:
// IR-NEXT: ret void
// IR-NEXT: }
// Test subject: a for-loop whose start, end and step all come from function
// parameters, annotated with a clause-less '#pragma omp unroll'. The check
// lines in this file match the function's LLVM output as a loop made of
// condition, body and increment blocks, whose back-edge branch carries loop
// metadata containing "llvm.loop.mustprogress" and "llvm.loop.unroll.enable".
extern "C" void func(int start, int end, int step) {
#pragma omp unroll
for (int i = start; i < end; i+=step)
// All three parameters and the induction variable are forwarded so that each
// one is loaded inside the loop body, which the check lines match.
body(start, end, step, i);
}
#endif /* HEADER */
// IR: ![[LOOP2]] = distinct !{![[LOOP2]], ![[LOOPPROP3:[0-9]+]], ![[LOOPPROP4:[0-9]+]]}
// IR: ![[LOOPPROP3]] = !{!"llvm.loop.mustprogress"}
// IR: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"}