new altera single work item barrier check
This lint check is a part of the FLOCL (FPGA Linters for OpenCL) project out of the Synergy Lab at Virginia Tech. FLOCL is a set of lint checks aimed at FPGA developers who write code in OpenCL. The altera single work item barrier check finds OpenCL kernel functions that call a barrier function but do not call an ID function. These kernel functions will be treated as single work-item kernels, which could be inefficient or lead to errors. Based on the "Altera SDK for OpenCL: Best Practices Guide."
This commit is contained in:
parent
e35f9229dc
commit
e69e551e0e
|
@ -10,6 +10,7 @@
|
|||
#include "../ClangTidyModule.h"
|
||||
#include "../ClangTidyModuleRegistry.h"
|
||||
#include "KernelNameRestrictionCheck.h"
|
||||
#include "SingleWorkItemBarrierCheck.h"
|
||||
#include "StructPackAlignCheck.h"
|
||||
|
||||
using namespace clang::ast_matchers;
|
||||
|
@ -23,6 +24,8 @@ public:
|
|||
void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override {
|
||||
CheckFactories.registerCheck<KernelNameRestrictionCheck>(
|
||||
"altera-kernel-name-restriction");
|
||||
CheckFactories.registerCheck<SingleWorkItemBarrierCheck>(
|
||||
"altera-single-work-item-barrier");
|
||||
CheckFactories.registerCheck<StructPackAlignCheck>(
|
||||
"altera-struct-pack-align");
|
||||
}
|
||||
|
|
|
@ -6,6 +6,7 @@ set(LLVM_LINK_COMPONENTS
|
|||
add_clang_library(clangTidyAlteraModule
|
||||
AlteraTidyModule.cpp
|
||||
KernelNameRestrictionCheck.cpp
|
||||
SingleWorkItemBarrierCheck.cpp
|
||||
StructPackAlignCheck.cpp
|
||||
|
||||
LINK_LIBS
|
||||
|
|
|
@ -0,0 +1,84 @@
|
|||
//===--- SingleWorkItemBarrierCheck.cpp - clang-tidy-----------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "SingleWorkItemBarrierCheck.h"
|
||||
#include "clang/AST/ASTContext.h"
|
||||
#include "clang/ASTMatchers/ASTMatchFinder.h"
|
||||
|
||||
using namespace clang::ast_matchers;
|
||||
|
||||
namespace clang {
|
||||
namespace tidy {
|
||||
namespace altera {
|
||||
|
||||
void SingleWorkItemBarrierCheck::registerMatchers(MatchFinder *Finder) {
|
||||
// Find any function that calls barrier but does not call an ID function.
|
||||
// hasAttr(attr::Kind::OpenCLKernel) restricts it to only kernel functions.
|
||||
// FIXME: Have it accept all functions but check for a parameter that gets an
|
||||
// ID from one of the four ID functions.
|
||||
Finder->addMatcher(
|
||||
// Find function declarations...
|
||||
functionDecl(
|
||||
allOf(
|
||||
// That are OpenCL kernels...
|
||||
hasAttr(attr::Kind::OpenCLKernel),
|
||||
// And call a barrier function (either 1.x or 2.x version)...
|
||||
forEachDescendant(callExpr(callee(functionDecl(hasAnyName(
|
||||
"barrier", "work_group_barrier"))))
|
||||
.bind("barrier")),
|
||||
// But do not call an ID function.
|
||||
unless(hasDescendant(callExpr(callee(functionDecl(
|
||||
hasAnyName("get_global_id", "get_local_id", "get_group_id",
|
||||
"get_local_linear_id"))))))))
|
||||
.bind("function"),
|
||||
this);
|
||||
}
|
||||
|
||||
void SingleWorkItemBarrierCheck::check(const MatchFinder::MatchResult &Result) {
|
||||
const auto *MatchedDecl = Result.Nodes.getNodeAs<FunctionDecl>("function");
|
||||
const auto *MatchedBarrier = Result.Nodes.getNodeAs<CallExpr>("barrier");
|
||||
if (AOCVersion < 1701) {
|
||||
// get_group_id and get_local_linear_id were added at/after v17.01
|
||||
diag(MatchedDecl->getLocation(),
|
||||
"kernel function %0 does not call 'get_global_id' or 'get_local_id' "
|
||||
"and will be treated as a single work-item")
|
||||
<< MatchedDecl;
|
||||
diag(MatchedBarrier->getBeginLoc(),
|
||||
"barrier call is in a single work-item and may error out",
|
||||
DiagnosticIDs::Note);
|
||||
} else {
|
||||
// If reqd_work_group_size is anything other than (1,1,1), it will be
|
||||
// interpreted as an NDRange in AOC version >= 17.1.
|
||||
bool IsNDRange = false;
|
||||
if (MatchedDecl->hasAttr<ReqdWorkGroupSizeAttr>()) {
|
||||
const auto *Attribute = MatchedDecl->getAttr<ReqdWorkGroupSizeAttr>();
|
||||
if (Attribute->getXDim() > 1 || Attribute->getYDim() > 1 ||
|
||||
Attribute->getZDim() > 1)
|
||||
IsNDRange = true;
|
||||
}
|
||||
if (IsNDRange) // No warning if kernel is treated as an NDRange.
|
||||
return;
|
||||
diag(MatchedDecl->getLocation(),
|
||||
"kernel function %0 does not call an ID function and may be a viable "
|
||||
"single work-item, but will be forced to execute as an NDRange")
|
||||
<< MatchedDecl;
|
||||
diag(MatchedBarrier->getBeginLoc(),
|
||||
"barrier call will force NDRange execution; if single work-item "
|
||||
"semantics are desired a mem_fence may be more efficient",
|
||||
DiagnosticIDs::Note);
|
||||
}
|
||||
}
|
||||
|
||||
void SingleWorkItemBarrierCheck::storeOptions(
|
||||
ClangTidyOptions::OptionMap &Opts) {
|
||||
Options.store(Opts, "AOCVersion", AOCVersion);
|
||||
}
|
||||
|
||||
} // namespace altera
|
||||
} // namespace tidy
|
||||
} // namespace clang
|
|
@ -0,0 +1,40 @@
|
|||
//===--- SingleWorkItemBarrierCheck.h - clang-tidy---------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_SINGLE_WORK_ITEM_BARRIER_H
|
||||
#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_SINGLE_WORK_ITEM_BARRIER_H
|
||||
|
||||
#include "../ClangTidyCheck.h"
|
||||
|
||||
namespace clang {
|
||||
namespace tidy {
|
||||
namespace altera {
|
||||
|
||||
/// Detects OpenCL kernel functions that call a barrier but do not call an
|
||||
/// ID-function function. These functions will be treated as single work-item
|
||||
/// kernels, which may be inefficient or cause an error.
|
||||
///
|
||||
/// For the user-facing documentation see:
|
||||
/// http://clang.llvm.org/extra/clang-tidy/checks/opencl-single-work-item-barrier.html
|
||||
class SingleWorkItemBarrierCheck : public ClangTidyCheck {
|
||||
const unsigned AOCVersion;
|
||||
|
||||
public:
|
||||
SingleWorkItemBarrierCheck(StringRef Name, ClangTidyContext *Context)
|
||||
: ClangTidyCheck(Name, Context),
|
||||
AOCVersion(Options.get("AOCVersion", 1600U)) {}
|
||||
void registerMatchers(ast_matchers::MatchFinder *Finder) override;
|
||||
void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
|
||||
void storeOptions(ClangTidyOptions::OptionMap &Opts) override;
|
||||
};
|
||||
|
||||
} // namespace altera
|
||||
} // namespace tidy
|
||||
} // namespace clang
|
||||
|
||||
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_SINGLE_WORK_ITEM_BARRIER_H
|
|
@ -99,6 +99,12 @@ New checks
|
|||
Finds kernel files and include directives whose filename is `kernel.cl`,
|
||||
`Verilog.cl`, or `VHDL.cl`.
|
||||
|
||||
- New :doc:`altera-single-work-item-barrier
|
||||
<clang-tidy/checks/altera-single-work-item-barrier>` check.
|
||||
|
||||
Finds OpenCL kernel functions that call a barrier function but do not call
|
||||
an ID function.
|
||||
|
||||
- New :doc:`altera-struct-pack-align
|
||||
<clang-tidy/checks/altera-struct-pack-align>` check.
|
||||
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
.. title:: clang-tidy - altera-single-work-item-barrier
|
||||
|
||||
altera-single-work-item-barrier
|
||||
===============================
|
||||
|
||||
Finds OpenCL kernel functions that call a barrier function but do not call
|
||||
an ID function (``get_local_id``, ``get_local_id``, ``get_group_id``, or
|
||||
``get_local_linear_id``).
|
||||
|
||||
These kernels may be viable single work-item kernels, but will be forced to
|
||||
execute as NDRange kernels if using a newer version of the Altera Offline
|
||||
Compiler (>= v17.01).
|
||||
|
||||
If using an older version of the Altera Offline Compiler, these kernel
|
||||
functions will be treated as single work-item kernels, which could be
|
||||
inefficient or lead to errors if NDRange semantics were intended.
|
||||
|
||||
Based on the `Altera SDK for OpenCL: Best Practices Guide
|
||||
<https://www.altera.com/en_US/pdfs/literature/hb/opencl-sdk/aocl_optimization_guide.pdf>`_.
|
||||
|
||||
Examples:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
// error: function calls barrier but does not call an ID function.
|
||||
void __kernel barrier_no_id(__global int * foo, int size) {
|
||||
for (int i = 0; i < 100; i++) {
|
||||
foo[i] += 5;
|
||||
}
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
// ok: function calls barrier and an ID function.
|
||||
void __kernel barrier_with_id(__global int * foo, int size) {
|
||||
for (int i = 0; i < 100; i++) {
|
||||
int tid = get_global_id(0);
|
||||
foo[tid] += 5;
|
||||
}
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
// ok with AOC Version 17.01: the reqd_work_group_size turns this into
|
||||
// an NDRange.
|
||||
__attribute__((reqd_work_group_size(2,2,2)))
|
||||
void __kernel barrier_with_id(__global int * foo, int size) {
|
||||
for (int i = 0; i < 100; i++) {
|
||||
foo[tid] += 5;
|
||||
}
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
Options
|
||||
-------
|
||||
|
||||
.. option:: AOCVersion
|
||||
|
||||
Defines the version of the Altera Offline Compiler. Defaults to ``1600``
|
||||
(corresponding to version 16.00).
|
|
@ -31,6 +31,7 @@ Clang-Tidy Checks
|
|||
`abseil-time-subtraction <abseil-time-subtraction.html>`_, "Yes"
|
||||
`abseil-upgrade-duration-conversions <abseil-upgrade-duration-conversions.html>`_, "Yes"
|
||||
`altera-kernel-name-restriction <altera-kernel-name-restriction.html>`_,
|
||||
`altera-single-work-item-barrier <altera-single-work-item-barrier.html>`_, "Yes"
|
||||
`altera-struct-pack-align <altera-struct-pack-align.html>`_,
|
||||
`android-cloexec-accept <android-cloexec-accept.html>`_, "Yes"
|
||||
`android-cloexec-accept4 <android-cloexec-accept4.html>`_,
|
||||
|
|
|
@ -0,0 +1,300 @@
|
|||
// RUN: %check_clang_tidy -check-suffix=OLDCLOLDAOC %s altera-single-work-item-barrier %t -- -header-filter=.* "--" -cl-std=CL1.2 -c --include opencl-c.h -DOLDCLOLDAOC
|
||||
// RUN: %check_clang_tidy -check-suffix=NEWCLOLDAOC %s altera-single-work-item-barrier %t -- -header-filter=.* "--" -cl-std=CL2.0 -c --include opencl-c.h -DNEWCLOLDAOC
|
||||
// RUN: %check_clang_tidy -check-suffix=OLDCLNEWAOC %s altera-single-work-item-barrier %t -- -config='{CheckOptions: [{key: altera-single-work-item-barrier.AOCVersion, value: 1701}]}' -header-filter=.* "--" -cl-std=CL1.2 -c --include opencl-c.h -DOLDCLNEWAOC
|
||||
// RUN: %check_clang_tidy -check-suffix=NEWCLNEWAOC %s altera-single-work-item-barrier %t -- -config='{CheckOptions: [{key: altera-single-work-item-barrier.AOCVersion, value: 1701}]}' -header-filter=.* "--" -cl-std=CL2.0 -c --include opencl-c.h -DNEWCLNEWAOC
|
||||
|
||||
#ifdef OLDCLOLDAOC // OpenCL 1.2 Altera Offline Compiler < 17.1
|
||||
void __kernel error_barrier_no_id(__global int * foo, int size) {
|
||||
// CHECK-MESSAGES-OLDCLOLDAOC: :[[@LINE-1]]:15: warning: kernel function 'error_barrier_no_id' does not call 'get_global_id' or 'get_local_id' and will be treated as a single work-item [altera-single-work-item-barrier]
|
||||
for (int j = 0; j < 256; j++) {
|
||||
for (int i = 256; i < size; i+= 256) {
|
||||
foo[j] += foo[j+i];
|
||||
}
|
||||
}
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
// CHECK-MESSAGES-OLDCLOLDAOC: :[[@LINE-1]]:3: note: barrier call is in a single work-item and may error out
|
||||
for (int i = 1; i < 256; i++) {
|
||||
foo[0] += foo[i];
|
||||
}
|
||||
}
|
||||
|
||||
void __kernel success_barrier_global_id(__global int * foo, int size) {
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
int tid = get_global_id(0);
|
||||
}
|
||||
|
||||
void __kernel success_barrier_local_id(__global int * foo, int size) {
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
int tid = get_local_id(0);
|
||||
}
|
||||
|
||||
void __kernel success_barrier_both_ids(__global int * foo, int size) {
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
int gid = get_global_id(0);
|
||||
int lid = get_local_id(0);
|
||||
}
|
||||
|
||||
void success_nokernel_barrier_no_id(__global int * foo, int size) {
|
||||
for (int j = 0; j < 256; j++) {
|
||||
for (int i = 256; i < size; i+= 256) {
|
||||
foo[j] += foo[j+i];
|
||||
}
|
||||
}
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
for (int i = 1; i < 256; i++) {
|
||||
foo[0] += foo[i];
|
||||
}
|
||||
}
|
||||
|
||||
void success_nokernel_barrier_global_id(__global int * foo, int size) {
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
int tid = get_global_id(0);
|
||||
}
|
||||
|
||||
void success_nokernel_barrier_local_id(__global int * foo, int size) {
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
int tid = get_local_id(0);
|
||||
}
|
||||
|
||||
void success_nokernel_barrier_both_ids(__global int * foo, int size) {
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
int gid = get_global_id(0);
|
||||
int lid = get_local_id(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef NEWCLOLDAOC // OpenCL 2.0 Altera Offline Compiler < 17.1
|
||||
void __kernel error_barrier_no_id(__global int * foo, int size) {
|
||||
// CHECK-MESSAGES-NEWCLOLDAOC: :[[@LINE-1]]:15: warning: kernel function 'error_barrier_no_id' does not call 'get_global_id' or 'get_local_id' and will be treated as a single work-item [altera-single-work-item-barrier]
|
||||
for (int j = 0; j < 256; j++) {
|
||||
for (int i = 256; i < size; i+= 256) {
|
||||
foo[j] += foo[j+i];
|
||||
}
|
||||
}
|
||||
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
// CHECK-MESSAGES-NEWCLOLDAOC: :[[@LINE-1]]:3: note: barrier call is in a single work-item and may error out
|
||||
for (int i = 1; i < 256; i++) {
|
||||
foo[0] += foo[i];
|
||||
}
|
||||
}
|
||||
|
||||
void __kernel success_barrier_global_id(__global int * foo, int size) {
|
||||
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
int tid = get_global_id(0);
|
||||
}
|
||||
|
||||
void __kernel success_barrier_local_id(__global int * foo, int size) {
|
||||
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
int tid = get_local_id(0);
|
||||
}
|
||||
|
||||
void __kernel success_barrier_both_ids(__global int * foo, int size) {
|
||||
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
int gid = get_global_id(0);
|
||||
int lid = get_local_id(0);
|
||||
}
|
||||
|
||||
void success_nokernel_barrier_no_id(__global int * foo, int size) {
|
||||
for (int j = 0; j < 256; j++) {
|
||||
for (int i = 256; i < size; i+= 256) {
|
||||
foo[j] += foo[j+i];
|
||||
}
|
||||
}
|
||||
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
for (int i = 1; i < 256; i++) {
|
||||
foo[0] += foo[i];
|
||||
}
|
||||
}
|
||||
|
||||
void success_nokernel_barrier_global_id(__global int * foo, int size) {
|
||||
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
int tid = get_global_id(0);
|
||||
}
|
||||
|
||||
void success_nokernel_barrier_local_id(__global int * foo, int size) {
|
||||
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
int tid = get_local_id(0);
|
||||
}
|
||||
|
||||
void success_nokernel_barrier_both_ids(__global int * foo, int size) {
|
||||
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
int gid = get_global_id(0);
|
||||
int lid = get_local_id(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef OLDCLNEWAOC // OpenCL 1.2 Altera Offline Compiler >= 17.1
|
||||
void __kernel error_barrier_no_id(__global int * foo, int size) {
|
||||
// CHECK-MESSAGES-OLDCLNEWAOC: :[[@LINE-1]]:15: warning: kernel function 'error_barrier_no_id' does not call an ID function and may be a viable single work-item, but will be forced to execute as an NDRange [altera-single-work-item-barrier]
|
||||
for (int j = 0; j < 256; j++) {
|
||||
for (int i = 256; i < size; i+= 256) {
|
||||
foo[j] += foo[j+i];
|
||||
}
|
||||
}
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
// CHECK-MESSAGES-OLDCLNEWAOC: :[[@LINE-1]]:3: note: barrier call will force NDRange execution; if single work-item semantics are desired a mem_fence may be more efficient
|
||||
for (int i = 1; i < 256; i++) {
|
||||
foo[0] += foo[i];
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__ ((reqd_work_group_size(1,1,1)))
|
||||
void __kernel error_barrier_no_id_work_group_size(__global int * foo, int size) {
|
||||
// CHECK-MESSAGES-OLDCLNEWAOC: :[[@LINE-1]]:15: warning: kernel function 'error_barrier_no_id_work_group_size' does not call an ID function and may be a viable single work-item, but will be forced to execute as an NDRange [altera-single-work-item-barrier]
|
||||
for (int j = 0; j < 256; j++) {
|
||||
for (int i = 256; i < size; i+= 256) {
|
||||
foo[j] += foo[j+i];
|
||||
}
|
||||
}
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
// CHECK-MESSAGES-OLDCLNEWAOC: :[[@LINE-1]]:3: note: barrier call will force NDRange execution; if single work-item semantics are desired a mem_fence may be more efficient
|
||||
for (int i = 1; i < 256; i++) {
|
||||
foo[0] += foo[i];
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__ ((reqd_work_group_size(2,1,1)))
|
||||
void __kernel success_barrier_no_id_work_group_size(__global int * foo, int size) {
|
||||
for (int j = 0; j < 256; j++) {
|
||||
for (int i = 256; i < size; i+= 256) {
|
||||
foo[j] += foo[j+i];
|
||||
}
|
||||
}
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
for (int i = 1; i < 256; i++) {
|
||||
foo[0] += foo[i];
|
||||
}
|
||||
}
|
||||
|
||||
void __kernel success_barrier_global_id(__global int * foo, int size) {
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
int tid = get_global_id(0);
|
||||
}
|
||||
|
||||
void __kernel success_barrier_local_id(__global int * foo, int size) {
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
int tid = get_local_id(0);
|
||||
}
|
||||
|
||||
void __kernel success_barrier_both_ids(__global int * foo, int size) {
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
int gid = get_global_id(0);
|
||||
int lid = get_local_id(0);
|
||||
}
|
||||
|
||||
void success_nokernel_barrier_no_id(__global int * foo, int size) {
|
||||
for (int j = 0; j < 256; j++) {
|
||||
for (int i = 256; i < size; i+= 256) {
|
||||
foo[j] += foo[j+i];
|
||||
}
|
||||
}
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
for (int i = 1; i < 256; i++) {
|
||||
foo[0] += foo[i];
|
||||
}
|
||||
}
|
||||
|
||||
void success_nokernel_barrier_global_id(__global int * foo, int size) {
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
int tid = get_global_id(0);
|
||||
}
|
||||
|
||||
void success_nokernel_barrier_local_id(__global int * foo, int size) {
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
int tid = get_local_id(0);
|
||||
}
|
||||
|
||||
void success_nokernel_barrier_both_ids(__global int * foo, int size) {
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
int gid = get_global_id(0);
|
||||
int lid = get_local_id(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef NEWCLNEWAOC // OpenCL 2.0 Altera Offline Compiler >= 17.1
|
||||
void __kernel error_barrier_no_id(__global int * foo, int size) {
|
||||
// CHECK-MESSAGES-NEWCLNEWAOC: :[[@LINE-1]]:15: warning: kernel function 'error_barrier_no_id' does not call an ID function and may be a viable single work-item, but will be forced to execute as an NDRange [altera-single-work-item-barrier]
|
||||
for (int j = 0; j < 256; j++) {
|
||||
for (int i = 256; i < size; i+= 256) {
|
||||
foo[j] += foo[j+i];
|
||||
}
|
||||
}
|
||||
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
// CHECK-MESSAGES-NEWCLNEWAOC: :[[@LINE-1]]:3: note: barrier call will force NDRange execution; if single work-item semantics are desired a mem_fence may be more efficient
|
||||
for (int i = 1; i < 256; i++) {
|
||||
foo[0] += foo[i];
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__ ((reqd_work_group_size(1,1,1)))
|
||||
void __kernel error_barrier_no_id_work_group_size(__global int * foo, int size) {
|
||||
// CHECK-MESSAGES-NEWCLNEWAOC: :[[@LINE-1]]:15: warning: kernel function 'error_barrier_no_id_work_group_size' does not call an ID function and may be a viable single work-item, but will be forced to execute as an NDRange [altera-single-work-item-barrier]
|
||||
for (int j = 0; j < 256; j++) {
|
||||
for (int i = 256; i < size; i+= 256) {
|
||||
foo[j] += foo[j+i];
|
||||
}
|
||||
}
|
||||
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
// CHECK-MESSAGES-NEWCLNEWAOC: :[[@LINE-1]]:3: note: barrier call will force NDRange execution; if single work-item semantics are desired a mem_fence may be more efficient
|
||||
for (int i = 1; i < 256; i++) {
|
||||
foo[0] += foo[i];
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__ ((reqd_work_group_size(2,1,1)))
|
||||
void __kernel success_barrier_no_id_work_group_size(__global int * foo, int size) {
|
||||
for (int j = 0; j < 256; j++) {
|
||||
for (int i = 256; i < size; i+= 256) {
|
||||
foo[j] += foo[j+i];
|
||||
}
|
||||
}
|
||||
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
for (int i = 1; i < 256; i++) {
|
||||
foo[0] += foo[i];
|
||||
}
|
||||
}
|
||||
|
||||
void __kernel success_barrier_global_id(__global int * foo, int size) {
|
||||
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
int tid = get_global_id(0);
|
||||
}
|
||||
|
||||
void __kernel success_barrier_local_id(__global int * foo, int size) {
|
||||
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
int tid = get_local_id(0);
|
||||
}
|
||||
|
||||
void __kernel success_barrier_both_ids(__global int * foo, int size) {
|
||||
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
int gid = get_global_id(0);
|
||||
int lid = get_local_id(0);
|
||||
}
|
||||
|
||||
void success_nokernel_barrier_no_id(__global int * foo, int size) {
|
||||
for (int j = 0; j < 256; j++) {
|
||||
for (int i = 256; i < size; i+= 256) {
|
||||
foo[j] += foo[j+i];
|
||||
}
|
||||
}
|
||||
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
for (int i = 1; i < 256; i++) {
|
||||
foo[0] += foo[i];
|
||||
}
|
||||
}
|
||||
|
||||
void success_nokernel_barrier_global_id(__global int * foo, int size) {
|
||||
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
int tid = get_global_id(0);
|
||||
}
|
||||
|
||||
void success_nokernel_barrier_local_id(__global int * foo, int size) {
|
||||
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
int tid = get_local_id(0);
|
||||
}
|
||||
|
||||
void success_nokernel_barrier_both_ids(__global int * foo, int size) {
|
||||
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
int gid = get_global_id(0);
|
||||
int lid = get_local_id(0);
|
||||
}
|
||||
#endif
|
Loading…
Reference in a new issue