new altera single work item barrier check

This lint check is a part of the FLOCL (FPGA Linters for OpenCL)
project out of the Synergy Lab at Virginia Tech.

FLOCL is a set of lint checks aimed at FPGA developers who write code
in OpenCL.

The altera single work item barrier check finds OpenCL kernel functions
that call a barrier function but do not call an ID function. These
kernel functions will be treated as single work-item kernels, which
could be inefficient or lead to errors.

Based on the "Altera SDK for OpenCL: Best Practices Guide."
This commit is contained in:
Frank Derry Wanye 2020-12-18 07:49:48 -05:00 committed by Aaron Ballman
parent e35f9229dc
commit e69e551e0e
8 changed files with 493 additions and 0 deletions

View file

@ -10,6 +10,7 @@
#include "../ClangTidyModule.h"
#include "../ClangTidyModuleRegistry.h"
#include "KernelNameRestrictionCheck.h"
#include "SingleWorkItemBarrierCheck.h"
#include "StructPackAlignCheck.h"
using namespace clang::ast_matchers;
@ -23,6 +24,8 @@ public:
void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override {
CheckFactories.registerCheck<KernelNameRestrictionCheck>(
"altera-kernel-name-restriction");
CheckFactories.registerCheck<SingleWorkItemBarrierCheck>(
"altera-single-work-item-barrier");
CheckFactories.registerCheck<StructPackAlignCheck>(
"altera-struct-pack-align");
}

View file

@ -6,6 +6,7 @@ set(LLVM_LINK_COMPONENTS
add_clang_library(clangTidyAlteraModule
AlteraTidyModule.cpp
KernelNameRestrictionCheck.cpp
SingleWorkItemBarrierCheck.cpp
StructPackAlignCheck.cpp
LINK_LIBS

View file

@ -0,0 +1,84 @@
//===--- SingleWorkItemBarrierCheck.cpp - clang-tidy-----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "SingleWorkItemBarrierCheck.h"
#include "clang/AST/ASTContext.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
using namespace clang::ast_matchers;
namespace clang {
namespace tidy {
namespace altera {
void SingleWorkItemBarrierCheck::registerMatchers(MatchFinder *Finder) {
// Find any function that calls barrier but does not call an ID function.
// hasAttr(attr::Kind::OpenCLKernel) restricts it to only kernel functions.
// FIXME: Have it accept all functions but check for a parameter that gets an
// ID from one of the four ID functions.
Finder->addMatcher(
// Find function declarations...
functionDecl(
allOf(
// That are OpenCL kernels...
hasAttr(attr::Kind::OpenCLKernel),
// And call a barrier function (either 1.x or 2.x version)...
forEachDescendant(callExpr(callee(functionDecl(hasAnyName(
"barrier", "work_group_barrier"))))
.bind("barrier")),
// But do not call an ID function.
unless(hasDescendant(callExpr(callee(functionDecl(
hasAnyName("get_global_id", "get_local_id", "get_group_id",
"get_local_linear_id"))))))))
.bind("function"),
this);
}
void SingleWorkItemBarrierCheck::check(const MatchFinder::MatchResult &Result) {
const auto *MatchedDecl = Result.Nodes.getNodeAs<FunctionDecl>("function");
const auto *MatchedBarrier = Result.Nodes.getNodeAs<CallExpr>("barrier");
if (AOCVersion < 1701) {
// get_group_id and get_local_linear_id were added at/after v17.01
diag(MatchedDecl->getLocation(),
"kernel function %0 does not call 'get_global_id' or 'get_local_id' "
"and will be treated as a single work-item")
<< MatchedDecl;
diag(MatchedBarrier->getBeginLoc(),
"barrier call is in a single work-item and may error out",
DiagnosticIDs::Note);
} else {
// If reqd_work_group_size is anything other than (1,1,1), it will be
// interpreted as an NDRange in AOC version >= 17.1.
bool IsNDRange = false;
if (MatchedDecl->hasAttr<ReqdWorkGroupSizeAttr>()) {
const auto *Attribute = MatchedDecl->getAttr<ReqdWorkGroupSizeAttr>();
if (Attribute->getXDim() > 1 || Attribute->getYDim() > 1 ||
Attribute->getZDim() > 1)
IsNDRange = true;
}
if (IsNDRange) // No warning if kernel is treated as an NDRange.
return;
diag(MatchedDecl->getLocation(),
"kernel function %0 does not call an ID function and may be a viable "
"single work-item, but will be forced to execute as an NDRange")
<< MatchedDecl;
diag(MatchedBarrier->getBeginLoc(),
"barrier call will force NDRange execution; if single work-item "
"semantics are desired a mem_fence may be more efficient",
DiagnosticIDs::Note);
}
}
void SingleWorkItemBarrierCheck::storeOptions(
ClangTidyOptions::OptionMap &Opts) {
Options.store(Opts, "AOCVersion", AOCVersion);
}
} // namespace altera
} // namespace tidy
} // namespace clang

View file

@ -0,0 +1,40 @@
//===--- SingleWorkItemBarrierCheck.h - clang-tidy---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_SINGLE_WORK_ITEM_BARRIER_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_SINGLE_WORK_ITEM_BARRIER_H
#include "../ClangTidyCheck.h"
namespace clang {
namespace tidy {
namespace altera {
/// Detects OpenCL kernel functions that call a barrier but do not call an
/// ID-function function. These functions will be treated as single work-item
/// kernels, which may be inefficient or cause an error.
///
/// For the user-facing documentation see:
/// http://clang.llvm.org/extra/clang-tidy/checks/opencl-single-work-item-barrier.html
class SingleWorkItemBarrierCheck : public ClangTidyCheck {
const unsigned AOCVersion;
public:
SingleWorkItemBarrierCheck(StringRef Name, ClangTidyContext *Context)
: ClangTidyCheck(Name, Context),
AOCVersion(Options.get("AOCVersion", 1600U)) {}
void registerMatchers(ast_matchers::MatchFinder *Finder) override;
void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
void storeOptions(ClangTidyOptions::OptionMap &Opts) override;
};
} // namespace altera
} // namespace tidy
} // namespace clang
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_SINGLE_WORK_ITEM_BARRIER_H

View file

@ -99,6 +99,12 @@ New checks
Finds kernel files and include directives whose filename is `kernel.cl`,
`Verilog.cl`, or `VHDL.cl`.
- New :doc:`altera-single-work-item-barrier
<clang-tidy/checks/altera-single-work-item-barrier>` check.
Finds OpenCL kernel functions that call a barrier function but do not call
an ID function.
- New :doc:`altera-struct-pack-align
<clang-tidy/checks/altera-struct-pack-align>` check.

View file

@ -0,0 +1,58 @@
.. title:: clang-tidy - altera-single-work-item-barrier
altera-single-work-item-barrier
===============================
Finds OpenCL kernel functions that call a barrier function but do not call
an ID function (``get_local_id``, ``get_local_id``, ``get_group_id``, or
``get_local_linear_id``).
These kernels may be viable single work-item kernels, but will be forced to
execute as NDRange kernels if using a newer version of the Altera Offline
Compiler (>= v17.01).
If using an older version of the Altera Offline Compiler, these kernel
functions will be treated as single work-item kernels, which could be
inefficient or lead to errors if NDRange semantics were intended.
Based on the `Altera SDK for OpenCL: Best Practices Guide
<https://www.altera.com/en_US/pdfs/literature/hb/opencl-sdk/aocl_optimization_guide.pdf>`_.
Examples:
.. code-block:: c++
// error: function calls barrier but does not call an ID function.
void __kernel barrier_no_id(__global int * foo, int size) {
for (int i = 0; i < 100; i++) {
foo[i] += 5;
}
barrier(CLK_GLOBAL_MEM_FENCE);
}
// ok: function calls barrier and an ID function.
void __kernel barrier_with_id(__global int * foo, int size) {
for (int i = 0; i < 100; i++) {
int tid = get_global_id(0);
foo[tid] += 5;
}
barrier(CLK_GLOBAL_MEM_FENCE);
}
// ok with AOC Version 17.01: the reqd_work_group_size turns this into
// an NDRange.
__attribute__((reqd_work_group_size(2,2,2)))
void __kernel barrier_with_id(__global int * foo, int size) {
for (int i = 0; i < 100; i++) {
foo[tid] += 5;
}
barrier(CLK_GLOBAL_MEM_FENCE);
}
Options
-------
.. option:: AOCVersion
Defines the version of the Altera Offline Compiler. Defaults to ``1600``
(corresponding to version 16.00).

View file

@ -31,6 +31,7 @@ Clang-Tidy Checks
`abseil-time-subtraction <abseil-time-subtraction.html>`_, "Yes"
`abseil-upgrade-duration-conversions <abseil-upgrade-duration-conversions.html>`_, "Yes"
`altera-kernel-name-restriction <altera-kernel-name-restriction.html>`_,
`altera-single-work-item-barrier <altera-single-work-item-barrier.html>`_, "Yes"
`altera-struct-pack-align <altera-struct-pack-align.html>`_,
`android-cloexec-accept <android-cloexec-accept.html>`_, "Yes"
`android-cloexec-accept4 <android-cloexec-accept4.html>`_,

View file

@ -0,0 +1,300 @@
// RUN: %check_clang_tidy -check-suffix=OLDCLOLDAOC %s altera-single-work-item-barrier %t -- -header-filter=.* "--" -cl-std=CL1.2 -c --include opencl-c.h -DOLDCLOLDAOC
// RUN: %check_clang_tidy -check-suffix=NEWCLOLDAOC %s altera-single-work-item-barrier %t -- -header-filter=.* "--" -cl-std=CL2.0 -c --include opencl-c.h -DNEWCLOLDAOC
// RUN: %check_clang_tidy -check-suffix=OLDCLNEWAOC %s altera-single-work-item-barrier %t -- -config='{CheckOptions: [{key: altera-single-work-item-barrier.AOCVersion, value: 1701}]}' -header-filter=.* "--" -cl-std=CL1.2 -c --include opencl-c.h -DOLDCLNEWAOC
// RUN: %check_clang_tidy -check-suffix=NEWCLNEWAOC %s altera-single-work-item-barrier %t -- -config='{CheckOptions: [{key: altera-single-work-item-barrier.AOCVersion, value: 1701}]}' -header-filter=.* "--" -cl-std=CL2.0 -c --include opencl-c.h -DNEWCLNEWAOC
#ifdef OLDCLOLDAOC // OpenCL 1.2 Altera Offline Compiler < 17.1
void __kernel error_barrier_no_id(__global int * foo, int size) {
// CHECK-MESSAGES-OLDCLOLDAOC: :[[@LINE-1]]:15: warning: kernel function 'error_barrier_no_id' does not call 'get_global_id' or 'get_local_id' and will be treated as a single work-item [altera-single-work-item-barrier]
for (int j = 0; j < 256; j++) {
for (int i = 256; i < size; i+= 256) {
foo[j] += foo[j+i];
}
}
barrier(CLK_GLOBAL_MEM_FENCE);
// CHECK-MESSAGES-OLDCLOLDAOC: :[[@LINE-1]]:3: note: barrier call is in a single work-item and may error out
for (int i = 1; i < 256; i++) {
foo[0] += foo[i];
}
}
void __kernel success_barrier_global_id(__global int * foo, int size) {
barrier(CLK_GLOBAL_MEM_FENCE);
int tid = get_global_id(0);
}
void __kernel success_barrier_local_id(__global int * foo, int size) {
barrier(CLK_GLOBAL_MEM_FENCE);
int tid = get_local_id(0);
}
void __kernel success_barrier_both_ids(__global int * foo, int size) {
barrier(CLK_GLOBAL_MEM_FENCE);
int gid = get_global_id(0);
int lid = get_local_id(0);
}
void success_nokernel_barrier_no_id(__global int * foo, int size) {
for (int j = 0; j < 256; j++) {
for (int i = 256; i < size; i+= 256) {
foo[j] += foo[j+i];
}
}
barrier(CLK_GLOBAL_MEM_FENCE);
for (int i = 1; i < 256; i++) {
foo[0] += foo[i];
}
}
void success_nokernel_barrier_global_id(__global int * foo, int size) {
barrier(CLK_GLOBAL_MEM_FENCE);
int tid = get_global_id(0);
}
void success_nokernel_barrier_local_id(__global int * foo, int size) {
barrier(CLK_GLOBAL_MEM_FENCE);
int tid = get_local_id(0);
}
void success_nokernel_barrier_both_ids(__global int * foo, int size) {
barrier(CLK_GLOBAL_MEM_FENCE);
int gid = get_global_id(0);
int lid = get_local_id(0);
}
#endif
#ifdef NEWCLOLDAOC // OpenCL 2.0 Altera Offline Compiler < 17.1
void __kernel error_barrier_no_id(__global int * foo, int size) {
// CHECK-MESSAGES-NEWCLOLDAOC: :[[@LINE-1]]:15: warning: kernel function 'error_barrier_no_id' does not call 'get_global_id' or 'get_local_id' and will be treated as a single work-item [altera-single-work-item-barrier]
for (int j = 0; j < 256; j++) {
for (int i = 256; i < size; i+= 256) {
foo[j] += foo[j+i];
}
}
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
// CHECK-MESSAGES-NEWCLOLDAOC: :[[@LINE-1]]:3: note: barrier call is in a single work-item and may error out
for (int i = 1; i < 256; i++) {
foo[0] += foo[i];
}
}
void __kernel success_barrier_global_id(__global int * foo, int size) {
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
int tid = get_global_id(0);
}
void __kernel success_barrier_local_id(__global int * foo, int size) {
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
int tid = get_local_id(0);
}
void __kernel success_barrier_both_ids(__global int * foo, int size) {
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
int gid = get_global_id(0);
int lid = get_local_id(0);
}
void success_nokernel_barrier_no_id(__global int * foo, int size) {
for (int j = 0; j < 256; j++) {
for (int i = 256; i < size; i+= 256) {
foo[j] += foo[j+i];
}
}
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
for (int i = 1; i < 256; i++) {
foo[0] += foo[i];
}
}
void success_nokernel_barrier_global_id(__global int * foo, int size) {
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
int tid = get_global_id(0);
}
void success_nokernel_barrier_local_id(__global int * foo, int size) {
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
int tid = get_local_id(0);
}
void success_nokernel_barrier_both_ids(__global int * foo, int size) {
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
int gid = get_global_id(0);
int lid = get_local_id(0);
}
#endif
#ifdef OLDCLNEWAOC // OpenCL 1.2 Altera Offline Compiler >= 17.1
void __kernel error_barrier_no_id(__global int * foo, int size) {
// CHECK-MESSAGES-OLDCLNEWAOC: :[[@LINE-1]]:15: warning: kernel function 'error_barrier_no_id' does not call an ID function and may be a viable single work-item, but will be forced to execute as an NDRange [altera-single-work-item-barrier]
for (int j = 0; j < 256; j++) {
for (int i = 256; i < size; i+= 256) {
foo[j] += foo[j+i];
}
}
barrier(CLK_GLOBAL_MEM_FENCE);
// CHECK-MESSAGES-OLDCLNEWAOC: :[[@LINE-1]]:3: note: barrier call will force NDRange execution; if single work-item semantics are desired a mem_fence may be more efficient
for (int i = 1; i < 256; i++) {
foo[0] += foo[i];
}
}
__attribute__ ((reqd_work_group_size(1,1,1)))
void __kernel error_barrier_no_id_work_group_size(__global int * foo, int size) {
// CHECK-MESSAGES-OLDCLNEWAOC: :[[@LINE-1]]:15: warning: kernel function 'error_barrier_no_id_work_group_size' does not call an ID function and may be a viable single work-item, but will be forced to execute as an NDRange [altera-single-work-item-barrier]
for (int j = 0; j < 256; j++) {
for (int i = 256; i < size; i+= 256) {
foo[j] += foo[j+i];
}
}
barrier(CLK_GLOBAL_MEM_FENCE);
// CHECK-MESSAGES-OLDCLNEWAOC: :[[@LINE-1]]:3: note: barrier call will force NDRange execution; if single work-item semantics are desired a mem_fence may be more efficient
for (int i = 1; i < 256; i++) {
foo[0] += foo[i];
}
}
__attribute__ ((reqd_work_group_size(2,1,1)))
void __kernel success_barrier_no_id_work_group_size(__global int * foo, int size) {
for (int j = 0; j < 256; j++) {
for (int i = 256; i < size; i+= 256) {
foo[j] += foo[j+i];
}
}
barrier(CLK_GLOBAL_MEM_FENCE);
for (int i = 1; i < 256; i++) {
foo[0] += foo[i];
}
}
void __kernel success_barrier_global_id(__global int * foo, int size) {
barrier(CLK_GLOBAL_MEM_FENCE);
int tid = get_global_id(0);
}
void __kernel success_barrier_local_id(__global int * foo, int size) {
barrier(CLK_GLOBAL_MEM_FENCE);
int tid = get_local_id(0);
}
void __kernel success_barrier_both_ids(__global int * foo, int size) {
barrier(CLK_GLOBAL_MEM_FENCE);
int gid = get_global_id(0);
int lid = get_local_id(0);
}
void success_nokernel_barrier_no_id(__global int * foo, int size) {
for (int j = 0; j < 256; j++) {
for (int i = 256; i < size; i+= 256) {
foo[j] += foo[j+i];
}
}
barrier(CLK_GLOBAL_MEM_FENCE);
for (int i = 1; i < 256; i++) {
foo[0] += foo[i];
}
}
void success_nokernel_barrier_global_id(__global int * foo, int size) {
barrier(CLK_GLOBAL_MEM_FENCE);
int tid = get_global_id(0);
}
void success_nokernel_barrier_local_id(__global int * foo, int size) {
barrier(CLK_GLOBAL_MEM_FENCE);
int tid = get_local_id(0);
}
void success_nokernel_barrier_both_ids(__global int * foo, int size) {
barrier(CLK_GLOBAL_MEM_FENCE);
int gid = get_global_id(0);
int lid = get_local_id(0);
}
#endif
#ifdef NEWCLNEWAOC // OpenCL 2.0 Altera Offline Compiler >= 17.1
void __kernel error_barrier_no_id(__global int * foo, int size) {
// CHECK-MESSAGES-NEWCLNEWAOC: :[[@LINE-1]]:15: warning: kernel function 'error_barrier_no_id' does not call an ID function and may be a viable single work-item, but will be forced to execute as an NDRange [altera-single-work-item-barrier]
for (int j = 0; j < 256; j++) {
for (int i = 256; i < size; i+= 256) {
foo[j] += foo[j+i];
}
}
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
// CHECK-MESSAGES-NEWCLNEWAOC: :[[@LINE-1]]:3: note: barrier call will force NDRange execution; if single work-item semantics are desired a mem_fence may be more efficient
for (int i = 1; i < 256; i++) {
foo[0] += foo[i];
}
}
__attribute__ ((reqd_work_group_size(1,1,1)))
void __kernel error_barrier_no_id_work_group_size(__global int * foo, int size) {
// CHECK-MESSAGES-NEWCLNEWAOC: :[[@LINE-1]]:15: warning: kernel function 'error_barrier_no_id_work_group_size' does not call an ID function and may be a viable single work-item, but will be forced to execute as an NDRange [altera-single-work-item-barrier]
for (int j = 0; j < 256; j++) {
for (int i = 256; i < size; i+= 256) {
foo[j] += foo[j+i];
}
}
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
// CHECK-MESSAGES-NEWCLNEWAOC: :[[@LINE-1]]:3: note: barrier call will force NDRange execution; if single work-item semantics are desired a mem_fence may be more efficient
for (int i = 1; i < 256; i++) {
foo[0] += foo[i];
}
}
__attribute__ ((reqd_work_group_size(2,1,1)))
void __kernel success_barrier_no_id_work_group_size(__global int * foo, int size) {
for (int j = 0; j < 256; j++) {
for (int i = 256; i < size; i+= 256) {
foo[j] += foo[j+i];
}
}
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
for (int i = 1; i < 256; i++) {
foo[0] += foo[i];
}
}
void __kernel success_barrier_global_id(__global int * foo, int size) {
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
int tid = get_global_id(0);
}
void __kernel success_barrier_local_id(__global int * foo, int size) {
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
int tid = get_local_id(0);
}
void __kernel success_barrier_both_ids(__global int * foo, int size) {
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
int gid = get_global_id(0);
int lid = get_local_id(0);
}
void success_nokernel_barrier_no_id(__global int * foo, int size) {
for (int j = 0; j < 256; j++) {
for (int i = 256; i < size; i+= 256) {
foo[j] += foo[j+i];
}
}
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
for (int i = 1; i < 256; i++) {
foo[0] += foo[i];
}
}
void success_nokernel_barrier_global_id(__global int * foo, int size) {
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
int tid = get_global_id(0);
}
void success_nokernel_barrier_local_id(__global int * foo, int size) {
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
int tid = get_local_id(0);
}
void success_nokernel_barrier_both_ids(__global int * foo, int size) {
work_group_barrier(CLK_GLOBAL_MEM_FENCE);
int gid = get_global_id(0);
int lid = get_local_id(0);
}
#endif