[LV] Enable scalable vectorization by default for SVE cores.

The availability of SVE should be sufficient to enable scalable
auto-vectorization.

This patch adds a new TTI interface that asks the target which style of
vectorization it prefers when scalable vectors are available. For targets
other than AArch64, this currently defaults to 'FixedWidthOnly'.

Differential Revision: https://reviews.llvm.org/D115651
This commit is contained in:
Sander de Smalen 2021-12-13 16:52:35 +00:00
parent 81967b4fa7
commit b1ff20fd35
40 changed files with 137 additions and 130 deletions

View file

@ -1395,6 +1395,9 @@ public:
/// \returns True if the target supports scalable vectors.
bool supportsScalableVectors() const;
/// \return true when scalable vectorization is preferred.
bool enableScalableVectorization() const;
/// \name Vector Predication Information
/// @{
/// Whether the target supports the %evl parameter of VP intrinsic efficiently
@ -1761,6 +1764,7 @@ public:
ReductionFlags) const = 0;
virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
virtual unsigned getGISelRematGlobalCost() const = 0;
virtual bool enableScalableVectorization() const = 0;
virtual bool supportsScalableVectors() const = 0;
virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
Align Alignment) const = 0;
@ -2362,6 +2366,10 @@ public:
return Impl.supportsScalableVectors();
}
/// Forwards the scalable-vectorization preference query to the wrapped
/// implementation object \c Impl and returns its answer unchanged.
bool enableScalableVectorization() const override {
return Impl.enableScalableVectorization();
}
bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
Align Alignment) const override {
return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);

View file

@ -778,6 +778,8 @@ public:
bool supportsScalableVectors() const { return false; }
/// Default answer for the scalable-vectorization preference query: always
/// false, i.e. scalable vectorization is not enabled.
bool enableScalableVectorization() const { return false; }
bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
Align Alignment) const {
return false;

View file

@ -29,6 +29,7 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/TypeSize.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@ -104,14 +105,12 @@ public:
/// Vectorize loops using scalable vectors or fixed-width vectors, but favor
/// scalable vectors when the cost-model is inconclusive. This is the
/// default when the scalable.enable hint is enabled through a pragma.
SK_PreferScalable = 1,
/// Vectorize loops using scalable vectors or fixed-width vectors, but
/// favor fixed-width vectors when the cost is inconclusive.
SK_PreferFixedWidth = 2,
SK_PreferScalable = 1
};
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced,
OptimizationRemarkEmitter &ORE);
OptimizationRemarkEmitter &ORE,
const TargetTransformInfo *TTI = nullptr);
/// Mark the loop L as already vectorized by setting the width to 1.
void setAlreadyVectorized();
@ -123,9 +122,10 @@ public:
void emitRemarkWithHints() const;
ElementCount getWidth() const {
return ElementCount::get(Width.Value,
isScalableVectorizationExplicitlyEnabled());
return ElementCount::get(Width.Value, (ScalableForceKind)Scalable.Value ==
SK_PreferScalable);
}
unsigned getInterleave() const {
if (Interleave.Value)
return Interleave.Value;
@ -144,22 +144,9 @@ public:
return (ForceKind)Force.Value;
}
/// \return true if the cost-model for scalable vectorization should
/// favor vectorization with scalable vectors over fixed-width vectors when
/// the cost-model is inconclusive.
bool isScalableVectorizationPreferred() const {
return Scalable.Value == SK_PreferScalable;
}
/// \return true if scalable vectorization has been explicitly enabled.
bool isScalableVectorizationExplicitlyEnabled() const {
return Scalable.Value == SK_PreferFixedWidth ||
Scalable.Value == SK_PreferScalable;
}
/// \return true if scalable vectorization has been explicitly disabled.
bool isScalableVectorizationDisabled() const {
return Scalable.Value == SK_FixedWidthOnly;
return (ScalableForceKind)Scalable.Value == SK_FixedWidthOnly;
}
/// If hints are provided that force vectorization, use the AlwaysPrint

View file

@ -1072,6 +1072,10 @@ bool TargetTransformInfo::supportsScalableVectors() const {
return TTIImpl->supportsScalableVectors();
}
// Delegates the scalable-vectorization preference query to the
// implementation held in TTIImpl and returns its result.
bool TargetTransformInfo::enableScalableVectorization() const {
return TTIImpl->enableScalableVectorization();
}
bool TargetTransformInfo::hasActiveVectorLength(unsigned Opcode, Type *DataType,
Align Alignment) const {
return TTIImpl->hasActiveVectorLength(Opcode, DataType, Alignment);

View file

@ -309,6 +309,8 @@ public:
bool supportsScalableVectors() const { return ST->hasSVE(); }
/// Scalable vectorization is enabled exactly when the subtarget reports SVE
/// (the same condition used by supportsScalableVectors()).
bool enableScalableVectorization() const { return ST->hasSVE(); }
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
ElementCount VF) const;

View file

@ -55,22 +55,19 @@ static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
cl::desc("The maximum number of SCEV checks allowed with a "
"vectorize(enable) pragma"));
// FIXME: When scalable vectorization is stable enough, change the default
// to SK_PreferFixedWidth.
static cl::opt<LoopVectorizeHints::ScalableForceKind> ScalableVectorization(
"scalable-vectorization", cl::init(LoopVectorizeHints::SK_FixedWidthOnly),
cl::Hidden,
cl::desc("Control whether the compiler can use scalable vectors to "
"vectorize a loop"),
cl::values(
clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off",
"Scalable vectorization is disabled."),
clEnumValN(LoopVectorizeHints::SK_PreferFixedWidth, "on",
"Scalable vectorization is available, but favor fixed-width "
"vectorization when the cost is inconclusive."),
clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred",
"Scalable vectorization is available and favored when the "
"cost is inconclusive.")));
// Hidden command-line option "scalable-vectorization". It defaults to
// SK_Unspecified; "off" selects SK_FixedWidthOnly and "on" selects
// SK_PreferScalable. When set to anything other than SK_Unspecified, the
// LoopVectorizeHints constructor uses it to override the per-loop hint.
static cl::opt<LoopVectorizeHints::ScalableForceKind>
ForceScalableVectorization(
"scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
cl::Hidden,
cl::desc("Control whether the compiler can use scalable vectors to "
"vectorize a loop"),
cl::values(
clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off",
"Scalable vectorization is disabled."),
clEnumValN(
LoopVectorizeHints::SK_PreferScalable, "on",
"Scalable vectorization is available and favored when the "
"cost is inconclusive.")));
/// Maximum vectorization interleave count.
static const unsigned MaxInterleaveFactor = 16;
@ -95,7 +92,8 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) {
LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
bool InterleaveOnlyWhenForced,
OptimizationRemarkEmitter &ORE)
OptimizationRemarkEmitter &ORE,
const TargetTransformInfo *TTI)
: Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
Force("vectorize.enable", FK_Undefined, HK_FORCE),
@ -110,14 +108,32 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
if (VectorizerParams::isInterleaveForced())
Interleave.Value = VectorizerParams::VectorizationInterleave;
// If the metadata doesn't explicitly specify whether to enable scalable
// vectorization, then decide based on the following criteria (increasing
// level of priority):
// - Target default
// - Metadata width
// - Force option (always overrides)
if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified) {
if (TTI)
Scalable.Value = TTI->enableScalableVectorization() ? SK_PreferScalable
: SK_FixedWidthOnly;
if (Width.Value)
// If the width is set, but the metadata says nothing about the scalable
// property, then assume it concerns only a fixed-width UserVF.
// If width is not set, the flag takes precedence.
Scalable.Value = SK_FixedWidthOnly;
}
// If the flag is set to force any use of scalable vectors, override the loop
// hints.
if (ForceScalableVectorization.getValue() !=
LoopVectorizeHints::SK_Unspecified)
Scalable.Value = ForceScalableVectorization.getValue();
// Scalable vectorization is disabled if no preference is specified.
if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified)
// If the width is set, but the metadata says nothing about the scalable
// property, then assume it concerns only a fixed-width UserVF.
// If width is not set, the flag takes precedence.
Scalable.Value = Width.Value ? SK_FixedWidthOnly : ScalableVectorization;
else if (ScalableVectorization == SK_FixedWidthOnly)
// If the flag is set to disable any use of scalable vectors, override the
// loop hint.
Scalable.Value = SK_FixedWidthOnly;
if (IsVectorized.Value != 1)

View file

@ -3562,7 +3562,7 @@ BasicBlock *InnerLoopVectorizer::completeLoopSkeleton(Loop *L,
if (MDNode *LID = OrigLoop->getLoopID())
L->setLoopID(LID);
LoopVectorizeHints Hints(L, true, *ORE);
LoopVectorizeHints Hints(L, true, *ORE, TTI);
Hints.setAlreadyVectorized();
#ifdef EXPENSIVE_CHECKS
@ -5699,12 +5699,11 @@ bool LoopVectorizationCostModel::isMoreProfitable(
EstimatedWidthB *= VScale.getValue();
}
// When set to preferred, for now assume vscale may be larger than 1 (or the
// one being tuned for), so that scalable vectorization is slightly favorable
// over fixed-width vectorization.
if (Hints->isScalableVectorizationPreferred())
if (A.Width.isScalable() && !B.Width.isScalable())
return (CostA * B.Width.getFixedValue()) <= (CostB * EstimatedWidthA);
// Assume vscale may be larger than 1 (or the value being tuned for),
// so that scalable vectorization is slightly favorable over fixed-width
// vectorization.
if (A.Width.isScalable() && !B.Width.isScalable())
return (CostA * B.Width.getFixedValue()) <= (CostB * EstimatedWidthA);
// To avoid the need for FP division:
// (CostA / A.Width) < (CostB / B.Width)
@ -10250,7 +10249,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
<< L->getHeader()->getParent()->getName() << "\" from "
<< DebugLocStr << "\n");
LoopVectorizeHints Hints(L, InterleaveOnlyWhenForced, *ORE);
LoopVectorizeHints Hints(L, InterleaveOnlyWhenForced, *ORE, TTI);
LLVM_DEBUG(
dbgs() << "LV: Loop hints:"

View file

@ -1,4 +1,4 @@
; RUN: opt -loop-vectorize -scalable-vectorization=on -force-target-instruction-cost=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s 2>&1 | FileCheck %s
; RUN: opt -loop-vectorize -force-target-instruction-cost=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s 2>&1 | FileCheck %s
; This test currently fails when the LV calculates a maximum safe
; distance for scalable vectors, because the code to eliminate the tail is

View file

@ -1,5 +1,5 @@
; RUN: opt -loop-vectorize -scalable-vectorization=on -force-vector-width=4 -force-vector-interleave=1 -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF1
; RUN: opt -loop-vectorize -scalable-vectorization=on -force-vector-width=4 -force-vector-interleave=2 -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF2
; RUN: opt -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF1
; RUN: opt -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF2
; We vectorize this first order recurrence, with a set of insertelements for
; each unrolled part. Make sure these insertelements are generated in-order,

View file

@ -1,5 +1,5 @@
; REQUIRES: asserts
; RUN: opt -loop-vectorize -scalable-vectorization=on -force-vector-interleave=1 -S -debug-only=loop-vectorize < %s 2>%t | FileCheck %s
; RUN: opt -loop-vectorize -force-vector-interleave=1 -S -debug-only=loop-vectorize < %s 2>%t | FileCheck %s
; RUN: cat %t | FileCheck %s --check-prefix=CHECK-COST
target triple = "aarch64-unknown-linux-gnu"

View file

@ -1,4 +1,4 @@
; RUN: opt -S -loop-vectorize -mattr=+sve -mtriple aarch64-unknown-linux-gnu -force-vector-width=2 -scalable-vectorization=preferred -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize < %s 2>%t | FileCheck %s
; RUN: opt -S -loop-vectorize -mattr=+sve -mtriple aarch64-unknown-linux-gnu -force-vector-width=2 -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize < %s 2>%t | FileCheck %s
; RUN: FileCheck %s --check-prefix=CHECK-REMARKS < %t
; CHECK-REMARKS: UserVF ignored because of invalid costs.

View file

@ -1,8 +1,8 @@
; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu -scalable-vectorization=on \
; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu \
; RUN: -pass-remarks-missed=loop-vectorize < %s 2>%t | FileCheck %s
; RUN: cat %t | FileCheck %s --check-prefix=CHECK-REMARKS
; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-target-instruction-cost=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu \
; RUN: -scalable-vectorization=on -pass-remarks-missed=loop-vectorize < %s 2>%t | FileCheck %s
; RUN: -pass-remarks-missed=loop-vectorize < %s 2>%t | FileCheck %s
; RUN: cat %t | FileCheck %s --check-prefix=CHECK-REMARKS
define void @vec_load(i64 %N, double* nocapture %a, double* nocapture readonly %b) {

View file

@ -1,5 +1,5 @@
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -S | FileCheck %s
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -prefer-predicate-over-epilogue=predicate-dont-vectorize -S | FileCheck %s
; RUN: opt < %s -loop-vectorize -S | FileCheck %s
; RUN: opt < %s -loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -S | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"

View file

@ -1,4 +1,4 @@
; RUN: opt < %s -loop-vectorize -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve,+bf16 -S -scalable-vectorization=on 2>%t | FileCheck %s -check-prefix=CHECK
; RUN: opt < %s -loop-vectorize -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve,+bf16 -S 2>%t | FileCheck %s -check-prefix=CHECK
; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARK
; Reduction can be vectorized

View file

@ -1,8 +1,8 @@
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=false -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-NOT-VECTORIZED
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=false -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=false -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-NOT-VECTORIZED
; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=false -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_strict

View file

@ -1,21 +1,21 @@
; REQUIRES: asserts
; RUN: opt -mtriple=aarch64 -mattr=+sve -scalable-vectorization=on \
; RUN: opt -mtriple=aarch64 -mattr=+sve \
; RUN: -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=GENERIC,VF-VSCALE4
; RUN: opt -mtriple=aarch64 -mattr=+sve -mcpu=generic -scalable-vectorization=on \
; RUN: opt -mtriple=aarch64 -mattr=+sve -mcpu=generic \
; RUN: -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=GENERIC,VF-VSCALE4
; RUN: opt -mtriple=aarch64 -mcpu=neoverse-v1 -scalable-vectorization=on \
; RUN: opt -mtriple=aarch64 -mcpu=neoverse-v1 \
; RUN: -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=NEOVERSE-V1,VF-VSCALE4
; RUN: opt -mtriple=aarch64 -mcpu=neoverse-n2 -scalable-vectorization=on \
; RUN: opt -mtriple=aarch64 -mcpu=neoverse-n2 \
; RUN: -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=NEOVERSE-N2,VF-4
; RUN: | FileCheck %s --check-prefixes=NEOVERSE-N2,VF-VSCALE4
; RUN: opt -mtriple=aarch64 -mcpu=neoverse-n2 -scalable-vectorization=preferred \
; RUN: opt -mtriple=aarch64 -mcpu=neoverse-n2 \
; RUN: -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=NEOVERSE-N2,VF-VSCALE4

View file

@ -1,8 +1,7 @@
; REQUIRES: asserts
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=preferred < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_PREFERRED
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=off < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_DISABLED
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -vectorizer-maximize-bandwidth -scalable-vectorization=preferred < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_PREFERRED_MAXBW
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=off < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_DISABLED
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -vectorizer-maximize-bandwidth -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON_MAXBW
; Test that the MaxVF for the following loop, that has no dependence distances,
; is calculated as vscale x 4 (max legal SVE vector size) or vscale x 16
@ -11,12 +10,10 @@ define void @test0(i32* %a, i8* %b, i32* %c) #0 {
; CHECK: LV: Checking a loop in "test0"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_ON: LV: Selecting VF: vscale x 4
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: vscale x 4
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 16
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Selecting VF: vscale x 16
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 16
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: vscale x 16
entry:
br label %loop
@ -44,12 +41,10 @@ define void @test1(i32* %a, i8* %b) #0 {
; CHECK: LV: Checking a loop in "test1"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_ON: LV: Selecting VF: vscale x 4
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: vscale x 4
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Selecting VF: 16
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16
entry:
br label %loop
@ -77,13 +72,11 @@ exit:
define void @test2(i32* %a, i8* %b) #0 {
; CHECK: LV: Checking a loop in "test2"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 2
; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: vscale x 2
; CHECK_SCALABLE_ON: LV: Selecting VF: vscale x 2
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 2
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Selecting VF: 16
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 2
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16
entry:
br label %loop
@ -112,12 +105,10 @@ define void @test3(i32* %a, i8* %b) #0 {
; CHECK: LV: Checking a loop in "test3"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 1
; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: 4
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 1
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Selecting VF: 16
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 1
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16
entry:
br label %loop
@ -145,13 +136,12 @@ exit:
define void @test4(i32* %a, i32* %b) #0 {
; CHECK: LV: Checking a loop in "test4"
; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: 4
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED_MAXBW-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Selecting VF: 4
; CHECK_SCALABLE_ON_MAXBW-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 4
entry:
br label %loop

View file

@ -1,7 +1,7 @@
; REQUIRES: asserts
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -S -scalable-vectorization=on < %s 2>&1 | FileCheck %s
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S -scalable-vectorization=on < %s 2>&1 | FileCheck --check-prefix=CHECK-DBG %s
; RUN: opt -mtriple=aarch64-none-linux-gnu -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S -scalable-vectorization=on < %s 2>%t | FileCheck --check-prefix=CHECK-NO-SVE %s
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -S < %s 2>&1 | FileCheck %s
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck --check-prefix=CHECK-DBG %s
; RUN: opt -mtriple=aarch64-none-linux-gnu -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>%t | FileCheck --check-prefix=CHECK-NO-SVE %s
; RUN: cat %t | FileCheck %s -check-prefix=CHECK-NO-SVE-REMARKS
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"

View file

@ -1,4 +1,4 @@
; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -scalable-vectorization=on < %s -S | FileCheck %s
; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve < %s -S | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"

View file

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -o - | FileCheck %s
; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -o - | FileCheck %s
define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias nocapture readonly %cond, i16* noalias nocapture readonly %inv, i64 %n) #0 {
; CHECK-LABEL: @cond_inv_load_i32i32i16(

View file

@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; REQUIRES: asserts
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-minimum-VF=0 --debug-only=loop-vectorize -force-target-instruction-cost=1 -S -scalable-vectorization=preferred 2>%t | FileCheck %s
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-minimum-VF=0 --debug-only=loop-vectorize -force-target-instruction-cost=1 -S 2>%t | FileCheck %s
; RUN: cat %t | FileCheck %s --check-prefix=DEBUG
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-minimum-VF=8 --debug-only=loop-vectorize -S -scalable-vectorization=preferred 2>%t | FileCheck %s
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-minimum-VF=8 --debug-only=loop-vectorize -S 2>%t | FileCheck %s
; RUN: cat %t | FileCheck %s --check-prefix=DEBUG
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-force-VF=8 --debug-only=loop-vectorize -S -scalable-vectorization=preferred 2>%t | FileCheck %s
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-force-VF=8 --debug-only=loop-vectorize -S 2>%t | FileCheck %s
; RUN: cat %t | FileCheck %s --check-prefix=DEBUG-FORCED
target triple = "aarch64-linux-gnu"

View file

@ -1,4 +1,4 @@
; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"

View file

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -scalable-vectorization=preferred -force-target-instruction-cost=1 -o - | FileCheck %s
; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -force-target-instruction-cost=1 -o - | FileCheck %s
define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) #0 {
; CHECK-LABEL: @gather_nxv4i32_ind64(

View file

@ -1,4 +1,4 @@
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mattr=+sve -force-vector-width=4 -pass-remarks-analysis=loop-vectorize -S 2>%t | FileCheck %s
; RUN: opt < %s -loop-vectorize -mattr=+sve -force-vector-width=4 -pass-remarks-analysis=loop-vectorize -S 2>%t | FileCheck %s
; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARKS
target triple = "aarch64-linux-gnu"

View file

@ -1,5 +1,5 @@
; REQUIRES: asserts
; RUN: opt -scalable-vectorization=on -loop-vectorize -S < %s -debug 2>%t | FileCheck %s
; RUN: opt -loop-vectorize -S < %s -debug 2>%t | FileCheck %s
; RUN: cat %t | FileCheck %s --check-prefix=DEBUG
target triple = "aarch64-unknown-linux-gnu"

View file

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-vectorize -scalable-vectorization=on -force-target-instruction-cost=1 -dce -instcombine < %s -S | FileCheck %s
; RUN: opt -loop-vectorize -force-target-instruction-cost=1 -dce -instcombine < %s -S | FileCheck %s
target triple = "aarch64-linux-gnu"

View file

@ -1,4 +1,4 @@
; RUN: opt -S -loop-vectorize -scalable-vectorization=on -mattr=+sve -mtriple aarch64-linux-gnu < %s | FileCheck %s
; RUN: opt -S -loop-vectorize -mattr=+sve -mtriple aarch64-linux-gnu < %s | FileCheck %s
define void @invariant_load(i64 %n, i32* noalias nocapture %a, i32* nocapture readonly %b) {
; CHECK-LABEL: @invariant_load

View file

@ -1,4 +1,4 @@
; RUN: opt -loop-vectorize -scalable-vectorization=on -S < %s | FileCheck %s
; RUN: opt -loop-vectorize -S < %s | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"

View file

@ -1,4 +1,4 @@
; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -scalable-vectorization=on -dce -instcombine -S <%s | FileCheck %s
; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -dce -instcombine -S <%s | FileCheck %s
define void @stride7_i32(i32* noalias nocapture %dst, i64 %n) #0 {
; CHECK-LABEL: @stride7_i32(

View file

@ -1,4 +1,4 @@
; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -scalable-vectorization=on -o - | FileCheck %s
; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -o - | FileCheck %s
define void @mloadstore_f32(float* noalias nocapture %a, float* noalias nocapture readonly %b, i64 %n) {
; CHECK-LABEL: @mloadstore_f32

View file

@ -1,5 +1,5 @@
; RUN: opt -loop-vectorize -scalable-vectorization=preferred -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC1
; RUN: opt -loop-vectorize -scalable-vectorization=preferred -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC4
; RUN: opt -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC1
; RUN: opt -loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC4
target triple = "aarch64-linux-gnu"

View file

@ -1,10 +1,10 @@
; REQUIRES: asserts
; RUN: opt < %s -loop-vectorize -debug -disable-output -force-ordered-reductions=true -hints-allow-reordering=false \
; RUN: -scalable-vectorization=on -force-vector-width=4 -force-vector-interleave=1 -S 2>&1 | FileCheck %s --check-prefix=CHECK-VF4
; RUN: -force-vector-width=4 -force-vector-interleave=1 -S 2>&1 | FileCheck %s --check-prefix=CHECK-VF4
; RUN: opt < %s -loop-vectorize -debug -disable-output -force-ordered-reductions=true -hints-allow-reordering=false \
; RUN: -scalable-vectorization=on -force-vector-width=8 -force-vector-interleave=1 -S 2>&1 | FileCheck %s --check-prefix=CHECK-VF8
; RUN: -force-vector-width=8 -force-vector-interleave=1 -S 2>&1 | FileCheck %s --check-prefix=CHECK-VF8
; RUN: opt < %s -loop-vectorize -debug -disable-output -force-ordered-reductions=true -hints-allow-reordering=false \
; RUN: -scalable-vectorization=on -force-vector-width=4 -force-vector-interleave=1 -mcpu=neoverse-n2 -S 2>&1 | FileCheck %s --check-prefix=CHECK-VF4-CPU-NEOVERSE-N2
; RUN: -force-vector-width=4 -force-vector-interleave=1 -mcpu=neoverse-n2 -S 2>&1 | FileCheck %s --check-prefix=CHECK-VF4-CPU-NEOVERSE-N2
target triple="aarch64-unknown-linux-gnu"

View file

@ -1,4 +1,4 @@
; RUN: opt -S -loop-vectorize -scalable-vectorization=preferred -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck %s
; RUN: opt -S -loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck %s
; CHECK-NOT: vector.body:

View file

@ -1,4 +1,4 @@
; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine < %s -S | FileCheck %s
; RUN: opt -loop-vectorize -dce -instcombine < %s -S | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"

View file

@ -10,7 +10,7 @@
; The test checks if the mask is being correctly created, reverted and used
; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"

View file

@ -5,7 +5,7 @@
; for (int i = N-1; i >= 0; --i)
; a[i] = b[i] + 1.0;
; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
define void @vector_reverse_f64(i64 %N, double* %a, double* %b) #0{
; CHECK-LABEL: @vector_reverse_f64(

View file

@ -1,4 +1,4 @@
; RUN: opt -S -loop-vectorize -scalable-vectorization=on < %s | FileCheck %s
; RUN: opt -S -loop-vectorize < %s | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"

View file

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; REQUIRES: asserts
; RUN: opt -loop-vectorize -scalable-vectorization=on -S -mtriple=aarch64 -mattr=+sve -debug-only=loop-vectorize < %s 2>&1 | FileCheck %s
; RUN: opt -loop-vectorize -S -mtriple=aarch64 -mattr=+sve -debug-only=loop-vectorize < %s 2>&1 | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"

View file

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -scalable-vectorization=on -dce -instcombine -S < %s | FileCheck %s
; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -dce -instcombine -S < %s | FileCheck %s
; Ensure that we can vectorize loops such as:
; int *ptr = c;

View file

@ -1,5 +1,4 @@
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-target-supports-scalable-vectors=true -dce -instcombine -S -scalable-vectorization=on | FileCheck %s
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-target-supports-scalable-vectors=true -dce -instcombine -S -scalable-vectorization=preferred | FileCheck %s
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-target-supports-scalable-vectors=true -dce -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"