[LV] Enable scalable vectorization by default for SVE cores.
The availability of SVE should be sufficient to enable scalable auto-vectorization. This patch adds a new TTI interface that asks the target which style of vectorization it prefers when scalable vectors are available. For targets other than AArch64, this currently defaults to 'FixedWidthOnly'. Differential Revision: https://reviews.llvm.org/D115651
This commit is contained in:
parent
81967b4fa7
commit
b1ff20fd35
|
@ -1395,6 +1395,9 @@ public:
|
|||
/// \returns True if the target supports scalable vectors.
|
||||
bool supportsScalableVectors() const;
|
||||
|
||||
/// \return true when scalable vectorization is preferred.
|
||||
bool enableScalableVectorization() const;
|
||||
|
||||
/// \name Vector Predication Information
|
||||
/// @{
|
||||
/// Whether the target supports the %evl parameter of VP intrinsic efficiently
|
||||
|
@ -1761,6 +1764,7 @@ public:
|
|||
ReductionFlags) const = 0;
|
||||
virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
|
||||
virtual unsigned getGISelRematGlobalCost() const = 0;
|
||||
virtual bool enableScalableVectorization() const = 0;
|
||||
virtual bool supportsScalableVectors() const = 0;
|
||||
virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
|
||||
Align Alignment) const = 0;
|
||||
|
@ -2362,6 +2366,10 @@ public:
|
|||
return Impl.supportsScalableVectors();
|
||||
}
|
||||
|
||||
bool enableScalableVectorization() const override {
|
||||
return Impl.enableScalableVectorization();
|
||||
}
|
||||
|
||||
bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
|
||||
Align Alignment) const override {
|
||||
return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
|
||||
|
|
|
@ -778,6 +778,8 @@ public:
|
|||
|
||||
bool supportsScalableVectors() const { return false; }
|
||||
|
||||
bool enableScalableVectorization() const { return false; }
|
||||
|
||||
bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
|
||||
Align Alignment) const {
|
||||
return false;
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include "llvm/ADT/MapVector.h"
|
||||
#include "llvm/Analysis/LoopAccessAnalysis.h"
|
||||
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/Support/TypeSize.h"
|
||||
#include "llvm/Transforms/Utils/LoopUtils.h"
|
||||
|
||||
|
@ -104,14 +105,12 @@ public:
|
|||
/// Vectorize loops using scalable vectors or fixed-width vectors, but favor
|
||||
/// scalable vectors when the cost-model is inconclusive. This is the
|
||||
/// default when the scalable.enable hint is enabled through a pragma.
|
||||
SK_PreferScalable = 1,
|
||||
/// Vectorize loops using scalable vectors or fixed-width vectors, but
|
||||
/// favor fixed-width vectors when the cost is inconclusive.
|
||||
SK_PreferFixedWidth = 2,
|
||||
SK_PreferScalable = 1
|
||||
};
|
||||
|
||||
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced,
|
||||
OptimizationRemarkEmitter &ORE);
|
||||
OptimizationRemarkEmitter &ORE,
|
||||
const TargetTransformInfo *TTI = nullptr);
|
||||
|
||||
/// Mark the loop L as already vectorized by setting the width to 1.
|
||||
void setAlreadyVectorized();
|
||||
|
@ -123,9 +122,10 @@ public:
|
|||
void emitRemarkWithHints() const;
|
||||
|
||||
ElementCount getWidth() const {
|
||||
return ElementCount::get(Width.Value,
|
||||
isScalableVectorizationExplicitlyEnabled());
|
||||
return ElementCount::get(Width.Value, (ScalableForceKind)Scalable.Value ==
|
||||
SK_PreferScalable);
|
||||
}
|
||||
|
||||
unsigned getInterleave() const {
|
||||
if (Interleave.Value)
|
||||
return Interleave.Value;
|
||||
|
@ -144,22 +144,9 @@ public:
|
|||
return (ForceKind)Force.Value;
|
||||
}
|
||||
|
||||
/// \return true if the cost-model for scalable vectorization should
|
||||
/// favor vectorization with scalable vectors over fixed-width vectors when
|
||||
/// the cost-model is inconclusive.
|
||||
bool isScalableVectorizationPreferred() const {
|
||||
return Scalable.Value == SK_PreferScalable;
|
||||
}
|
||||
|
||||
/// \return true if scalable vectorization has been explicitly enabled.
|
||||
bool isScalableVectorizationExplicitlyEnabled() const {
|
||||
return Scalable.Value == SK_PreferFixedWidth ||
|
||||
Scalable.Value == SK_PreferScalable;
|
||||
}
|
||||
|
||||
/// \return true if scalable vectorization has been explicitly disabled.
|
||||
bool isScalableVectorizationDisabled() const {
|
||||
return Scalable.Value == SK_FixedWidthOnly;
|
||||
return (ScalableForceKind)Scalable.Value == SK_FixedWidthOnly;
|
||||
}
|
||||
|
||||
/// If hints are provided that force vectorization, use the AlwaysPrint
|
||||
|
|
|
@ -1072,6 +1072,10 @@ bool TargetTransformInfo::supportsScalableVectors() const {
|
|||
return TTIImpl->supportsScalableVectors();
|
||||
}
|
||||
|
||||
bool TargetTransformInfo::enableScalableVectorization() const {
|
||||
return TTIImpl->enableScalableVectorization();
|
||||
}
|
||||
|
||||
bool TargetTransformInfo::hasActiveVectorLength(unsigned Opcode, Type *DataType,
|
||||
Align Alignment) const {
|
||||
return TTIImpl->hasActiveVectorLength(Opcode, DataType, Alignment);
|
||||
|
|
|
@ -309,6 +309,8 @@ public:
|
|||
|
||||
bool supportsScalableVectors() const { return ST->hasSVE(); }
|
||||
|
||||
bool enableScalableVectorization() const { return ST->hasSVE(); }
|
||||
|
||||
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
|
||||
ElementCount VF) const;
|
||||
|
||||
|
|
|
@ -55,22 +55,19 @@ static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
|
|||
cl::desc("The maximum number of SCEV checks allowed with a "
|
||||
"vectorize(enable) pragma"));
|
||||
|
||||
// FIXME: When scalable vectorization is stable enough, change the default
|
||||
// to SK_PreferFixedWidth.
|
||||
static cl::opt<LoopVectorizeHints::ScalableForceKind> ScalableVectorization(
|
||||
"scalable-vectorization", cl::init(LoopVectorizeHints::SK_FixedWidthOnly),
|
||||
cl::Hidden,
|
||||
cl::desc("Control whether the compiler can use scalable vectors to "
|
||||
"vectorize a loop"),
|
||||
cl::values(
|
||||
clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off",
|
||||
"Scalable vectorization is disabled."),
|
||||
clEnumValN(LoopVectorizeHints::SK_PreferFixedWidth, "on",
|
||||
"Scalable vectorization is available, but favor fixed-width "
|
||||
"vectorization when the cost is inconclusive."),
|
||||
clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred",
|
||||
"Scalable vectorization is available and favored when the "
|
||||
"cost is inconclusive.")));
|
||||
static cl::opt<LoopVectorizeHints::ScalableForceKind>
|
||||
ForceScalableVectorization(
|
||||
"scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
|
||||
cl::Hidden,
|
||||
cl::desc("Control whether the compiler can use scalable vectors to "
|
||||
"vectorize a loop"),
|
||||
cl::values(
|
||||
clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off",
|
||||
"Scalable vectorization is disabled."),
|
||||
clEnumValN(
|
||||
LoopVectorizeHints::SK_PreferScalable, "on",
|
||||
"Scalable vectorization is available and favored when the "
|
||||
"cost is inconclusive.")));
|
||||
|
||||
/// Maximum vectorization interleave count.
|
||||
static const unsigned MaxInterleaveFactor = 16;
|
||||
|
@ -95,7 +92,8 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) {
|
|||
|
||||
LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
|
||||
bool InterleaveOnlyWhenForced,
|
||||
OptimizationRemarkEmitter &ORE)
|
||||
OptimizationRemarkEmitter &ORE,
|
||||
const TargetTransformInfo *TTI)
|
||||
: Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
|
||||
Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
|
||||
Force("vectorize.enable", FK_Undefined, HK_FORCE),
|
||||
|
@ -110,14 +108,32 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
|
|||
if (VectorizerParams::isInterleaveForced())
|
||||
Interleave.Value = VectorizerParams::VectorizationInterleave;
|
||||
|
||||
// If the metadata doesn't explicitly specify whether to enable scalable
|
||||
// vectorization, then decide based on the following criteria (increasing
|
||||
// level of priority):
|
||||
// - Target default
|
||||
// - Metadata width
|
||||
// - Force option (always overrides)
|
||||
if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified) {
|
||||
if (TTI)
|
||||
Scalable.Value = TTI->enableScalableVectorization() ? SK_PreferScalable
|
||||
: SK_FixedWidthOnly;
|
||||
|
||||
if (Width.Value)
|
||||
// If the width is set, but the metadata says nothing about the scalable
|
||||
// property, then assume it concerns only a fixed-width UserVF.
|
||||
// If width is not set, the flag takes precedence.
|
||||
Scalable.Value = SK_FixedWidthOnly;
|
||||
}
|
||||
|
||||
// If the flag is set to force any use of scalable vectors, override the loop
|
||||
// hints.
|
||||
if (ForceScalableVectorization.getValue() !=
|
||||
LoopVectorizeHints::SK_Unspecified)
|
||||
Scalable.Value = ForceScalableVectorization.getValue();
|
||||
|
||||
// Scalable vectorization is disabled if no preference is specified.
|
||||
if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified)
|
||||
// If the width is set, but the metadata says nothing about the scalable
|
||||
// property, then assume it concerns only a fixed-width UserVF.
|
||||
// If width is not set, the flag takes precedence.
|
||||
Scalable.Value = Width.Value ? SK_FixedWidthOnly : ScalableVectorization;
|
||||
else if (ScalableVectorization == SK_FixedWidthOnly)
|
||||
// If the flag is set to disable any use of scalable vectors, override the
|
||||
// loop hint.
|
||||
Scalable.Value = SK_FixedWidthOnly;
|
||||
|
||||
if (IsVectorized.Value != 1)
|
||||
|
|
|
@ -3562,7 +3562,7 @@ BasicBlock *InnerLoopVectorizer::completeLoopSkeleton(Loop *L,
|
|||
if (MDNode *LID = OrigLoop->getLoopID())
|
||||
L->setLoopID(LID);
|
||||
|
||||
LoopVectorizeHints Hints(L, true, *ORE);
|
||||
LoopVectorizeHints Hints(L, true, *ORE, TTI);
|
||||
Hints.setAlreadyVectorized();
|
||||
|
||||
#ifdef EXPENSIVE_CHECKS
|
||||
|
@ -5699,12 +5699,11 @@ bool LoopVectorizationCostModel::isMoreProfitable(
|
|||
EstimatedWidthB *= VScale.getValue();
|
||||
}
|
||||
|
||||
// When set to preferred, for now assume vscale may be larger than 1 (or the
|
||||
// one being tuned for), so that scalable vectorization is slightly favorable
|
||||
// over fixed-width vectorization.
|
||||
if (Hints->isScalableVectorizationPreferred())
|
||||
if (A.Width.isScalable() && !B.Width.isScalable())
|
||||
return (CostA * B.Width.getFixedValue()) <= (CostB * EstimatedWidthA);
|
||||
// Assume vscale may be larger than 1 (or the value being tuned for),
|
||||
// so that scalable vectorization is slightly favorable over fixed-width
|
||||
// vectorization.
|
||||
if (A.Width.isScalable() && !B.Width.isScalable())
|
||||
return (CostA * B.Width.getFixedValue()) <= (CostB * EstimatedWidthA);
|
||||
|
||||
// To avoid the need for FP division:
|
||||
// (CostA / A.Width) < (CostB / B.Width)
|
||||
|
@ -10250,7 +10249,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
|
|||
<< L->getHeader()->getParent()->getName() << "\" from "
|
||||
<< DebugLocStr << "\n");
|
||||
|
||||
LoopVectorizeHints Hints(L, InterleaveOnlyWhenForced, *ORE);
|
||||
LoopVectorizeHints Hints(L, InterleaveOnlyWhenForced, *ORE, TTI);
|
||||
|
||||
LLVM_DEBUG(
|
||||
dbgs() << "LV: Loop hints:"
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: opt -loop-vectorize -scalable-vectorization=on -force-target-instruction-cost=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s 2>&1 | FileCheck %s
|
||||
; RUN: opt -loop-vectorize -force-target-instruction-cost=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s 2>&1 | FileCheck %s
|
||||
|
||||
; This test currently fails when the LV calculates a maximum safe
|
||||
; distance for scalable vectors, because the code to eliminate the tail is
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: opt -loop-vectorize -scalable-vectorization=on -force-vector-width=4 -force-vector-interleave=1 -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF1
|
||||
; RUN: opt -loop-vectorize -scalable-vectorization=on -force-vector-width=4 -force-vector-interleave=2 -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF2
|
||||
; RUN: opt -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF1
|
||||
; RUN: opt -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF2
|
||||
|
||||
; We vectorize this first order recurrence, with a set of insertelements for
|
||||
; each unrolled part. Make sure these insertelements are generated in-order,
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; REQUIRES: asserts
|
||||
; RUN: opt -loop-vectorize -scalable-vectorization=on -force-vector-interleave=1 -S -debug-only=loop-vectorize < %s 2>%t | FileCheck %s
|
||||
; RUN: opt -loop-vectorize -force-vector-interleave=1 -S -debug-only=loop-vectorize < %s 2>%t | FileCheck %s
|
||||
; RUN: cat %t | FileCheck %s --check-prefix=CHECK-COST
|
||||
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: opt -S -loop-vectorize -mattr=+sve -mtriple aarch64-unknown-linux-gnu -force-vector-width=2 -scalable-vectorization=preferred -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize < %s 2>%t | FileCheck %s
|
||||
; RUN: opt -S -loop-vectorize -mattr=+sve -mtriple aarch64-unknown-linux-gnu -force-vector-width=2 -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize < %s 2>%t | FileCheck %s
|
||||
; RUN: FileCheck %s --check-prefix=CHECK-REMARKS < %t
|
||||
|
||||
; CHECK-REMARKS: UserVF ignored because of invalid costs.
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu -scalable-vectorization=on \
|
||||
; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu \
|
||||
; RUN: -pass-remarks-missed=loop-vectorize < %s 2>%t | FileCheck %s
|
||||
; RUN: cat %t | FileCheck %s --check-prefix=CHECK-REMARKS
|
||||
; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-target-instruction-cost=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu \
|
||||
; RUN: -scalable-vectorization=on -pass-remarks-missed=loop-vectorize < %s 2>%t | FileCheck %s
|
||||
; RUN: -pass-remarks-missed=loop-vectorize < %s 2>%t | FileCheck %s
|
||||
; RUN: cat %t | FileCheck %s --check-prefix=CHECK-REMARKS
|
||||
|
||||
define void @vec_load(i64 %N, double* nocapture %a, double* nocapture readonly %b) {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -S | FileCheck %s
|
||||
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -prefer-predicate-over-epilogue=predicate-dont-vectorize -S | FileCheck %s
|
||||
; RUN: opt < %s -loop-vectorize -S | FileCheck %s
|
||||
; RUN: opt < %s -loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: opt < %s -loop-vectorize -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve,+bf16 -S -scalable-vectorization=on 2>%t | FileCheck %s -check-prefix=CHECK
|
||||
; RUN: opt < %s -loop-vectorize -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve,+bf16 -S 2>%t | FileCheck %s -check-prefix=CHECK
|
||||
; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARK
|
||||
|
||||
; Reduction can be vectorized
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=false -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-NOT-VECTORIZED
|
||||
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=false -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
|
||||
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
|
||||
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
|
||||
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
|
||||
; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=false -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-NOT-VECTORIZED
|
||||
; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=false -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
|
||||
; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
|
||||
; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
|
||||
; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
|
||||
|
||||
define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) #0 {
|
||||
; CHECK-ORDERED-LABEL: @fadd_strict
|
||||
|
|
|
@ -1,21 +1,21 @@
|
|||
; REQUIRES: asserts
|
||||
; RUN: opt -mtriple=aarch64 -mattr=+sve -scalable-vectorization=on \
|
||||
; RUN: opt -mtriple=aarch64 -mattr=+sve \
|
||||
; RUN: -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=GENERIC,VF-VSCALE4
|
||||
|
||||
; RUN: opt -mtriple=aarch64 -mattr=+sve -mcpu=generic -scalable-vectorization=on \
|
||||
; RUN: opt -mtriple=aarch64 -mattr=+sve -mcpu=generic \
|
||||
; RUN: -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=GENERIC,VF-VSCALE4
|
||||
|
||||
; RUN: opt -mtriple=aarch64 -mcpu=neoverse-v1 -scalable-vectorization=on \
|
||||
; RUN: opt -mtriple=aarch64 -mcpu=neoverse-v1 \
|
||||
; RUN: -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=NEOVERSE-V1,VF-VSCALE4
|
||||
|
||||
; RUN: opt -mtriple=aarch64 -mcpu=neoverse-n2 -scalable-vectorization=on \
|
||||
; RUN: opt -mtriple=aarch64 -mcpu=neoverse-n2 \
|
||||
; RUN: -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=NEOVERSE-N2,VF-4
|
||||
; RUN: | FileCheck %s --check-prefixes=NEOVERSE-N2,VF-VSCALE4
|
||||
|
||||
; RUN: opt -mtriple=aarch64 -mcpu=neoverse-n2 -scalable-vectorization=preferred \
|
||||
; RUN: opt -mtriple=aarch64 -mcpu=neoverse-n2 \
|
||||
; RUN: -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=NEOVERSE-N2,VF-VSCALE4
|
||||
|
||||
|
|
|
@ -1,8 +1,7 @@
|
|||
; REQUIRES: asserts
|
||||
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON
|
||||
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=preferred < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_PREFERRED
|
||||
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=off < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_DISABLED
|
||||
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -vectorizer-maximize-bandwidth -scalable-vectorization=preferred < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_PREFERRED_MAXBW
|
||||
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=off < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_DISABLED
|
||||
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON
|
||||
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -vectorizer-maximize-bandwidth -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON_MAXBW
|
||||
|
||||
; Test that the MaxVF for the following loop, that has no dependence distances,
|
||||
; is calculated as vscale x 4 (max legal SVE vector size) or vscale x 16
|
||||
|
@ -11,12 +10,10 @@ define void @test0(i32* %a, i8* %b, i32* %c) #0 {
|
|||
; CHECK: LV: Checking a loop in "test0"
|
||||
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
|
||||
; CHECK_SCALABLE_ON: LV: Selecting VF: vscale x 4
|
||||
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4
|
||||
; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: vscale x 4
|
||||
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
|
||||
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
|
||||
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 16
|
||||
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Selecting VF: vscale x 16
|
||||
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 16
|
||||
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: vscale x 16
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
|
@ -44,12 +41,10 @@ define void @test1(i32* %a, i8* %b) #0 {
|
|||
; CHECK: LV: Checking a loop in "test1"
|
||||
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
|
||||
; CHECK_SCALABLE_ON: LV: Selecting VF: vscale x 4
|
||||
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4
|
||||
; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: vscale x 4
|
||||
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
|
||||
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
|
||||
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 4
|
||||
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Selecting VF: 16
|
||||
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 4
|
||||
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
|
@ -77,13 +72,11 @@ exit:
|
|||
define void @test2(i32* %a, i8* %b) #0 {
|
||||
; CHECK: LV: Checking a loop in "test2"
|
||||
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
|
||||
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
|
||||
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 2
|
||||
; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: vscale x 2
|
||||
; CHECK_SCALABLE_ON: LV: Selecting VF: vscale x 2
|
||||
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
|
||||
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
|
||||
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 2
|
||||
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Selecting VF: 16
|
||||
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 2
|
||||
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
|
@ -112,12 +105,10 @@ define void @test3(i32* %a, i8* %b) #0 {
|
|||
; CHECK: LV: Checking a loop in "test3"
|
||||
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
|
||||
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
|
||||
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 1
|
||||
; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: 4
|
||||
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
|
||||
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
|
||||
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 1
|
||||
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Selecting VF: 16
|
||||
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 1
|
||||
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
|
@ -145,13 +136,12 @@ exit:
|
|||
define void @test4(i32* %a, i32* %b) #0 {
|
||||
; CHECK: LV: Checking a loop in "test4"
|
||||
; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
|
||||
; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
|
||||
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
|
||||
; CHECK_SCALABLE_PREFERRED-NOT: LV: Found feasible scalable VF
|
||||
; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: 4
|
||||
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
|
||||
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
|
||||
; CHECK_SCALABLE_PREFERRED_MAXBW-NOT: LV: Found feasible scalable VF
|
||||
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Selecting VF: 4
|
||||
; CHECK_SCALABLE_ON_MAXBW-NOT: LV: Found feasible scalable VF
|
||||
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 4
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
; REQUIRES: asserts
|
||||
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -S -scalable-vectorization=on < %s 2>&1 | FileCheck %s
|
||||
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S -scalable-vectorization=on < %s 2>&1 | FileCheck --check-prefix=CHECK-DBG %s
|
||||
; RUN: opt -mtriple=aarch64-none-linux-gnu -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S -scalable-vectorization=on < %s 2>%t | FileCheck --check-prefix=CHECK-NO-SVE %s
|
||||
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -S < %s 2>&1 | FileCheck %s
|
||||
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck --check-prefix=CHECK-DBG %s
|
||||
; RUN: opt -mtriple=aarch64-none-linux-gnu -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>%t | FileCheck --check-prefix=CHECK-NO-SVE %s
|
||||
; RUN: cat %t | FileCheck %s -check-prefix=CHECK-NO-SVE-REMARKS
|
||||
|
||||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -scalable-vectorization=on < %s -S | FileCheck %s
|
||||
; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve < %s -S | FileCheck %s
|
||||
|
||||
|
||||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -o - | FileCheck %s
|
||||
; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -o - | FileCheck %s
|
||||
|
||||
define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias nocapture readonly %cond, i16* noalias nocapture readonly %inv, i64 %n) #0 {
|
||||
; CHECK-LABEL: @cond_inv_load_i32i32i16(
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; REQUIRES: asserts
|
||||
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-minimum-VF=0 --debug-only=loop-vectorize -force-target-instruction-cost=1 -S -scalable-vectorization=preferred 2>%t | FileCheck %s
|
||||
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-minimum-VF=0 --debug-only=loop-vectorize -force-target-instruction-cost=1 -S 2>%t | FileCheck %s
|
||||
; RUN: cat %t | FileCheck %s --check-prefix=DEBUG
|
||||
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-minimum-VF=8 --debug-only=loop-vectorize -S -scalable-vectorization=preferred 2>%t | FileCheck %s
|
||||
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-minimum-VF=8 --debug-only=loop-vectorize -S 2>%t | FileCheck %s
|
||||
; RUN: cat %t | FileCheck %s --check-prefix=DEBUG
|
||||
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-force-VF=8 --debug-only=loop-vectorize -S -scalable-vectorization=preferred 2>%t | FileCheck %s
|
||||
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-force-VF=8 --debug-only=loop-vectorize -S 2>%t | FileCheck %s
|
||||
; RUN: cat %t | FileCheck %s --check-prefix=DEBUG-FORCED
|
||||
|
||||
target triple = "aarch64-linux-gnu"
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
|
||||
; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -scalable-vectorization=preferred -force-target-instruction-cost=1 -o - | FileCheck %s
|
||||
; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -force-target-instruction-cost=1 -o - | FileCheck %s
|
||||
|
||||
define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) #0 {
|
||||
; CHECK-LABEL: @gather_nxv4i32_ind64(
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mattr=+sve -force-vector-width=4 -pass-remarks-analysis=loop-vectorize -S 2>%t | FileCheck %s
|
||||
; RUN: opt < %s -loop-vectorize -mattr=+sve -force-vector-width=4 -pass-remarks-analysis=loop-vectorize -S 2>%t | FileCheck %s
|
||||
; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARKS
|
||||
target triple = "aarch64-linux-gnu"
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; REQUIRES: asserts
|
||||
; RUN: opt -scalable-vectorization=on -loop-vectorize -S < %s -debug 2>%t | FileCheck %s
|
||||
; RUN: opt -loop-vectorize -S < %s -debug 2>%t | FileCheck %s
|
||||
; RUN: cat %t | FileCheck %s --check-prefix=DEBUG
|
||||
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -loop-vectorize -scalable-vectorization=on -force-target-instruction-cost=1 -dce -instcombine < %s -S | FileCheck %s
|
||||
; RUN: opt -loop-vectorize -force-target-instruction-cost=1 -dce -instcombine < %s -S | FileCheck %s
|
||||
|
||||
target triple = "aarch64-linux-gnu"
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: opt -S -loop-vectorize -scalable-vectorization=on -mattr=+sve -mtriple aarch64-linux-gnu < %s | FileCheck %s
|
||||
; RUN: opt -S -loop-vectorize -mattr=+sve -mtriple aarch64-linux-gnu < %s | FileCheck %s
|
||||
|
||||
define void @invariant_load(i64 %n, i32* noalias nocapture %a, i32* nocapture readonly %b) {
|
||||
; CHECK-LABEL: @invariant_load
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: opt -loop-vectorize -scalable-vectorization=on -S < %s | FileCheck %s
|
||||
; RUN: opt -loop-vectorize -S < %s | FileCheck %s
|
||||
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -scalable-vectorization=on -dce -instcombine -S <%s | FileCheck %s
|
||||
; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -dce -instcombine -S <%s | FileCheck %s
|
||||
|
||||
define void @stride7_i32(i32* noalias nocapture %dst, i64 %n) #0 {
|
||||
; CHECK-LABEL: @stride7_i32(
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -scalable-vectorization=on -o - | FileCheck %s
|
||||
; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -o - | FileCheck %s
|
||||
|
||||
define void @mloadstore_f32(float* noalias nocapture %a, float* noalias nocapture readonly %b, i64 %n) {
|
||||
; CHECK-LABEL: @mloadstore_f32
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: opt -loop-vectorize -scalable-vectorization=preferred -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC1
|
||||
; RUN: opt -loop-vectorize -scalable-vectorization=preferred -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC4
|
||||
; RUN: opt -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC1
|
||||
; RUN: opt -loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC4
|
||||
|
||||
target triple = "aarch64-linux-gnu"
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
; REQUIRES: asserts
|
||||
; RUN: opt < %s -loop-vectorize -debug -disable-output -force-ordered-reductions=true -hints-allow-reordering=false \
|
||||
; RUN: -scalable-vectorization=on -force-vector-width=4 -force-vector-interleave=1 -S 2>&1 | FileCheck %s --check-prefix=CHECK-VF4
|
||||
; RUN: -force-vector-width=4 -force-vector-interleave=1 -S 2>&1 | FileCheck %s --check-prefix=CHECK-VF4
|
||||
; RUN: opt < %s -loop-vectorize -debug -disable-output -force-ordered-reductions=true -hints-allow-reordering=false \
|
||||
; RUN: -scalable-vectorization=on -force-vector-width=8 -force-vector-interleave=1 -S 2>&1 | FileCheck %s --check-prefix=CHECK-VF8
|
||||
; RUN: -force-vector-width=8 -force-vector-interleave=1 -S 2>&1 | FileCheck %s --check-prefix=CHECK-VF8
|
||||
; RUN: opt < %s -loop-vectorize -debug -disable-output -force-ordered-reductions=true -hints-allow-reordering=false \
|
||||
; RUN: -scalable-vectorization=on -force-vector-width=4 -force-vector-interleave=1 -mcpu=neoverse-n2 -S 2>&1 | FileCheck %s --check-prefix=CHECK-VF4-CPU-NEOVERSE-N2
|
||||
; RUN: -force-vector-width=4 -force-vector-interleave=1 -mcpu=neoverse-n2 -S 2>&1 | FileCheck %s --check-prefix=CHECK-VF4-CPU-NEOVERSE-N2
|
||||
|
||||
target triple="aarch64-unknown-linux-gnu"
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: opt -S -loop-vectorize -scalable-vectorization=preferred -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck %s
|
||||
; RUN: opt -S -loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck %s
|
||||
|
||||
; CHECK-NOT: vector.body:
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine < %s -S | FileCheck %s
|
||||
; RUN: opt -loop-vectorize -dce -instcombine < %s -S | FileCheck %s
|
||||
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
|
||||
; The test checks if the mask is being correctly created, reverted and used
|
||||
|
||||
; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
|
||||
; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
; for (int i = N-1; i >= 0; --i)
|
||||
; a[i] = b[i] + 1.0;
|
||||
|
||||
; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
|
||||
; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
|
||||
|
||||
define void @vector_reverse_f64(i64 %N, double* %a, double* %b) #0{
|
||||
; CHECK-LABEL: @vector_reverse_f64(
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: opt -S -loop-vectorize -scalable-vectorization=on < %s | FileCheck %s
|
||||
; RUN: opt -S -loop-vectorize < %s | FileCheck %s
|
||||
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; REQUIRES: asserts
|
||||
; RUN: opt -loop-vectorize -scalable-vectorization=on -S -mtriple=aarch64 -mattr=+sve -debug-only=loop-vectorize < %s 2>&1 | FileCheck %s
|
||||
; RUN: opt -loop-vectorize -S -mtriple=aarch64 -mattr=+sve -debug-only=loop-vectorize < %s 2>&1 | FileCheck %s
|
||||
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -scalable-vectorization=on -dce -instcombine -S < %s | FileCheck %s
|
||||
; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -dce -instcombine -S < %s | FileCheck %s
|
||||
|
||||
; Ensure that we can vectorize loops such as:
|
||||
; int *ptr = c;
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-target-supports-scalable-vectors=true -dce -instcombine -S -scalable-vectorization=on | FileCheck %s
|
||||
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-target-supports-scalable-vectors=true -dce -instcombine -S -scalable-vectorization=preferred | FileCheck %s
|
||||
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-target-supports-scalable-vectors=true -dce -instcombine -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
|
||||
|
|
Loading…
Reference in a new issue