[BOLT] CMOVConversion pass
Convert simple hammocks into cmov based on misprediction rate. Test Plan: - Assembly test: `cmov-conversion.s` - Testing on a binary: # Bootstrap clang with `-x86-cmov-converter-force-all` and `-Wl,--emit-relocs` (Release build) # Collect perf.data: - `clang++ <opts> bolt/lib/Core/BinaryFunction.cpp -E > bf.cpp` - `perf record -e cycles:u -j any,u -- clang-15 bf.cpp -O2 -std=c++14 -c -o bf.o` # Optimize clang-15 with and w/o -cmov-conversion: - `llvm-bolt clang-15 -p perf.data -o clang-15.bolt` - `llvm-bolt clang-15 -p perf.data -cmov-conversion -o clang-15.bolt.cmovconv` # Run perf experiment: - test: `clang-15.bolt.cmovconv`, - control: `clang-15.bolt`, - workload (clang options): `bf.cpp -O2 -std=c++14 -c -o bf.o` Results: ``` task-clock [delta: -360.21 ± 356.75, delta(%): -1.7760 ± 1.7589, p-value: 0.047951, balance: -6] instructions [delta: 44061118 ± 13246382, delta(%): 0.0690 ± 0.0207, p-value: 0.000001, balance: 50] icache-misses [delta: -5534468 ± 2779620, delta(%): -0.4331 ± 0.2175, p-value: 0.028014, balance: -28] branch-misses [delta: -1624270 ± 1113244, delta(%): -0.3456 ± 0.2368, p-value: 0.030300, balance: -22] ``` Reviewed By: rafauler Differential Revision: https://reviews.llvm.org/D120177
This commit is contained in:
parent
151f809c55
commit
687e4af1c0
|
@ -1292,6 +1292,16 @@ public:
|
|||
return false;
|
||||
}
|
||||
|
||||
/// Convert a move instruction into a conditional move instruction, given a
|
||||
/// condition code.
|
||||
virtual bool
|
||||
convertMoveToConditionalMove(MCInst &Inst, unsigned CC,
|
||||
bool AllowStackMemOp = false,
|
||||
bool AllowBasePtrStackMemOp = false) const {
|
||||
llvm_unreachable("not implemented");
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Lower a tail call instruction \p Inst if required by target.
|
||||
virtual bool lowerTailCall(MCInst &Inst) {
|
||||
llvm_unreachable("not implemented");
|
||||
|
|
85
bolt/include/bolt/Passes/CMOVConversion.h
Normal file
85
bolt/include/bolt/Passes/CMOVConversion.h
Normal file
|
@ -0,0 +1,85 @@
|
|||
//===- bolt/Passes/CMOVConversion.h ----------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This pass finds the following patterns:
|
||||
// jcc
|
||||
// / \
|
||||
// (empty) mov src, dst
|
||||
// \ /
|
||||
//
|
||||
// and replaces them with:
|
||||
//
|
||||
// cmovcc src, dst
|
||||
//
|
||||
// The advantage of performing this conversion in BOLT (compared to compiler
|
||||
// heuristic driven instruction selection) is that BOLT can use LBR
|
||||
// misprediction information and only convert poorly predictable branches.
|
||||
// Note that branch misprediction rate is different from branch bias.
|
||||
// For well-predictable branches, it might be beneficial to leave jcc+mov as is
|
||||
// from microarchitectural perspective to avoid unneeded dependencies (CMOV
|
||||
// instruction has a dataflow dependence on flags and both operands).
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef BOLT_PASSES_CMOVCONVERSION_H
|
||||
#define BOLT_PASSES_CMOVCONVERSION_H
|
||||
|
||||
#include "bolt/Passes/BinaryPasses.h"
|
||||
|
||||
namespace llvm {
|
||||
namespace bolt {
|
||||
|
||||
/// Pass for folding eligible hammocks into CMOV's if profitable.
|
||||
class CMOVConversion : public BinaryFunctionPass {
|
||||
struct Stats {
|
||||
/// Record how many possible cases there are.
|
||||
uint64_t StaticPossible = 0;
|
||||
uint64_t DynamicPossible = 0;
|
||||
|
||||
/// Record how many cases were converted.
|
||||
uint64_t StaticPerformed = 0;
|
||||
uint64_t DynamicPerformed = 0;
|
||||
|
||||
/// Record how many mispredictions were eliminated.
|
||||
uint64_t PossibleMP = 0;
|
||||
uint64_t RemovedMP = 0;
|
||||
|
||||
Stats operator+(const Stats &O) {
|
||||
StaticPossible += O.StaticPossible;
|
||||
DynamicPossible += O.DynamicPossible;
|
||||
StaticPerformed += O.StaticPerformed;
|
||||
DynamicPerformed += O.DynamicPerformed;
|
||||
PossibleMP += O.PossibleMP;
|
||||
RemovedMP += O.RemovedMP;
|
||||
return *this;
|
||||
}
|
||||
double getStaticRatio() { return (double)StaticPerformed / StaticPossible; }
|
||||
double getDynamicRatio() {
|
||||
return (double)DynamicPerformed / DynamicPossible;
|
||||
}
|
||||
double getMPRatio() { return (double)RemovedMP / PossibleMP; }
|
||||
|
||||
void dump();
|
||||
};
|
||||
// BinaryContext-wide stats
|
||||
Stats Global;
|
||||
|
||||
void runOnFunction(BinaryFunction &Function);
|
||||
|
||||
public:
|
||||
explicit CMOVConversion() : BinaryFunctionPass(false) {}
|
||||
|
||||
const char *getName() const override { return "CMOV conversion"; }
|
||||
|
||||
void runOnFunctions(BinaryContext &BC) override;
|
||||
};
|
||||
|
||||
} // namespace bolt
|
||||
} // namespace llvm
|
||||
|
||||
#endif
|
287
bolt/lib/Passes/CMOVConversion.cpp
Normal file
287
bolt/lib/Passes/CMOVConversion.cpp
Normal file
|
@ -0,0 +1,287 @@
|
|||
//===- bolt/Passes/CMOVConversion.cpp ------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements the CMOV conversion pass.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "bolt/Passes/CMOVConversion.h"
|
||||
#include "bolt/Core/BinaryBasicBlock.h"
|
||||
#include "bolt/Core/BinaryContext.h"
|
||||
#include "bolt/Utils/CommandLineOpts.h"
|
||||
#include "llvm/ADT/PostOrderIterator.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include <numeric>
|
||||
|
||||
#define DEBUG_TYPE "cmov"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace opts {
|
||||
|
||||
extern cl::OptionCategory BoltOptCategory;
|
||||
|
||||
// Tunables of the CMOV conversion pass. All of them are hidden options in the
// BOLT optimization category.

// Candidates whose computed condition bias is below this value are skipped.
static cl::opt<int> BiasThreshold(
    "cmov-conversion-bias-threshold",
    cl::desc("minimum condition bias (pct) to perform a CMOV conversion, -1 to "
             "not account bias"),
    cl::init(1), cl::ReallyHidden, cl::cat(BoltOptCategory));

// Candidates whose computed misprediction rate is below this value are
// skipped.
static cl::opt<int> MispredictionThreshold(
    "cmov-conversion-misprediction-threshold",
    cl::desc("minimum misprediction rate (pct) to perform a CMOV conversion, -1 "
             "to not account misprediction rate"),
    cl::init(5), cl::ReallyHidden, cl::cat(BoltOptCategory));

// Passed to the target hook as its AllowStackMemOp argument.
static cl::opt<bool> ConvertStackMemOperand(
    "cmov-conversion-convert-stack-mem-operand",
    cl::desc("convert moves with stack memory operand (potentially unsafe)"),
    cl::init(false), cl::ReallyHidden, cl::cat(BoltOptCategory));

// Passed to the target hook as its AllowBasePtrStackMemOp argument.
static cl::opt<bool> ConvertBasePtrStackMemOperand(
    "cmov-conversion-convert-rbp-stack-mem-operand",
    cl::desc("convert moves with rbp stack memory operand (unsafe, must be "
             "off for binaries compiled with -fomit-frame-pointer)"),
    cl::init(false), cl::ReallyHidden, cl::cat(BoltOptCategory));
|
||||
|
||||
} // namespace opts
|
||||
|
||||
namespace llvm {
|
||||
namespace bolt {
|
||||
|
||||
// Return true if the CFG conforms to the following subgraph:
|
||||
// Predecessor
|
||||
// / \
|
||||
// | RHS
|
||||
// \ /
|
||||
// LHS
|
||||
// Caller guarantees that LHS and RHS share the same predecessor.
|
||||
bool isIfThenSubgraph(const BinaryBasicBlock &LHS,
|
||||
const BinaryBasicBlock &RHS) {
|
||||
if (LHS.pred_size() != 2 || RHS.pred_size() != 1)
|
||||
return false;
|
||||
|
||||
// Sanity check
|
||||
BinaryBasicBlock *Predecessor = *RHS.pred_begin();
|
||||
assert(Predecessor && LHS.isPredecessor(Predecessor) && "invalid subgraph");
|
||||
(void)Predecessor;
|
||||
|
||||
if (!LHS.isPredecessor(&RHS))
|
||||
return false;
|
||||
if (RHS.succ_size() != 1)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Try to match an if-then hammock rooted at BB, testing both orientations
// (conditional block on the taken edge, or on the fall-through edge).
// On success returns true and fills the output parameters:
//   ConditionalSucc    - the conditionally executed block,
//   UnconditionalSucc  - the join block,
//   IsConditionalTaken - true if ConditionalSucc is the taken successor.
// On failure returns false and leaves the output parameters untouched.
bool matchCFGSubgraph(BinaryBasicBlock &BB, BinaryBasicBlock *&ConditionalSucc,
                      BinaryBasicBlock *&UnconditionalSucc,
                      bool &IsConditionalTaken) {
  BinaryBasicBlock *const Taken = BB.getConditionalSuccessor(true);
  BinaryBasicBlock *const Fallthrough = BB.getConditionalSuccessor(false);

  const bool TakenIsThen = isIfThenSubgraph(*Fallthrough, *Taken);
  const bool FallthroughIsThen = isIfThenSubgraph(*Taken, *Fallthrough);
  if (!(TakenIsThen || FallthroughIsThen))
    return false;
  // Both orientations cannot match at once.
  assert(!(FallthroughIsThen && TakenIsThen) && "Invalid subgraph");

  // Output parameters
  if (TakenIsThen) {
    ConditionalSucc = Taken;
    UnconditionalSucc = Fallthrough;
  } else {
    ConditionalSucc = Fallthrough;
    UnconditionalSucc = Taken;
  }
  IsConditionalTaken = TakenIsThen;
  return true;
}
|
||||
|
||||
// Return true if basic block instructions can be converted into cmov(s).
|
||||
bool canConvertInstructions(const BinaryContext &BC, const BinaryBasicBlock &BB,
|
||||
unsigned CC) {
|
||||
if (BB.empty())
|
||||
return false;
|
||||
const MCInst *LastInst = BB.getLastNonPseudoInstr();
|
||||
// Only pseudo instructions, can't be converted into CMOV
|
||||
if (LastInst == nullptr)
|
||||
return false;
|
||||
for (const MCInst &Inst : BB) {
|
||||
if (BC.MIB->isPseudo(Inst))
|
||||
continue;
|
||||
// Unconditional branch as a last instruction is OK
|
||||
if (&Inst == LastInst && BC.MIB->isUnconditionalBranch(Inst))
|
||||
continue;
|
||||
MCInst Cmov(Inst);
|
||||
// GPR move is OK
|
||||
if (!BC.MIB->convertMoveToConditionalMove(
|
||||
Cmov, CC, opts::ConvertStackMemOperand,
|
||||
opts::ConvertBasePtrStackMemOperand)) {
|
||||
LLVM_DEBUG({
|
||||
dbgs() << BB.getName() << ": can't convert instruction ";
|
||||
BC.printInstruction(dbgs(), Cmov);
|
||||
});
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void convertMoves(const BinaryContext &BC, BinaryBasicBlock &BB, unsigned CC) {
|
||||
for (auto II = BB.begin(), IE = BB.end(); II != IE; ++II) {
|
||||
if (BC.MIB->isPseudo(*II))
|
||||
continue;
|
||||
if (BC.MIB->isUnconditionalBranch(*II)) {
|
||||
// XXX: this invalidates II but we return immediately
|
||||
BB.eraseInstruction(II);
|
||||
return;
|
||||
}
|
||||
bool Result = BC.MIB->convertMoveToConditionalMove(
|
||||
*II, CC, opts::ConvertStackMemOperand,
|
||||
opts::ConvertBasePtrStackMemOperand);
|
||||
assert(Result && "unexpected instruction");
|
||||
(void)Result;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns misprediction rate if the profile data is available, -1 otherwise.
|
||||
std::pair<int, uint64_t>
|
||||
calculateMispredictionRate(const BinaryBasicBlock &BB) {
|
||||
uint64_t TotalExecCount = 0;
|
||||
uint64_t TotalMispredictionCount = 0;
|
||||
for (auto BI : BB.branch_info()) {
|
||||
TotalExecCount += BI.Count;
|
||||
if (BI.MispredictedCount != BinaryBasicBlock::COUNT_INFERRED)
|
||||
TotalMispredictionCount += BI.MispredictedCount;
|
||||
}
|
||||
if (!TotalExecCount)
|
||||
return {-1, TotalMispredictionCount};
|
||||
return {100.0f * TotalMispredictionCount / TotalExecCount,
|
||||
TotalMispredictionCount};
|
||||
}
|
||||
|
||||
// Returns conditional succ bias if the profile is available, -1 otherwise.
|
||||
int calculateConditionBias(const BinaryBasicBlock &BB,
|
||||
const BinaryBasicBlock &ConditionalSucc) {
|
||||
if (auto BranchStats = BB.getBranchStats(&ConditionalSucc))
|
||||
return BranchStats->first;
|
||||
return -1;
|
||||
}
|
||||
|
||||
void CMOVConversion::Stats::dump() {
|
||||
outs() << "converted static " << StaticPerformed << "/" << StaticPossible
|
||||
<< formatv(" ({0:P}) ", getStaticRatio())
|
||||
<< "hammock(s) into CMOV sequences, with dynamic execution count "
|
||||
<< DynamicPerformed << "/" << DynamicPossible
|
||||
<< formatv(" ({0:P}), ", getDynamicRatio()) << "saving " << RemovedMP
|
||||
<< "/" << PossibleMP << formatv(" ({0:P}) ", getMPRatio())
|
||||
<< "mispredictions\n";
|
||||
}
|
||||
|
||||
// Convert eligible hammocks of Function into CMOV sequences.
//
// For every candidate block (non-empty, non-zero known execution count,
// exactly two successors, no jump table, ending in a conditional branch) that
// forms an if-then hammock with convertible contents, and that passes the
// bias and misprediction thresholds, the conditional branch is removed and
// both the converted conditional block and the join block are merged into the
// candidate. Statistics are collected locally and added to the pass-wide
// totals at the end.
void CMOVConversion::runOnFunction(BinaryFunction &Function) {
  BinaryContext &BC = Function.getBinaryContext();
  // Function-local stats
  Stats Local;
  bool Modified = false;

  // Blocks are visited with post_order(), merging a block holding a converted
  // cmov with its successor.
  for (BinaryBasicBlock *BB : post_order(&Function)) {
    const uint64_t BBExecCount = BB->getKnownExecutionCount();
    const bool IsCandidate = !BB->empty() &&          // has instructions
                             BBExecCount != 0 &&      // is hot
                             BB->succ_size() == 2 &&  // two successors
                             !BB->hasJumpTable();     // no jump table
    if (!IsCandidate)
      continue;

    assert(BB->isValid() && "traversal internal error");

    // The last non-pseudo instruction must be a conditional branch.
    auto BranchIt = BB->getLastNonPseudo();
    if (BranchIt == BB->rend() || !BC.MIB->isConditionalBranch(*BranchIt))
      continue;

    // The successors must form an if-then hammock.
    BinaryBasicBlock *ConditionalSucc;
    BinaryBasicBlock *UnconditionalSucc;
    bool IsConditionalTaken;
    if (!matchCFGSubgraph(*BB, ConditionalSucc, UnconditionalSucc,
                          IsConditionalTaken)) {
      LLVM_DEBUG(dbgs() << BB->getName() << ": couldn't match hammock\n");
      continue;
    }

    // The moves execute when the branch leads into the conditional block, so
    // the condition is inverted if that block sits on the fall-through edge.
    const unsigned BranchCC = BC.MIB->getCondCode(*BranchIt);
    const unsigned CC =
        IsConditionalTaken ? BranchCC : BC.MIB->getInvertedCondCode(BranchCC);

    // Check contents of the conditional block
    if (!canConvertInstructions(BC, *ConditionalSucc, CC))
      continue;

    const int ConditionBias = calculateConditionBias(*BB, *ConditionalSucc);
    const std::pair<int, uint64_t> Mispredictions =
        calculateMispredictionRate(*BB);
    const int MispredictionRate = Mispredictions.first;
    const uint64_t MispredictionCount = Mispredictions.second;

    // From here on the hammock counts as a possible conversion.
    ++Local.StaticPossible;
    Local.DynamicPossible += BBExecCount;
    Local.PossibleMP += MispredictionCount;

    // If the conditional successor is never executed, don't convert it
    if (ConditionBias < opts::BiasThreshold) {
      LLVM_DEBUG(dbgs() << BB->getName() << "->" << ConditionalSucc->getName()
                        << " bias = " << ConditionBias
                        << ", less than threshold " << opts::BiasThreshold
                        << '\n');
      continue;
    }

    // Check the misprediction rate of a branch
    if (MispredictionRate < opts::MispredictionThreshold) {
      LLVM_DEBUG(dbgs() << BB->getName() << " misprediction rate = "
                        << MispredictionRate << ", less than threshold "
                        << opts::MispredictionThreshold << '\n');
      continue;
    }

    // Remove the conditional branch and detach BB from its successors.
    BB->eraseInstruction(std::prev(BranchIt.base()));
    BB->removeAllSuccessors();

    // Convert instructions from the conditional successor into cmov's and
    // append them to BB.
    convertMoves(BC, *ConditionalSucc, CC);
    BB->addInstructions(ConditionalSucc->begin(), ConditionalSucc->end());
    ConditionalSucc->markValid(false);

    // The traversal order guarantees that the successor is visited and merged
    // if necessary. Merge the unconditional successor into the current block.
    BB->addInstructions(UnconditionalSucc->begin(), UnconditionalSucc->end());
    UnconditionalSucc->moveAllSuccessorsTo(BB);
    UnconditionalSucc->markValid(false);

    ++Local.StaticPerformed;
    Local.DynamicPerformed += BBExecCount;
    Local.RemovedMP += MispredictionCount;
    Modified = true;
  }

  // Drop the blocks that were merged away.
  if (Modified)
    Function.eraseInvalidBBs();

  if (opts::Verbosity > 1) {
    outs() << "BOLT-INFO: CMOVConversion: " << Function << ", ";
    Local.dump();
  }
  Global = Global + Local;
}
|
||||
|
||||
// Pass entry point: run the conversion on every function of BC accepted by
// shouldOptimize(), then print the accumulated totals.
void CMOVConversion::runOnFunctions(BinaryContext &BC) {
  for (auto &Entry : BC.getBinaryFunctions()) {
    BinaryFunction &Function = Entry.second;
    if (shouldOptimize(Function))
      runOnFunction(Function);
  }

  outs() << "BOLT-INFO: CMOVConversion total: ";
  Global.dump();
}
|
||||
|
||||
} // end namespace bolt
|
||||
} // end namespace llvm
|
|
@ -5,6 +5,7 @@ add_llvm_library(LLVMBOLTPasses
|
|||
AsmDump.cpp
|
||||
BinaryPasses.cpp
|
||||
BinaryFunctionCallGraph.cpp
|
||||
CMOVConversion.cpp
|
||||
CacheMetrics.cpp
|
||||
CallGraph.cpp
|
||||
CallGraphWalker.cpp
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "bolt/Passes/Aligner.h"
|
||||
#include "bolt/Passes/AllocCombiner.h"
|
||||
#include "bolt/Passes/AsmDump.h"
|
||||
#include "bolt/Passes/CMOVConversion.h"
|
||||
#include "bolt/Passes/FrameOptimizer.h"
|
||||
#include "bolt/Passes/IdenticalCodeFolding.h"
|
||||
#include "bolt/Passes/IndirectCallPromotion.h"
|
||||
|
@ -247,6 +248,11 @@ ThreeWayBranchFlag("three-way-branch",
|
|||
cl::desc("reorder three way branches"),
|
||||
cl::ZeroOrMore, cl::ReallyHidden, cl::cat(BoltOptCategory));
|
||||
|
||||
// Hidden switch that enables the CMOV conversion pass.
static cl::opt<bool>
    CMOVConversionFlag("cmov-conversion", cl::desc("fold jcc+mov into cmov"),
                       cl::ZeroOrMore, cl::ReallyHidden,
                       cl::cat(BoltOptCategory));
|
||||
|
||||
} // namespace opts
|
||||
|
||||
namespace llvm {
|
||||
|
@ -393,6 +399,9 @@ void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
|
|||
Manager.registerPass(std::make_unique<TailDuplication>(),
|
||||
opts::TailDuplicationFlag);
|
||||
|
||||
Manager.registerPass(std::make_unique<CMOVConversion>(),
|
||||
opts::CMOVConversionFlag);
|
||||
|
||||
// This pass syncs local branches with CFG. If any of the following
|
||||
// passes breaks the sync - they either need to re-run the pass or
|
||||
// fix branches consistency internally.
|
||||
|
|
|
@ -12,12 +12,15 @@
|
|||
|
||||
#include "MCTargetDesc/X86BaseInfo.h"
|
||||
#include "MCTargetDesc/X86MCTargetDesc.h"
|
||||
#include "bolt/Core/MCPlus.h"
|
||||
#include "bolt/Core/MCPlusBuilder.h"
|
||||
#include "llvm/BinaryFormat/ELF.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/MCFixupKindInfo.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/MC/MCInstBuilder.h"
|
||||
#include "llvm/MC/MCInstrInfo.h"
|
||||
#include "llvm/MC/MCRegister.h"
|
||||
#include "llvm/MC/MCRegisterInfo.h"
|
||||
#include "llvm/Support/DataExtractor.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
|
@ -2134,6 +2137,70 @@ public:
|
|||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
convertMoveToConditionalMove(MCInst &Inst, unsigned CC, bool AllowStackMemOp,
|
||||
bool AllowBasePtrStackMemOp) const override {
|
||||
// - Register-register moves are OK
|
||||
// - Stores are filtered out by opcode (no store CMOV)
|
||||
// - Non-stack loads are prohibited (generally unsafe)
|
||||
// - Stack loads are OK if AllowStackMemOp is true
|
||||
// - Stack loads with RBP are OK if AllowBasePtrStackMemOp is true
|
||||
if (isLoad(Inst)) {
|
||||
// If stack memory operands are not allowed, no loads are allowed
|
||||
if (!AllowStackMemOp)
|
||||
return false;
|
||||
|
||||
// If stack memory operands are allowed, check if it's a load from stack
|
||||
bool IsLoad, IsStore, IsStoreFromReg, IsSimple, IsIndexed;
|
||||
MCPhysReg Reg;
|
||||
int32_t SrcImm;
|
||||
uint16_t StackPtrReg;
|
||||
int64_t StackOffset;
|
||||
uint8_t Size;
|
||||
bool IsStackAccess =
|
||||
isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, Reg, SrcImm,
|
||||
StackPtrReg, StackOffset, Size, IsSimple, IsIndexed);
|
||||
// Prohibit non-stack-based loads
|
||||
if (!IsStackAccess)
|
||||
return false;
|
||||
// If stack memory operands are allowed, check if it's RBP-based
|
||||
if (!AllowBasePtrStackMemOp &&
|
||||
RegInfo->isSubRegisterEq(X86::RBP, StackPtrReg))
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned NewOpcode = 0;
|
||||
switch (Inst.getOpcode()) {
|
||||
case X86::MOV16rr:
|
||||
NewOpcode = X86::CMOV16rr;
|
||||
break;
|
||||
case X86::MOV16rm:
|
||||
NewOpcode = X86::CMOV16rm;
|
||||
break;
|
||||
case X86::MOV32rr:
|
||||
NewOpcode = X86::CMOV32rr;
|
||||
break;
|
||||
case X86::MOV32rm:
|
||||
NewOpcode = X86::CMOV32rm;
|
||||
break;
|
||||
case X86::MOV64rr:
|
||||
NewOpcode = X86::CMOV64rr;
|
||||
break;
|
||||
case X86::MOV64rm:
|
||||
NewOpcode = X86::CMOV64rm;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
Inst.setOpcode(NewOpcode);
|
||||
// Insert CC at the end of prime operands, before annotations
|
||||
Inst.insert(Inst.begin() + MCPlus::getNumPrimeOperands(Inst),
|
||||
MCOperand::createImm(CC));
|
||||
// CMOV is a 3-operand MCInst, so duplicate the destination as src1
|
||||
Inst.insert(Inst.begin(), Inst.getOperand(0));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool lowerTailCall(MCInst &Inst) override {
|
||||
if (Inst.getOpcode() == X86::JMP_4 && isTailCall(Inst)) {
|
||||
Inst.setOpcode(X86::JMP_1);
|
||||
|
|
567
bolt/test/X86/cmov-conversion.s
Normal file
567
bolt/test/X86/cmov-conversion.s
Normal file
|
@ -0,0 +1,567 @@
|
|||
# REQUIRES: system-linux
|
||||
|
||||
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
|
||||
# RUN: link_fdata %s %t.o %t.fdata
|
||||
# RUN: llvm-strip --strip-unneeded %t.o
|
||||
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
|
||||
# RUN: llvm-bolt %t.exe -data %t.fdata -o %t -lite=0 -v=2 \
|
||||
# RUN: -cmov-conversion -cmov-conversion-misprediction-threshold=-1 \
|
||||
# RUN: -cmov-conversion-bias-threshold=-1 -print-all | FileCheck %s
|
||||
# CHECK: BOLT-INFO: CMOVConversion: CmovInHotPath, converted static 1/1
|
||||
# CHECK: BOLT-INFO: CMOVConversion: CmovNotInHotPath, converted static 1/1
|
||||
# CHECK: BOLT-INFO: CMOVConversion: MaxIndex, converted static 1/1
|
||||
# CHECK: BOLT-INFO: CMOVConversion: MaxIndex_unpredictable, converted static 1/1
|
||||
# CHECK: BOLT-INFO: CMOVConversion: MaxValue, converted static 1/1
|
||||
# CHECK: BOLT-INFO: CMOVConversion: BinarySearch, converted static 0/0
|
||||
# CHECK: BOLT-INFO: CMOVConversion: Transform, converted static 0/0
|
||||
# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand, converted static 0/0
|
||||
# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_unpredictable, converted static 0/0
|
||||
# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_in_group, converted static 0/0
|
||||
# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_in_group2, converted static 0/0
|
||||
# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_conflicting_dir, converted static 0/0
|
||||
# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_in_group_reuse_for_addr, converted static 0/0
|
||||
# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_in_group_reuse_for_addr2, converted static 0/0
|
||||
# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_in_group_reuse_for_addr3, converted static 0/0
|
||||
# CHECK: BOLT-INFO: CMOVConversion: test_memoperand_loop, converted static 1/1
|
||||
# CHECK: BOLT-INFO: CMOVConversion: CmovBackToBack, converted static 2/2
|
||||
# CHECK: BOLT-INFO: CMOVConversion total: converted static 8/8
|
||||
|
||||
.globl _start
|
||||
_start:
|
||||
#-----------------------------------------------------------------------
# CmovInHotPath (x86-64, AT&T syntax) -- test input
# The loop at LBB0_2 ends in a hammock: "jg LBB0_4" skips the single
# register move in LBB0_3. The expected output embedded below is the
# compare followed by "cmovlel %r9d, %r10d" (branch condition inverted).
# Profile records are attached to the function entry, the loop head and
# both outgoing edges of the branch at LBB0_2_br.
#-----------------------------------------------------------------------
        .globl  CmovInHotPath
        .p2align 4, 0x90
        .type   CmovInHotPath,@function
CmovInHotPath:
# CHECK-LABEL: Binary Function "CmovInHotPath" after CMOV conversion
# FDATA: 0 [unknown] 0 1 CmovInHotPath 0 1 2
        .cfi_startproc
                                        # entry: skip the loop if %edi <= 0
        testl   %edi, %edi
        jle     LBB0_5
                                        # preheader
        movl    %edi, %r8d
        xorl    %edi, %edi
# FDATA: 0 [unknown] 0 1 CmovInHotPath #LBB0_2# 1 2
LBB0_2:                                 # loop body
        movl    (%rcx,%rdi,4), %eax
        leal    1(%rax), %r9d
        imull   %esi, %eax
        movl    $10, %r10d
        cmpl    %edx, %eax
# CHECK: cmpl %edx, %eax
# CHECK-NEXT: cmovlel %r9d, %r10d
LBB0_2_br:                              # hammock branch
        jg      LBB0_4
# FDATA: 1 CmovInHotPath #LBB0_2_br# 1 CmovInHotPath #LBB0_3# 1 2
# FDATA: 1 CmovInHotPath #LBB0_2_br# 1 CmovInHotPath #LBB0_4# 1 2
LBB0_3:                                 # conditional block: one reg-reg move
        movl    %r9d, %r10d
LBB0_4:                                 # join block
        imull   %r9d, %r10d
        movl    %r10d, (%rcx,%rdi,4)
        addq    $1, %rdi
        cmpq    %rdi, %r8
        jne     LBB0_2
LBB0_5:                                 # exit
        retq
Lfunc_end0:
        .size   CmovInHotPath, Lfunc_end0-CmovInHotPath
        .cfi_endproc
# end of CmovInHotPath
|
||||
#-----------------------------------------------------------------------
# CmovNotInHotPath (x86-64, AT&T syntax) -- test input
# Same hammock shape as the previous function: "jg LBB1_4" skips the
# single register move in LBB1_3, while the join block continues with a
# store, a load and a division. The expected output embedded below is
# the compare followed by "cmovlel %r11d, %edx".
#-----------------------------------------------------------------------
        .globl  CmovNotInHotPath
        .p2align 4, 0x90
        .type   CmovNotInHotPath,@function
CmovNotInHotPath:
# CHECK-LABEL: Binary Function "CmovNotInHotPath" after CMOV conversion
# FDATA: 0 [unknown] 0 1 CmovNotInHotPath 0 1 2
        .cfi_startproc
                                        # entry: skip the loop if %edi <= 0
        testl   %edi, %edi
        jle     LBB1_5
                                        # preheader
        movl    %edx, %r9d
        movl    %edi, %r10d
        xorl    %edi, %edi
# FDATA: 0 [unknown] 0 1 CmovNotInHotPath #LBB1_2# 1 2
LBB1_2:                                 # loop body
        movl    (%rcx,%rdi,4), %r11d
        movl    %r11d, %eax
        imull   %esi, %eax
        movl    $10, %edx
        cmpl    %r9d, %eax
# CHECK: cmpl %r9d, %eax
# CHECK-NEXT: cmovlel %r11d, %edx
LBB1_4_br:                              # hammock branch
        jg      LBB1_4
# FDATA: 1 CmovNotInHotPath #LBB1_4_br# 1 CmovNotInHotPath #LBB1_3# 1 2
# FDATA: 1 CmovNotInHotPath #LBB1_4_br# 1 CmovNotInHotPath #LBB1_4# 1 2
LBB1_3:                                 # conditional block: one reg-reg move
        movl    %r11d, %edx
LBB1_4:                                 # join block
        movl    %edx, (%rcx,%rdi,4)
        movl    (%r8,%rdi,4), %eax
        cltd                            # sign-extend %eax into %edx for idivl
        idivl   %r9d
        movl    %eax, (%r8,%rdi,4)
        addq    $1, %rdi
        cmpq    %rdi, %r10
        jne     LBB1_2
LBB1_5:                                 # exit
        retq
Lfunc_end1:
        .size   CmovNotInHotPath, Lfunc_end1-CmovNotInHotPath
        .cfi_endproc
# end of CmovNotInHotPath
|
||||
#-----------------------------------------------------------------------
# MaxIndex (x86-64, AT&T syntax) -- test input
# The loop at LBB2_2 compares a register with a memory operand, then
# "jg LBB2_4" skips the single register move in LBB2_3. The expected
# output embedded below is the compare followed by "cmovlel %edi, %eax".
#-----------------------------------------------------------------------
        .globl  MaxIndex
        .p2align 4, 0x90
        .type   MaxIndex,@function
MaxIndex:
# CHECK-LABEL: Binary Function "MaxIndex" after CMOV conversion
# FDATA: 0 [unknown] 0 1 MaxIndex 0 1 2
        .cfi_startproc
                                        # entry: return 0 if %edi < 2
        xorl    %eax, %eax
        cmpl    $2, %edi
        jl      LBB2_5
                                        # preheader
        movl    %edi, %r8d
        xorl    %edi, %edi
        movl    $1, %edx
# FDATA: 0 [unknown] 0 1 MaxIndex #LBB2_2# 1 2
LBB2_2:                                 # loop body
        movl    (%rsi,%rdx,4), %r9d
        movslq  %edi, %rcx
        movl    %edx, %eax
        cmpl    (%rsi,%rcx,4), %r9d
# CHECK: cmpl (%rsi,%rcx,4), %r9d
# CHECK-NEXT: cmovlel %edi, %eax
LBB2_2_br:                              # hammock branch
        jg      LBB2_4
# FDATA: 1 MaxIndex #LBB2_2_br# 1 MaxIndex #LBB2_3# 1 2
# FDATA: 1 MaxIndex #LBB2_2_br# 1 MaxIndex #LBB2_4# 1 2
LBB2_3:                                 # conditional block: one reg-reg move
        movl    %edi, %eax
LBB2_4:                                 # join block
        addq    $1, %rdx
        movl    %eax, %edi
        cmpq    %rdx, %r8
        jne     LBB2_2
LBB2_5:                                 # exit
        retq
Lfunc_end2:
        .size   MaxIndex, Lfunc_end2-MaxIndex
        .cfi_endproc
# end of MaxIndex
|
||||
#-----------------------------------------------------------------------
# MaxIndex_unpredictable (x86-64, AT&T syntax) -- test input
# Instruction-for-instruction the same as MaxIndex, with its own labels
# (LBB3_*) and profile records. The expected output embedded below is
# again the compare followed by "cmovlel %edi, %eax".
#-----------------------------------------------------------------------
        .globl  MaxIndex_unpredictable
        .p2align 4, 0x90
        .type   MaxIndex_unpredictable,@function
MaxIndex_unpredictable:
# CHECK-LABEL: Binary Function "MaxIndex_unpredictable" after CMOV conversion
# FDATA: 0 [unknown] 0 1 MaxIndex_unpredictable 0 1 2
        .cfi_startproc
                                        # entry: return 0 if %edi < 2
        xorl    %eax, %eax
        cmpl    $2, %edi
        jl      LBB3_5
                                        # preheader
        movl    %edi, %r8d
        xorl    %edi, %edi
        movl    $1, %edx
# FDATA: 0 [unknown] 0 1 MaxIndex_unpredictable #LBB3_2# 1 2
LBB3_2:                                 # loop body
        movl    (%rsi,%rdx,4), %r9d
        movslq  %edi, %rcx
        movl    %edx, %eax
        cmpl    (%rsi,%rcx,4), %r9d
# CHECK: cmpl (%rsi,%rcx,4), %r9d
# CHECK-NEXT: cmovlel %edi, %eax
LBB3_2_br:                              # hammock branch
        jg      LBB3_4
# FDATA: 1 MaxIndex_unpredictable #LBB3_2_br# 1 MaxIndex_unpredictable #LBB3_3# 1 2
# FDATA: 1 MaxIndex_unpredictable #LBB3_2_br# 1 MaxIndex_unpredictable #LBB3_4# 1 2
LBB3_3:                                 # conditional block: one reg-reg move
        movl    %edi, %eax
LBB3_4:                                 # join block
        addq    $1, %rdx
        movl    %eax, %edi
        cmpq    %rdx, %r8
        jne     LBB3_2
LBB3_5:                                 # exit
        retq
Lfunc_end3:
        .size   MaxIndex_unpredictable, Lfunc_end3-MaxIndex_unpredictable
        .cfi_endproc
# end of MaxIndex_unpredictable
|
||||
#-----------------------------------------------------------------------
# MaxValue (x86-64, AT&T syntax) -- test input
# The return path (LBB4_1/LBB4_2) is laid out before the loop. In the
# loop at LBB4_4, "jg LBB4_6" skips the single register move in LBB4_5;
# the join block ends in "je" plus an explicit "jmp" back to the loop
# head. The expected output embedded below is the compare followed by
# "cmovlel %ecx, %eax".
#-----------------------------------------------------------------------
        .globl  MaxValue
        .p2align 4, 0x90
        .type   MaxValue,@function
MaxValue:
# CHECK-LABEL: Binary Function "MaxValue" after CMOV conversion
# FDATA: 0 [unknown] 0 1 MaxValue 0 1 2
        .cfi_startproc
                                        # entry: enter the loop if %edi >= 2
        movl    (%rsi), %ecx
        cmpl    $2, %edi
        jge     LBB4_3
LBB4_1:                                 # result = first element
        movl    %ecx, %eax
LBB4_2:                                 # exit
        retq
LBB4_3:                                 # preheader
        movl    %edi, %edi
        movl    $1, %edx
LBB4_4:                                 # loop body
        movl    (%rsi,%rdx,4), %eax
        cmpl    %ecx, %eax
# CHECK: cmpl %ecx, %eax
# CHECK-NEXT: cmovlel %ecx, %eax
LBB4_4_br:                              # hammock branch
        jg      LBB4_6
# FDATA: 1 MaxValue #LBB4_4_br# 1 MaxValue #LBB4_5# 1 2
# FDATA: 1 MaxValue #LBB4_4_br# 1 MaxValue #LBB4_6# 1 2
LBB4_5:                                 # conditional block: one reg-reg move
        movl    %ecx, %eax
LBB4_6:                                 # join block
        addq    $1, %rdx
        movl    %eax, %ecx
        cmpq    %rdx, %rdi
        je      LBB4_2
        jmp     LBB4_4
Lfunc_end4:
        .size   MaxValue, Lfunc_end4-MaxValue
        .cfi_endproc
# end of MaxValue
|
||||
#-----------------------------------------------------------------------
# BinarySearch (x86-64, AT&T syntax) -- test input
# A loop whose body (LBB5_1) is reached through the backward "ja" at the
# end of LBB5_2. Only the function entry carries a profile record. The
# summary expectations at the top of the file list this function with
# 0 of 0 conversions.
#-----------------------------------------------------------------------
        .globl  BinarySearch
        .p2align 4, 0x90
        .type   BinarySearch,@function
BinarySearch:
# CHECK-LABEL: Binary Function "BinarySearch" after CMOV conversion
# FDATA: 0 [unknown] 0 1 BinarySearch 0 1 2
        .cfi_startproc
                                        # entry: jump to the loop condition
        movl    (%rsi), %eax
        jmp     LBB5_2
LBB5_1:                                 # loop body
        movl    %ecx, %eax
        xorl    %ecx, %ecx
        btl     %eax, %edi
        setae   %cl
        movq    8(%rdx,%rcx,8), %rdx
LBB5_2:                                 # loop condition
        movl    (%rdx), %ecx
        cmpl    %ecx, %eax
        ja      LBB5_1
                                        # exit
        retq
Lfunc_end5:
        .size   BinarySearch, Lfunc_end5-BinarySearch
        .cfi_endproc
# end of BinarySearch
|
||||
#-----------------------------------------------------------------------
# Transform (x86-64, AT&T syntax) -- test input
# In the loop at LBB6_2, "ja LBB6_4" skips a block made of two
# immediate-to-register moves. Only the function entry carries a profile
# record. The summary expectations at the top of the file list this
# function with 0 of 0 conversions.
#-----------------------------------------------------------------------
        .globl  Transform
        .p2align 4, 0x90
        .type   Transform,@function
Transform:
# CHECK-LABEL: Binary Function "Transform" after CMOV conversion
# FDATA: 0 [unknown] 0 1 Transform 0 1 2
        .cfi_startproc
                                        # entry
        movb    $1, %al
        testb   %al, %al
        jne     LBB6_5
                                        # preheader
        movl    %edx, %r8d
        xorl    %esi, %esi
LBB6_2:                                 # loop body
        movslq  %esi, %rsi
        movl    (%rdi,%rsi,4), %eax
        xorl    %edx, %edx              # clear %edx before divl
        divl    %r8d
        movl    %eax, %edx
        movl    $11, %eax
        movl    %r8d, %ecx
        cmpl    %r8d, %edx
        ja      LBB6_4
                                        # conditional block: immediate moves
        movl    $22, %eax
        movl    $22, %ecx
LBB6_4:                                 # join block
        xorl    %edx, %edx              # clear %edx before divl
        divl    %ecx
        movl    %edx, (%rdi,%rsi,4)
        addl    $1, %esi
        cmpl    %r9d, %esi
        ja      LBB6_2
LBB6_5:                                 # exit
        retq
Lfunc_end6:
        .size   Transform, Lfunc_end6-Transform
        .cfi_endproc
# end of Transform
|
||||
#-----------------------------------------------------------------------
# test_cmov_memoperand (x86-64, AT&T syntax) -- test input
# "ja LBB7_2" skips a single load through the pointer in %rcx. Only the
# function entry carries a profile record. The summary expectations at
# the top of the file list this function with 0 of 0 conversions.
#-----------------------------------------------------------------------
        .globl  test_cmov_memoperand
        .p2align 4, 0x90
        .type   test_cmov_memoperand,@function
test_cmov_memoperand:
# CHECK-LABEL: Binary Function "test_cmov_memoperand" after CMOV conversion
# FDATA: 0 [unknown] 0 1 test_cmov_memoperand 0 1 2
        .cfi_startproc
                                        # entry
        movl    %edx, %eax
        cmpl    %esi, %edi
        ja      LBB7_2
                                        # conditional block: memory load
        movl    (%rcx), %eax
LBB7_2:                                 # join block
        retq
Lfunc_end7:
        .size   test_cmov_memoperand, Lfunc_end7-test_cmov_memoperand
        .cfi_endproc
# end of test_cmov_memoperand
|
||||
# test_cmov_memoperand_unpredictable: eax is preset from %edx, then the forward
# `ja LBB8_2` skips a single load of eax from (%rcx). Only the function-level
# profile record is present; there is no per-branch record for the `ja`.
.globl test_cmov_memoperand_unpredictable # -- Begin function test_cmov_memoperand_unpredictable
|
||||
.p2align 4, 0x90
|
||||
.type test_cmov_memoperand_unpredictable,@function
|
||||
test_cmov_memoperand_unpredictable: # @test_cmov_memoperand_unpredictable
|
||||
# CHECK-LABEL: Binary Function "test_cmov_memoperand_unpredictable" after CMOV conversion
|
||||
# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_unpredictable 0 1 2
|
||||
.cfi_startproc
|
||||
# %bb.0: # %entry
|
||||
movl %edx, %eax
|
||||
# Skip the load when edi is above esi (unsigned comparison).
cmpl %esi, %edi
|
||||
ja LBB8_2
|
||||
# %bb.1: # %entry
|
||||
# Conditional block: one 32-bit load from (%rcx).
movl (%rcx), %eax
|
||||
LBB8_2: # %entry
|
||||
retq
|
||||
Lfunc_end8:
|
||||
.size test_cmov_memoperand_unpredictable, Lfunc_end8-test_cmov_memoperand_unpredictable
|
||||
.cfi_endproc
|
||||
# -- End function
|
||||
# test_cmov_memoperand_in_group: the forward `ja LBB9_2` skips a block of three
# moves -- one load from (%rcx) and two register copies -- that overwrite r8d,
# eax and edx. After the join point the three values are summed into eax.
.globl test_cmov_memoperand_in_group # -- Begin function test_cmov_memoperand_in_group
|
||||
.p2align 4, 0x90
|
||||
.type test_cmov_memoperand_in_group,@function
|
||||
test_cmov_memoperand_in_group: # @test_cmov_memoperand_in_group
|
||||
# CHECK-LABEL: Binary Function "test_cmov_memoperand_in_group" after CMOV conversion
|
||||
# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_in_group 0 1 2
|
||||
.cfi_startproc
|
||||
# %bb.0: # %entry
|
||||
# Values used when the branch is taken: eax = r8d = incoming edx.
movl %edx, %eax
|
||||
movl %edx, %r8d
|
||||
# Skip the three moves when edi is above esi (unsigned comparison).
cmpl %esi, %edi
|
||||
ja LBB9_2
|
||||
# %bb.1: # %entry
|
||||
movl (%rcx), %r8d
|
||||
movl %edi, %eax
|
||||
movl %esi, %edx
|
||||
LBB9_2: # %entry
|
||||
# eax = eax + r8d + edx
addl %r8d, %eax
|
||||
addl %edx, %eax
|
||||
retq
|
||||
Lfunc_end9:
|
||||
.size test_cmov_memoperand_in_group, Lfunc_end9-test_cmov_memoperand_in_group
|
||||
.cfi_endproc
|
||||
# -- End function
|
||||
# test_cmov_memoperand_in_group2: a forward `jbe LBB10_2` skips a block of
# three moves (one load from (%rcx), two register copies) that overwrite r8d,
# eax and edx. After the join point the three values are summed into eax.
.globl test_cmov_memoperand_in_group2 # -- Begin function test_cmov_memoperand_in_group2
|
||||
.p2align 4, 0x90
|
||||
.type test_cmov_memoperand_in_group2,@function
|
||||
test_cmov_memoperand_in_group2: # @test_cmov_memoperand_in_group2
|
||||
# CHECK-LABEL: Binary Function "test_cmov_memoperand_in_group2" after CMOV conversion
|
||||
# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_in_group2 0 1 2
|
||||
.cfi_startproc
|
||||
# %bb.0: # %entry
|
||||
# Values used when the branch is taken: eax = r8d = incoming edx.
movl %edx, %eax
|
||||
movl %edx, %r8d
|
||||
# Skip the three moves when edi is below or equal to esi (unsigned comparison).
cmpl %esi, %edi
|
||||
jbe LBB10_2
|
||||
# %bb.1: # %entry
|
||||
movl (%rcx), %r8d
|
||||
movl %edi, %eax
|
||||
movl %esi, %edx
|
||||
LBB10_2: # %entry
|
||||
# eax = eax + r8d + edx
addl %r8d, %eax
|
||||
addl %edx, %eax
|
||||
retq
|
||||
Lfunc_end10:
|
||||
.size test_cmov_memoperand_in_group2, Lfunc_end10-test_cmov_memoperand_in_group2
|
||||
.cfi_endproc
|
||||
# -- End function
|
||||
# test_cmov_memoperand_conflicting_dir: straight-line code with no branches.
# It already contains two `cmoval` instructions (one register source, one
# memory source) that depend on the same compare.
.globl test_cmov_memoperand_conflicting_dir # -- Begin function test_cmov_memoperand_conflicting_dir
|
||||
.p2align 4, 0x90
|
||||
.type test_cmov_memoperand_conflicting_dir,@function
|
||||
test_cmov_memoperand_conflicting_dir: # @test_cmov_memoperand_conflicting_dir
|
||||
# CHECK-LABEL: Binary Function "test_cmov_memoperand_conflicting_dir" after CMOV conversion
|
||||
# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_conflicting_dir 0 1 2
|
||||
.cfi_startproc
|
||||
# %bb.0: # %entry
|
||||
# Flags set here are consumed by both cmoval instructions; the movl in between
# does not modify flags.
cmpl %esi, %edi
|
||||
movl (%rcx), %eax
|
||||
# When edi is above esi (unsigned): eax = edx, then edx = 32-bit load from (%r8).
cmoval %edx, %eax
|
||||
cmoval (%r8), %edx
|
||||
addl %edx, %eax
|
||||
retq
|
||||
Lfunc_end11:
|
||||
.size test_cmov_memoperand_conflicting_dir, Lfunc_end11-test_cmov_memoperand_conflicting_dir
|
||||
.cfi_endproc
|
||||
# -- End function
|
||||
# test_cmov_memoperand_in_group_reuse_for_addr: eax is preset from %edi, then
# the forward `ja LBB12_2` skips a single load of eax from (%rcx). Only the
# function-level profile record is present for this function.
.globl test_cmov_memoperand_in_group_reuse_for_addr # -- Begin function test_cmov_memoperand_in_group_reuse_for_addr
|
||||
.p2align 4, 0x90
|
||||
.type test_cmov_memoperand_in_group_reuse_for_addr,@function
|
||||
test_cmov_memoperand_in_group_reuse_for_addr: # @test_cmov_memoperand_in_group_reuse_for_addr
|
||||
# CHECK-LABEL: Binary Function "test_cmov_memoperand_in_group_reuse_for_addr" after CMOV conversion
|
||||
# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_in_group_reuse_for_addr 0 1 2
|
||||
.cfi_startproc
|
||||
# %bb.0: # %entry
|
||||
movl %edi, %eax
|
||||
# Skip the load when edi is above esi (unsigned comparison).
cmpl %esi, %edi
|
||||
ja LBB12_2
|
||||
# %bb.1: # %entry
|
||||
movl (%rcx), %eax
|
||||
LBB12_2: # %entry
|
||||
retq
|
||||
Lfunc_end12:
|
||||
.size test_cmov_memoperand_in_group_reuse_for_addr, Lfunc_end12-test_cmov_memoperand_in_group_reuse_for_addr
|
||||
.cfi_endproc
|
||||
# -- End function
|
||||
# test_cmov_memoperand_in_group_reuse_for_addr2: the forward `ja LBB13_2` skips
# a block of two dependent loads -- the first loads a 64-bit value into rax and
# the second uses that rax as the address for a 32-bit load into eax.
.globl test_cmov_memoperand_in_group_reuse_for_addr2 # -- Begin function test_cmov_memoperand_in_group_reuse_for_addr2
|
||||
.p2align 4, 0x90
|
||||
.type test_cmov_memoperand_in_group_reuse_for_addr2,@function
|
||||
test_cmov_memoperand_in_group_reuse_for_addr2: # @test_cmov_memoperand_in_group_reuse_for_addr2
|
||||
# CHECK-LABEL: Binary Function "test_cmov_memoperand_in_group_reuse_for_addr2" after CMOV conversion
|
||||
# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_in_group_reuse_for_addr2 0 1 2
|
||||
.cfi_startproc
|
||||
# %bb.0: # %entry
|
||||
movl %edi, %eax
|
||||
# Skip both loads when edi is above esi (unsigned comparison).
cmpl %esi, %edi
|
||||
ja LBB13_2
|
||||
# %bb.1: # %entry
|
||||
# rax = 64-bit load from (%rcx); eax = 32-bit load from that address.
movq (%rcx), %rax
|
||||
movl (%rax), %eax
|
||||
LBB13_2: # %entry
|
||||
retq
|
||||
Lfunc_end13:
|
||||
.size test_cmov_memoperand_in_group_reuse_for_addr2, Lfunc_end13-test_cmov_memoperand_in_group_reuse_for_addr2
|
||||
.cfi_endproc
|
||||
# -- End function
|
||||
# test_cmov_memoperand_in_group_reuse_for_addr3: eax is preset from %edi, then
# the forward `ja LBB14_2` skips a single load of eax from (%rcx). Only the
# function-level profile record is present for this function.
.globl test_cmov_memoperand_in_group_reuse_for_addr3 # -- Begin function test_cmov_memoperand_in_group_reuse_for_addr3
|
||||
.p2align 4, 0x90
|
||||
.type test_cmov_memoperand_in_group_reuse_for_addr3,@function
|
||||
test_cmov_memoperand_in_group_reuse_for_addr3: # @test_cmov_memoperand_in_group_reuse_for_addr3
|
||||
# CHECK-LABEL: Binary Function "test_cmov_memoperand_in_group_reuse_for_addr3" after CMOV conversion
|
||||
# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_in_group_reuse_for_addr3 0 1 2
|
||||
.cfi_startproc
|
||||
# %bb.0: # %entry
|
||||
movl %edi, %eax
|
||||
# Skip the load when edi is above esi (unsigned comparison).
cmpl %esi, %edi
|
||||
ja LBB14_2
|
||||
# %bb.1: # %entry
|
||||
movl (%rcx), %eax
|
||||
LBB14_2: # %entry
|
||||
retq
|
||||
Lfunc_end14:
|
||||
.size test_cmov_memoperand_in_group_reuse_for_addr3, Lfunc_end14-test_cmov_memoperand_in_group_reuse_for_addr3
|
||||
.cfi_endproc
|
||||
# -- End function
|
||||
# test_memoperand_loop: a loop counted in esi up to 1024 whose body has two
# forward `ja` branches.
#   - `ja LBB15_3` skips a load of rcx from (%r8); it carries no per-branch
#     profile record.
#   - the `ja LBB15_5` at LBB15_3_br skips a register copy (rcx = rax); it has
#     two profile records, and the FileCheck directives above it expect a
#     `cmovbeq %rax, %rcx` right after the compare.
# The globals `begin` and `end` are reached through @GOTPCREL.
.globl test_memoperand_loop # -- Begin function test_memoperand_loop
|
||||
.p2align 4, 0x90
|
||||
.type test_memoperand_loop,@function
|
||||
test_memoperand_loop: # @test_memoperand_loop
|
||||
# CHECK-LABEL: Binary Function "test_memoperand_loop" after CMOV conversion
|
||||
# FDATA: 0 [unknown] 0 1 test_memoperand_loop 0 1 2
|
||||
.cfi_startproc
|
||||
# %bb.0: # %entry
|
||||
# r8 = address of `begin` (from the GOT); rax = 64-bit value stored there.
movq begin@GOTPCREL(%rip), %r8
|
||||
movq (%r8), %rax
|
||||
# rdx = 64-bit value stored at `end`; rcx is only a temporary for its address.
movq end@GOTPCREL(%rip), %rcx
|
||||
movq (%rcx), %rdx
|
||||
# esi = loop counter, rcx = running pointer starting from rax.
xorl %esi, %esi
|
||||
movq %rax, %rcx
|
||||
LBB15_1: # %loop.body
|
||||
addq $8, %rcx
|
||||
# Skip the reload when rcx is above rdx (unsigned comparison).
cmpq %rdx, %rcx
|
||||
ja LBB15_3
|
||||
# %bb.2: # %loop.body
|
||||
# Conditional block: a memory load through r8.
movq (%r8), %rcx
|
||||
LBB15_3: # %loop.body
|
||||
movl %edi, (%rcx)
|
||||
addq $8, %rcx
|
||||
cmpq %rdx, %rcx
|
||||
# CHECK: movl %edi, (%rcx)
|
||||
# CHECK-NEXT: addq $0x8, %rcx
|
||||
# CHECK-NEXT: cmpq %rdx, %rcx
|
||||
# CHECK-NEXT: cmovbeq %rax, %rcx
|
||||
# Named label on the branch so the profile records below can refer to it.
LBB15_3_br:
|
||||
# Taken when rcx is above rdx; the skipped block is the single copy at LBB15_4.
ja LBB15_5
|
||||
# FDATA: 1 test_memoperand_loop #LBB15_3_br# 1 test_memoperand_loop #LBB15_4# 1 2
|
||||
# FDATA: 1 test_memoperand_loop #LBB15_3_br# 1 test_memoperand_loop #LBB15_5# 1 2
|
||||
# %bb.4: # %loop.body
|
||||
LBB15_4:
|
||||
movq %rax, %rcx
|
||||
LBB15_5: # %loop.body
|
||||
movl %edi, (%rcx)
|
||||
addl $1, %esi
|
||||
# Signed compare of the counter against 1024; loop while esi is less.
cmpl $1024, %esi # imm = 0x400
|
||||
jl LBB15_1
|
||||
# %bb.6: # %exit
|
||||
retq
|
||||
Lfunc_end15:
|
||||
.size test_memoperand_loop, Lfunc_end15-test_memoperand_loop
|
||||
.cfi_endproc
|
||||
# -- End function
|
||||
# CmovBackToBack: loop over 32-bit elements at (%rcx,%rdi,4) containing two
# consecutive forward `jg` branches, each skipping a single `movl %r9d, %r10d`.
# Both branches depend on the same `cmpl %edx, %eax`. Each has its own pair of
# profile records, and the FileCheck directives expect two consecutive
# `cmovlel %r9d, %r10d` after the compare.
.globl CmovBackToBack # -- Begin function CmovBackToBack
|
||||
.p2align 4, 0x90
|
||||
.type CmovBackToBack,@function
|
||||
CmovBackToBack: # @CmovBackToBack
|
||||
# CHECK-LABEL: Binary Function "CmovBackToBack" after CMOV conversion
|
||||
# FDATA: 0 [unknown] 0 1 CmovBackToBack 0 1 2
|
||||
.cfi_startproc
|
||||
# Return immediately when edi is zero or negative (signed test).
testl %edi, %edi
|
||||
jle LBB16_5
|
||||
# r8d = element count taken from edi; rdi is then reused as the index, from 0.
movl %edi, %r8d
|
||||
xorl %edi, %edi
|
||||
# FDATA: 0 [unknown] 0 1 CmovBackToBack #LBB16_2# 1 2
|
||||
LBB16_2: # %for.body
|
||||
movl (%rcx,%rdi,4), %eax
|
||||
# r9d = loaded element + 1 (lea leaves the flags untouched).
leal 1(%rax), %r9d
|
||||
imull %esi, %eax
|
||||
# r10d defaults to 10 and is replaced by r9d on each fall-through path below.
movl $10, %r10d
|
||||
cmpl %edx, %eax
|
||||
# CHECK: cmpl %edx, %eax
|
||||
# CHECK-NEXT: cmovlel %r9d, %r10d
|
||||
LBB16_2_br:
|
||||
# Taken when eax is greater than edx (signed comparison).
jg LBB16_4
|
||||
# FDATA: 1 CmovBackToBack #LBB16_2_br# 1 CmovBackToBack #LBB16_3# 1 2
|
||||
# FDATA: 1 CmovBackToBack #LBB16_2_br# 1 CmovBackToBack #LBB16_4# 1 2
|
||||
LBB16_3:
|
||||
movl %r9d, %r10d
|
||||
LBB16_4: # %for.body
|
||||
# CHECK-NEXT: cmovlel %r9d, %r10d
|
||||
LBB16_6_br:
|
||||
# Only a movl sits between the cmpl above and this jg, and movl does not
# modify flags, so this branch tests the same comparison again.
jg LBB16_8
|
||||
# FDATA: 1 CmovBackToBack #LBB16_6_br# 1 CmovBackToBack #LBB16_7# 1 2
|
||||
# FDATA: 1 CmovBackToBack #LBB16_6_br# 1 CmovBackToBack #LBB16_8# 1 2
|
||||
LBB16_7:
|
||||
movl %r9d, %r10d
|
||||
LBB16_8: # %for.body
|
||||
# Store r10d * r9d back to the element, then advance the index.
imull %r9d, %r10d
|
||||
movl %r10d, (%rcx,%rdi,4)
|
||||
addq $1, %rdi
|
||||
# Loop until the index in rdi equals r8.
cmpq %rdi, %r8
|
||||
jne LBB16_2
|
||||
LBB16_5: # %for.cond.cleanup
|
||||
retq
|
||||
Lfunc_end16:
|
||||
.size CmovBackToBack, Lfunc_end16-CmovBackToBack
|
||||
.cfi_endproc
|
||||
# -- End function
|
||||
# Data referenced by test_memoperand_loop through @GOTPCREL: two global
# symbols, `begin` and `end`, each holding a single 8-byte value.
# NOTE(review): the literal values look like placeholders rather than real
# addresses -- presumably the test never executes the code; confirm.
.data
|
||||
.globl begin
|
||||
begin:
|
||||
.quad 0xdeadbeef
|
||||
.globl end
|
||||
end:
|
||||
.quad 0xfaceb00c
|
||||
|
Loading…
Reference in a new issue