[BOLT] CMOVConversion pass

Convert simple hammocks into cmov based on misprediction rate.
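
For illustration, a sketch of the transformation (adapted from the
`CmovInHotPath` case in the assembly test; the `.skip` label is only a
placeholder). A frequently mispredicting hammock such as

```
  cmpl %edx, %eax
  jg   .skip            # branch around the conditional move
  movl %r9d, %r10d      # executed only on the "not greater" path
.skip:
```

becomes a branchless sequence:

```
  cmpl    %edx, %eax
  cmovlel %r9d, %r10d   # same move, predicated on "less or equal"
```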

Test Plan:
- Assembly test: `cmov-conversion.s`
- Testing on a binary:
  1. Bootstrap clang (Release build) with `-x86-cmov-converter-force-all` and
     `-Wl,--emit-relocs`.
  2. Collect perf.data:
     - `clang++ <opts> bolt/lib/Core/BinaryFunction.cpp -E > bf.cpp`
     - `perf record -e cycles:u -j any,u -- clang-15 bf.cpp -O2 -std=c++14 -c -o bf.o`
  3. Optimize clang-15 with and without -cmov-conversion:
     - `llvm-bolt clang-15 -p perf.data -o clang-15.bolt`
     - `llvm-bolt clang-15 -p perf.data -cmov-conversion -o clang-15.bolt.cmovconv`
  4. Run the perf experiment:
     - test: `clang-15.bolt.cmovconv`
     - control: `clang-15.bolt`
     - workload (clang options): `bf.cpp -O2 -std=c++14 -c -o bf.o`
Results:
```
  task-clock [delta: -360.21 ± 356.75, delta(%): -1.7760 ± 1.7589, p-value: 0.047951, balance: -6]
  instructions  [delta: 44061118 ± 13246382, delta(%): 0.0690 ± 0.0207, p-value: 0.000001, balance: 50]
  icache-misses [delta: -5534468 ± 2779620, delta(%): -0.4331 ± 0.2175, p-value: 0.028014, balance: -28]
  branch-misses [delta: -1624270 ± 1113244, delta(%): -0.3456 ± 0.2368, p-value: 0.030300, balance: -22]
```

Reviewed By: rafauler

Differential Revision: https://reviews.llvm.org/D120177
Author: Amir Ayupov
Date:   2022-02-07 20:16:13 -08:00
parent 151f809c55
commit 687e4af1c0
7 changed files with 1026 additions and 0 deletions

@@ -1292,6 +1292,16 @@ public:
return false;
}
/// Convert a move instruction into a conditional move instruction, given a
/// condition code.
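/// If \p AllowStackMemOp is true, moves that load from a stack slot may also
/// be converted, and \p AllowBasePtrStackMemOp additionally permits RBP-based
/// stack accesses. Returns true if \p Inst was rewritten in place.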
virtual bool
convertMoveToConditionalMove(MCInst &Inst, unsigned CC,
bool AllowStackMemOp = false,
bool AllowBasePtrStackMemOp = false) const {
llvm_unreachable("not implemented");
return false;
}
/// Lower a tail call instruction \p Inst if required by target.
virtual bool lowerTailCall(MCInst &Inst) {
llvm_unreachable("not implemented");

@@ -0,0 +1,85 @@
//===- bolt/Passes/CMOVConversion.h ----------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass finds the following patterns:
//
//            jcc
//           /   \
//     (empty)    mov src, dst
//           \   /
//
// and replaces them with:
//
//         cmovcc src, dst
//
//
// The advantage of performing this conversion in BOLT (compared to compiler
// heuristic driven instruction selection) is that BOLT can use LBR
// misprediction information and only convert poorly predictable branches.
// Note that branch misprediction rate is different from branch bias.
// For well-predictable branches, it might be beneficial to leave jcc+mov as is
// from microarchitectural perspective to avoid unneeded dependencies (CMOV
// instruction has a dataflow dependence on flags and both operands).
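// For example, a branch that strictly alternates between taken and not-taken
// has a 50/50 bias yet is predicted almost perfectly, while a branch with
// effectively random outcomes mispredicts frequently; only the latter kind is
// a good candidate for conversion.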
//
//===----------------------------------------------------------------------===//
#ifndef BOLT_PASSES_CMOVCONVERSION_H
#define BOLT_PASSES_CMOVCONVERSION_H
#include "bolt/Passes/BinaryPasses.h"
namespace llvm {
namespace bolt {
/// Pass for folding eligible hammocks into CMOVs if profitable.
class CMOVConversion : public BinaryFunctionPass {
struct Stats {
/// Record how many possible cases there are.
uint64_t StaticPossible = 0;
uint64_t DynamicPossible = 0;
/// Record how many cases were converted.
uint64_t StaticPerformed = 0;
uint64_t DynamicPerformed = 0;
/// Record how many mispredictions were eliminated.
uint64_t PossibleMP = 0;
uint64_t RemovedMP = 0;
Stats operator+(const Stats &O) {
StaticPossible += O.StaticPossible;
DynamicPossible += O.DynamicPossible;
StaticPerformed += O.StaticPerformed;
DynamicPerformed += O.DynamicPerformed;
PossibleMP += O.PossibleMP;
RemovedMP += O.RemovedMP;
return *this;
}
double getStaticRatio() { return (double)StaticPerformed / StaticPossible; }
double getDynamicRatio() {
return (double)DynamicPerformed / DynamicPossible;
}
double getMPRatio() { return (double)RemovedMP / PossibleMP; }
void dump();
};
// BinaryContext-wide stats
Stats Global;
void runOnFunction(BinaryFunction &Function);
public:
explicit CMOVConversion() : BinaryFunctionPass(false) {}
const char *getName() const override { return "CMOV conversion"; }
void runOnFunctions(BinaryContext &BC) override;
};
} // namespace bolt
} // namespace llvm
#endif

@@ -0,0 +1,287 @@
//===- bolt/Passes/CMOVConversion.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the CMOV conversion pass.
//
//===----------------------------------------------------------------------===//
#include "bolt/Passes/CMOVConversion.h"
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include <numeric>
#define DEBUG_TYPE "cmov"
using namespace llvm;
namespace opts {
extern cl::OptionCategory BoltOptCategory;
static cl::opt<int> BiasThreshold(
"cmov-conversion-bias-threshold",
cl::desc("minimum condition bias (pct) to perform a CMOV conversion, "
"-1 to not account bias"),
cl::ReallyHidden, cl::init(1), cl::cat(BoltOptCategory));
static cl::opt<int> MispredictionThreshold(
"cmov-conversion-misprediction-threshold",
cl::desc("minimum misprediction rate (pct) to perform a CMOV conversion, "
"-1 to not account misprediction rate"),
cl::ReallyHidden, cl::init(5), cl::cat(BoltOptCategory));
static cl::opt<bool> ConvertStackMemOperand(
"cmov-conversion-convert-stack-mem-operand",
cl::desc("convert moves with stack memory operand (potentially unsafe)"),
cl::ReallyHidden, cl::init(false), cl::cat(BoltOptCategory));
static cl::opt<bool> ConvertBasePtrStackMemOperand(
"cmov-conversion-convert-rbp-stack-mem-operand",
cl::desc("convert moves with rbp stack memory operand (unsafe, must be off "
"for binaries compiled with -fomit-frame-pointer)"),
cl::ReallyHidden, cl::init(false), cl::cat(BoltOptCategory));
} // namespace opts
namespace llvm {
namespace bolt {
// Return true if the CFG conforms to the following subgraph:
//    Predecessor
//      /     \
//     |      RHS
//      \     /
//        LHS
// Caller guarantees that LHS and RHS share the same predecessor.
bool isIfThenSubgraph(const BinaryBasicBlock &LHS,
const BinaryBasicBlock &RHS) {
if (LHS.pred_size() != 2 || RHS.pred_size() != 1)
return false;
// Sanity check
BinaryBasicBlock *Predecessor = *RHS.pred_begin();
assert(Predecessor && LHS.isPredecessor(Predecessor) && "invalid subgraph");
(void)Predecessor;
if (!LHS.isPredecessor(&RHS))
return false;
if (RHS.succ_size() != 1)
return false;
return true;
}
bool matchCFGSubgraph(BinaryBasicBlock &BB, BinaryBasicBlock *&ConditionalSucc,
BinaryBasicBlock *&UnconditionalSucc,
bool &IsConditionalTaken) {
BinaryBasicBlock *TakenSucc = BB.getConditionalSuccessor(true);
BinaryBasicBlock *FallthroughSucc = BB.getConditionalSuccessor(false);
bool IsIfThenTaken = isIfThenSubgraph(*FallthroughSucc, *TakenSucc);
bool IsIfThenFallthrough = isIfThenSubgraph(*TakenSucc, *FallthroughSucc);
if (!IsIfThenFallthrough && !IsIfThenTaken)
return false;
assert((!IsIfThenFallthrough || !IsIfThenTaken) && "Invalid subgraph");
// Output parameters
ConditionalSucc = IsIfThenTaken ? TakenSucc : FallthroughSucc;
UnconditionalSucc = IsIfThenTaken ? FallthroughSucc : TakenSucc;
IsConditionalTaken = IsIfThenTaken;
return true;
}
// Return true if basic block instructions can be converted into cmov(s).
bool canConvertInstructions(const BinaryContext &BC, const BinaryBasicBlock &BB,
unsigned CC) {
if (BB.empty())
return false;
const MCInst *LastInst = BB.getLastNonPseudoInstr();
// A block of only pseudo instructions can't be converted into CMOV
if (LastInst == nullptr)
return false;
for (const MCInst &Inst : BB) {
if (BC.MIB->isPseudo(Inst))
continue;
// Unconditional branch as a last instruction is OK
if (&Inst == LastInst && BC.MIB->isUnconditionalBranch(Inst))
continue;
MCInst Cmov(Inst);
// GPR move is OK
if (!BC.MIB->convertMoveToConditionalMove(
Cmov, CC, opts::ConvertStackMemOperand,
opts::ConvertBasePtrStackMemOperand)) {
LLVM_DEBUG({
dbgs() << BB.getName() << ": can't convert instruction ";
BC.printInstruction(dbgs(), Cmov);
});
return false;
}
}
return true;
}
void convertMoves(const BinaryContext &BC, BinaryBasicBlock &BB, unsigned CC) {
for (auto II = BB.begin(), IE = BB.end(); II != IE; ++II) {
if (BC.MIB->isPseudo(*II))
continue;
if (BC.MIB->isUnconditionalBranch(*II)) {
// XXX: this invalidates II but we return immediately
BB.eraseInstruction(II);
return;
}
bool Result = BC.MIB->convertMoveToConditionalMove(
*II, CC, opts::ConvertStackMemOperand,
opts::ConvertBasePtrStackMemOperand);
assert(Result && "unexpected instruction");
(void)Result;
}
}
// Returns the misprediction rate and misprediction count if profile data is
// available; the rate is -1 otherwise.
std::pair<int, uint64_t>
calculateMispredictionRate(const BinaryBasicBlock &BB) {
uint64_t TotalExecCount = 0;
uint64_t TotalMispredictionCount = 0;
for (auto BI : BB.branch_info()) {
TotalExecCount += BI.Count;
if (BI.MispredictedCount != BinaryBasicBlock::COUNT_INFERRED)
TotalMispredictionCount += BI.MispredictedCount;
}
if (!TotalExecCount)
return {-1, TotalMispredictionCount};
return {100.0f * TotalMispredictionCount / TotalExecCount,
TotalMispredictionCount};
}
// Returns conditional succ bias if the profile is available, -1 otherwise.
int calculateConditionBias(const BinaryBasicBlock &BB,
const BinaryBasicBlock &ConditionalSucc) {
if (auto BranchStats = BB.getBranchStats(&ConditionalSucc))
return BranchStats->first;
return -1;
}
void CMOVConversion::Stats::dump() {
outs() << "converted static " << StaticPerformed << "/" << StaticPossible
<< formatv(" ({0:P}) ", getStaticRatio())
<< "hammock(s) into CMOV sequences, with dynamic execution count "
<< DynamicPerformed << "/" << DynamicPossible
<< formatv(" ({0:P}), ", getDynamicRatio()) << "saving " << RemovedMP
<< "/" << PossibleMP << formatv(" ({0:P}) ", getMPRatio())
<< "mispredictions\n";
}
void CMOVConversion::runOnFunction(BinaryFunction &Function) {
BinaryContext &BC = Function.getBinaryContext();
bool Modified = false;
// Function-local stats
Stats Local;
// Traverse blocks in post-order, merging a block with a converted cmov with
// its successor.
for (BinaryBasicBlock *BB : post_order(&Function)) {
uint64_t BBExecCount = BB->getKnownExecutionCount();
if (BB->empty() || // The block must have instructions
BBExecCount == 0 || // must be hot
BB->succ_size() != 2 || // with two successors
BB->hasJumpTable()) // no jump table
continue;
assert(BB->isValid() && "traversal internal error");
// Check branch instruction
auto BranchInstrIter = BB->getLastNonPseudo();
if (BranchInstrIter == BB->rend() ||
!BC.MIB->isConditionalBranch(*BranchInstrIter))
continue;
// Check successors
BinaryBasicBlock *ConditionalSucc, *UnconditionalSucc;
bool IsConditionalTaken;
if (!matchCFGSubgraph(*BB, ConditionalSucc, UnconditionalSucc,
IsConditionalTaken)) {
LLVM_DEBUG(dbgs() << BB->getName() << ": couldn't match hammock\n");
continue;
}
unsigned CC = BC.MIB->getCondCode(*BranchInstrIter);
if (!IsConditionalTaken)
CC = BC.MIB->getInvertedCondCode(CC);
// Check contents of the conditional block
if (!canConvertInstructions(BC, *ConditionalSucc, CC))
continue;
int ConditionBias = calculateConditionBias(*BB, *ConditionalSucc);
int MispredictionRate = 0;
uint64_t MispredictionCount = 0;
std::tie(MispredictionRate, MispredictionCount) =
calculateMispredictionRate(*BB);
Local.StaticPossible++;
Local.DynamicPossible += BBExecCount;
Local.PossibleMP += MispredictionCount;
// If the conditional successor is never executed, don't convert it
if (ConditionBias < opts::BiasThreshold) {
LLVM_DEBUG(dbgs() << BB->getName() << "->" << ConditionalSucc->getName()
<< " bias = " << ConditionBias
<< ", less than threshold " << opts::BiasThreshold
<< '\n');
continue;
}
// Check the misprediction rate of a branch
if (MispredictionRate < opts::MispredictionThreshold) {
LLVM_DEBUG(dbgs() << BB->getName() << " misprediction rate = "
<< MispredictionRate << ", less than threshold "
<< opts::MispredictionThreshold << '\n');
continue;
}
// remove conditional branch
BB->eraseInstruction(std::prev(BranchInstrIter.base()));
BB->removeAllSuccessors();
// Convert instructions from the conditional successor into cmov's in BB.
convertMoves(BC, *ConditionalSucc, CC);
BB->addInstructions(ConditionalSucc->begin(), ConditionalSucc->end());
ConditionalSucc->markValid(false);
// Post-order traversal guarantees that the successor has already been visited
// and merged if necessary. Merge the unconditional successor into the current
// block.
BB->addInstructions(UnconditionalSucc->begin(), UnconditionalSucc->end());
UnconditionalSucc->moveAllSuccessorsTo(BB);
UnconditionalSucc->markValid(false);
Local.StaticPerformed++;
Local.DynamicPerformed += BBExecCount;
Local.RemovedMP += MispredictionCount;
Modified = true;
}
if (Modified)
Function.eraseInvalidBBs();
if (opts::Verbosity > 1) {
outs() << "BOLT-INFO: CMOVConversion: " << Function << ", ";
Local.dump();
}
Global = Global + Local;
}
void CMOVConversion::runOnFunctions(BinaryContext &BC) {
for (auto &It : BC.getBinaryFunctions()) {
BinaryFunction &Function = It.second;
if (!shouldOptimize(Function))
continue;
runOnFunction(Function);
}
outs() << "BOLT-INFO: CMOVConversion total: ";
Global.dump();
}
} // end namespace bolt
} // end namespace llvm

@@ -5,6 +5,7 @@ add_llvm_library(LLVMBOLTPasses
AsmDump.cpp
BinaryPasses.cpp
BinaryFunctionCallGraph.cpp
CMOVConversion.cpp
CacheMetrics.cpp
CallGraph.cpp
CallGraphWalker.cpp

@@ -11,6 +11,7 @@
#include "bolt/Passes/Aligner.h"
#include "bolt/Passes/AllocCombiner.h"
#include "bolt/Passes/AsmDump.h"
#include "bolt/Passes/CMOVConversion.h"
#include "bolt/Passes/FrameOptimizer.h"
#include "bolt/Passes/IdenticalCodeFolding.h"
#include "bolt/Passes/IndirectCallPromotion.h"
@@ -247,6 +248,11 @@ ThreeWayBranchFlag("three-way-branch",
cl::desc("reorder three way branches"),
cl::ZeroOrMore, cl::ReallyHidden, cl::cat(BoltOptCategory));
static cl::opt<bool> CMOVConversionFlag("cmov-conversion",
cl::desc("fold jcc+mov into cmov"),
cl::ZeroOrMore, cl::ReallyHidden,
cl::cat(BoltOptCategory));
} // namespace opts
namespace llvm {
@@ -393,6 +399,9 @@ void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
Manager.registerPass(std::make_unique<TailDuplication>(),
opts::TailDuplicationFlag);
Manager.registerPass(std::make_unique<CMOVConversion>(),
opts::CMOVConversionFlag);
// This pass syncs local branches with CFG. If any of the following
// passes breaks the sync - they either need to re-run the pass or
// fix branches consistency internally.

@@ -12,12 +12,15 @@
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "bolt/Core/MCPlus.h"
#include "bolt/Core/MCPlusBuilder.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegister.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Debug.h"
@@ -2134,6 +2137,70 @@ public:
return true;
}
bool
convertMoveToConditionalMove(MCInst &Inst, unsigned CC, bool AllowStackMemOp,
bool AllowBasePtrStackMemOp) const override {
// - Register-register moves are OK
// - Stores are filtered out by opcode (no store CMOV)
// - Non-stack loads are prohibited (generally unsafe)
// - Stack loads are OK if AllowStackMemOp is true
// - Stack loads with RBP are OK if AllowBasePtrStackMemOp is true
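// For example (illustrative only): `movq (%rsp), %rax` needs AllowStackMemOp,
// `movq (%rbp), %rax` additionally needs AllowBasePtrStackMemOp, and a
// non-stack load such as `movq (%rdi), %rax` is never converted.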
if (isLoad(Inst)) {
// If stack memory operands are not allowed, no loads are allowed
if (!AllowStackMemOp)
return false;
// If stack memory operands are allowed, check if it's a load from stack
bool IsLoad, IsStore, IsStoreFromReg, IsSimple, IsIndexed;
MCPhysReg Reg;
int32_t SrcImm;
uint16_t StackPtrReg;
int64_t StackOffset;
uint8_t Size;
bool IsStackAccess =
isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, Reg, SrcImm,
StackPtrReg, StackOffset, Size, IsSimple, IsIndexed);
// Prohibit non-stack-based loads
if (!IsStackAccess)
return false;
// If stack memory operands are allowed, check if it's RBP-based
if (!AllowBasePtrStackMemOp &&
RegInfo->isSubRegisterEq(X86::RBP, StackPtrReg))
return false;
}
unsigned NewOpcode = 0;
switch (Inst.getOpcode()) {
case X86::MOV16rr:
NewOpcode = X86::CMOV16rr;
break;
case X86::MOV16rm:
NewOpcode = X86::CMOV16rm;
break;
case X86::MOV32rr:
NewOpcode = X86::CMOV32rr;
break;
case X86::MOV32rm:
NewOpcode = X86::CMOV32rm;
break;
case X86::MOV64rr:
NewOpcode = X86::CMOV64rr;
break;
case X86::MOV64rm:
NewOpcode = X86::CMOV64rm;
break;
default:
return false;
}
Inst.setOpcode(NewOpcode);
// Insert CC at the end of prime operands, before annotations
Inst.insert(Inst.begin() + MCPlus::getNumPrimeOperands(Inst),
MCOperand::createImm(CC));
// CMOV is a 3-operand MCInst, so duplicate the destination as src1
Inst.insert(Inst.begin(), Inst.getOperand(0));
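// For example, `MOV64rr dst, src` becomes `CMOV64rr dst, dst, src, CC`.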
return true;
}
bool lowerTailCall(MCInst &Inst) override {
if (Inst.getOpcode() == X86::JMP_4 && isTailCall(Inst)) {
Inst.setOpcode(X86::JMP_1);

@@ -0,0 +1,567 @@
# REQUIRES: system-linux
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
# RUN: link_fdata %s %t.o %t.fdata
# RUN: llvm-strip --strip-unneeded %t.o
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
# RUN: llvm-bolt %t.exe -data %t.fdata -o %t -lite=0 -v=2 \
# RUN: -cmov-conversion -cmov-conversion-misprediction-threshold=-1 \
# RUN: -cmov-conversion-bias-threshold=-1 -print-all | FileCheck %s
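# Thresholds of -1 disable the bias and misprediction-rate checks, so every
# structurally eligible hammock is converted.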
# CHECK: BOLT-INFO: CMOVConversion: CmovInHotPath, converted static 1/1
# CHECK: BOLT-INFO: CMOVConversion: CmovNotInHotPath, converted static 1/1
# CHECK: BOLT-INFO: CMOVConversion: MaxIndex, converted static 1/1
# CHECK: BOLT-INFO: CMOVConversion: MaxIndex_unpredictable, converted static 1/1
# CHECK: BOLT-INFO: CMOVConversion: MaxValue, converted static 1/1
# CHECK: BOLT-INFO: CMOVConversion: BinarySearch, converted static 0/0
# CHECK: BOLT-INFO: CMOVConversion: Transform, converted static 0/0
# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand, converted static 0/0
# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_unpredictable, converted static 0/0
# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_in_group, converted static 0/0
# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_in_group2, converted static 0/0
# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_conflicting_dir, converted static 0/0
# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_in_group_reuse_for_addr, converted static 0/0
# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_in_group_reuse_for_addr2, converted static 0/0
# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_in_group_reuse_for_addr3, converted static 0/0
# CHECK: BOLT-INFO: CMOVConversion: test_memoperand_loop, converted static 1/1
# CHECK: BOLT-INFO: CMOVConversion: CmovBackToBack, converted static 2/2
# CHECK: BOLT-INFO: CMOVConversion total: converted static 8/8
.globl _start
_start:
.globl CmovInHotPath # -- Begin function CmovInHotPath
.p2align 4, 0x90
.type CmovInHotPath,@function
CmovInHotPath: # @CmovInHotPath
# CHECK-LABEL: Binary Function "CmovInHotPath" after CMOV conversion
# FDATA: 0 [unknown] 0 1 CmovInHotPath 0 1 2
.cfi_startproc
# %bb.0: # %entry
testl %edi, %edi
jle LBB0_5
# %bb.1: # %for.body.preheader
movl %edi, %r8d
xorl %edi, %edi
# FDATA: 0 [unknown] 0 1 CmovInHotPath #LBB0_2# 1 2
LBB0_2: # %for.body
movl (%rcx,%rdi,4), %eax
leal 1(%rax), %r9d
imull %esi, %eax
movl $10, %r10d
cmpl %edx, %eax
# CHECK: cmpl %edx, %eax
# CHECK-NEXT: cmovlel %r9d, %r10d
LBB0_2_br:
jg LBB0_4
# FDATA: 1 CmovInHotPath #LBB0_2_br# 1 CmovInHotPath #LBB0_3# 1 2
# FDATA: 1 CmovInHotPath #LBB0_2_br# 1 CmovInHotPath #LBB0_4# 1 2
# %bb.3: # %for.body
LBB0_3:
movl %r9d, %r10d
LBB0_4: # %for.body
imull %r9d, %r10d
movl %r10d, (%rcx,%rdi,4)
addq $1, %rdi
cmpq %rdi, %r8
jne LBB0_2
LBB0_5: # %for.cond.cleanup
retq
Lfunc_end0:
.size CmovInHotPath, Lfunc_end0-CmovInHotPath
.cfi_endproc
# -- End function
.globl CmovNotInHotPath # -- Begin function CmovNotInHotPath
.p2align 4, 0x90
.type CmovNotInHotPath,@function
CmovNotInHotPath: # @CmovNotInHotPath
# CHECK-LABEL: Binary Function "CmovNotInHotPath" after CMOV conversion
# FDATA: 0 [unknown] 0 1 CmovNotInHotPath 0 1 2
.cfi_startproc
# %bb.0: # %entry
testl %edi, %edi
jle LBB1_5
# %bb.1: # %for.body.preheader
movl %edx, %r9d
movl %edi, %r10d
xorl %edi, %edi
# FDATA: 0 [unknown] 0 1 CmovNotInHotPath #LBB1_2# 1 2
LBB1_2: # %for.body
movl (%rcx,%rdi,4), %r11d
movl %r11d, %eax
imull %esi, %eax
movl $10, %edx
cmpl %r9d, %eax
# CHECK: cmpl %r9d, %eax
# CHECK-NEXT: cmovlel %r11d, %edx
LBB1_4_br:
jg LBB1_4
# FDATA: 1 CmovNotInHotPath #LBB1_4_br# 1 CmovNotInHotPath #LBB1_3# 1 2
# FDATA: 1 CmovNotInHotPath #LBB1_4_br# 1 CmovNotInHotPath #LBB1_4# 1 2
# %bb.3: # %for.body
LBB1_3:
movl %r11d, %edx
LBB1_4: # %for.body
movl %edx, (%rcx,%rdi,4)
movl (%r8,%rdi,4), %eax
cltd
idivl %r9d
movl %eax, (%r8,%rdi,4)
addq $1, %rdi
cmpq %rdi, %r10
jne LBB1_2
LBB1_5: # %for.cond.cleanup
retq
Lfunc_end1:
.size CmovNotInHotPath, Lfunc_end1-CmovNotInHotPath
.cfi_endproc
# -- End function
.globl MaxIndex # -- Begin function MaxIndex
.p2align 4, 0x90
.type MaxIndex,@function
MaxIndex: # @MaxIndex
# CHECK-LABEL: Binary Function "MaxIndex" after CMOV conversion
# FDATA: 0 [unknown] 0 1 MaxIndex 0 1 2
.cfi_startproc
# %bb.0: # %entry
xorl %eax, %eax
cmpl $2, %edi
jl LBB2_5
# %bb.1: # %for.body.preheader
movl %edi, %r8d
xorl %edi, %edi
movl $1, %edx
# FDATA: 0 [unknown] 0 1 MaxIndex #LBB2_2# 1 2
LBB2_2: # %for.body
movl (%rsi,%rdx,4), %r9d
movslq %edi, %rcx
movl %edx, %eax
cmpl (%rsi,%rcx,4), %r9d
# CHECK: cmpl (%rsi,%rcx,4), %r9d
# CHECK-NEXT: cmovlel %edi, %eax
LBB2_2_br:
jg LBB2_4
# FDATA: 1 MaxIndex #LBB2_2_br# 1 MaxIndex #LBB2_3# 1 2
# FDATA: 1 MaxIndex #LBB2_2_br# 1 MaxIndex #LBB2_4# 1 2
# %bb.3: # %for.body
LBB2_3:
movl %edi, %eax
LBB2_4: # %for.body
addq $1, %rdx
movl %eax, %edi
cmpq %rdx, %r8
jne LBB2_2
LBB2_5: # %for.cond.cleanup
retq
Lfunc_end2:
.size MaxIndex, Lfunc_end2-MaxIndex
.cfi_endproc
# -- End function
.globl MaxIndex_unpredictable # -- Begin function MaxIndex_unpredictable
.p2align 4, 0x90
.type MaxIndex_unpredictable,@function
MaxIndex_unpredictable: # @MaxIndex_unpredictable
# CHECK-LABEL: Binary Function "MaxIndex_unpredictable" after CMOV conversion
# FDATA: 0 [unknown] 0 1 MaxIndex_unpredictable 0 1 2
.cfi_startproc
# %bb.0: # %entry
xorl %eax, %eax
cmpl $2, %edi
jl LBB3_5
# %bb.1: # %for.body.preheader
movl %edi, %r8d
xorl %edi, %edi
movl $1, %edx
# FDATA: 0 [unknown] 0 1 MaxIndex_unpredictable #LBB3_2# 1 2
LBB3_2: # %for.body
movl (%rsi,%rdx,4), %r9d
movslq %edi, %rcx
movl %edx, %eax
cmpl (%rsi,%rcx,4), %r9d
# CHECK: cmpl (%rsi,%rcx,4), %r9d
# CHECK-NEXT: cmovlel %edi, %eax
LBB3_2_br:
jg LBB3_4
# FDATA: 1 MaxIndex_unpredictable #LBB3_2_br# 1 MaxIndex_unpredictable #LBB3_3# 1 2
# FDATA: 1 MaxIndex_unpredictable #LBB3_2_br# 1 MaxIndex_unpredictable #LBB3_4# 1 2
# %bb.3: # %for.body
LBB3_3:
movl %edi, %eax
LBB3_4: # %for.body
addq $1, %rdx
movl %eax, %edi
cmpq %rdx, %r8
jne LBB3_2
LBB3_5: # %for.cond.cleanup
retq
Lfunc_end3:
.size MaxIndex_unpredictable, Lfunc_end3-MaxIndex_unpredictable
.cfi_endproc
# -- End function
.globl MaxValue # -- Begin function MaxValue
.p2align 4, 0x90
.type MaxValue,@function
MaxValue: # @MaxValue
# CHECK-LABEL: Binary Function "MaxValue" after CMOV conversion
# FDATA: 0 [unknown] 0 1 MaxValue 0 1 2
.cfi_startproc
# %bb.0: # %entry
movl (%rsi), %ecx
cmpl $2, %edi
jge LBB4_3
# %bb.1:
LBB4_1:
movl %ecx, %eax
LBB4_2: # %for.cond.cleanup
retq
LBB4_3: # %for.body.preheader
movl %edi, %edi
movl $1, %edx
LBB4_4: # %for.body
movl (%rsi,%rdx,4), %eax
cmpl %ecx, %eax
# CHECK: cmpl %ecx, %eax
# CHECK-NEXT: cmovlel %ecx, %eax
LBB4_4_br:
jg LBB4_6
# FDATA: 1 MaxValue #LBB4_4_br# 1 MaxValue #LBB4_5# 1 2
# FDATA: 1 MaxValue #LBB4_4_br# 1 MaxValue #LBB4_6# 1 2
# %bb.5: # %for.body
LBB4_5:
movl %ecx, %eax
LBB4_6: # %for.body
addq $1, %rdx
movl %eax, %ecx
cmpq %rdx, %rdi
je LBB4_2
jmp LBB4_4
Lfunc_end4:
.size MaxValue, Lfunc_end4-MaxValue
.cfi_endproc
# -- End function
.globl BinarySearch # -- Begin function BinarySearch
.p2align 4, 0x90
.type BinarySearch,@function
BinarySearch: # @BinarySearch
# CHECK-LABEL: Binary Function "BinarySearch" after CMOV conversion
# FDATA: 0 [unknown] 0 1 BinarySearch 0 1 2
.cfi_startproc
# %bb.0: # %entry
movl (%rsi), %eax
jmp LBB5_2
LBB5_1: # %while.body
movl %ecx, %eax
xorl %ecx, %ecx
btl %eax, %edi
setae %cl
movq 8(%rdx,%rcx,8), %rdx
LBB5_2: # %while.body
movl (%rdx), %ecx
cmpl %ecx, %eax
ja LBB5_1
# %bb.3: # %while.end
retq
Lfunc_end5:
.size BinarySearch, Lfunc_end5-BinarySearch
.cfi_endproc
# -- End function
.globl Transform # -- Begin function Transform
.p2align 4, 0x90
.type Transform,@function
Transform: # @Transform
# CHECK-LABEL: Binary Function "Transform" after CMOV conversion
# FDATA: 0 [unknown] 0 1 Transform 0 1 2
.cfi_startproc
# %bb.0: # %entry
movb $1, %al
testb %al, %al
jne LBB6_5
# %bb.1: # %while.body.preheader
movl %edx, %r8d
xorl %esi, %esi
LBB6_2: # %while.body
movslq %esi, %rsi
movl (%rdi,%rsi,4), %eax
xorl %edx, %edx
divl %r8d
movl %eax, %edx
movl $11, %eax
movl %r8d, %ecx
cmpl %r8d, %edx
ja LBB6_4
# %bb.3: # %while.body
movl $22, %eax
movl $22, %ecx
LBB6_4: # %while.body
xorl %edx, %edx
divl %ecx
movl %edx, (%rdi,%rsi,4)
addl $1, %esi
cmpl %r9d, %esi
ja LBB6_2
LBB6_5: # %while.end
retq
Lfunc_end6:
.size Transform, Lfunc_end6-Transform
.cfi_endproc
# -- End function
.globl test_cmov_memoperand # -- Begin function test_cmov_memoperand
.p2align 4, 0x90
.type test_cmov_memoperand,@function
test_cmov_memoperand: # @test_cmov_memoperand
# CHECK-LABEL: Binary Function "test_cmov_memoperand" after CMOV conversion
# FDATA: 0 [unknown] 0 1 test_cmov_memoperand 0 1 2
.cfi_startproc
# %bb.0: # %entry
movl %edx, %eax
cmpl %esi, %edi
ja LBB7_2
# %bb.1: # %entry
movl (%rcx), %eax
LBB7_2: # %entry
retq
Lfunc_end7:
.size test_cmov_memoperand, Lfunc_end7-test_cmov_memoperand
.cfi_endproc
# -- End function
.globl test_cmov_memoperand_unpredictable # -- Begin function test_cmov_memoperand_unpredictable
.p2align 4, 0x90
.type test_cmov_memoperand_unpredictable,@function
test_cmov_memoperand_unpredictable: # @test_cmov_memoperand_unpredictable
# CHECK-LABEL: Binary Function "test_cmov_memoperand_unpredictable" after CMOV conversion
# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_unpredictable 0 1 2
.cfi_startproc
# %bb.0: # %entry
movl %edx, %eax
cmpl %esi, %edi
ja LBB8_2
# %bb.1: # %entry
movl (%rcx), %eax
LBB8_2: # %entry
retq
Lfunc_end8:
.size test_cmov_memoperand_unpredictable, Lfunc_end8-test_cmov_memoperand_unpredictable
.cfi_endproc
# -- End function
.globl test_cmov_memoperand_in_group # -- Begin function test_cmov_memoperand_in_group
.p2align 4, 0x90
.type test_cmov_memoperand_in_group,@function
test_cmov_memoperand_in_group: # @test_cmov_memoperand_in_group
# CHECK-LABEL: Binary Function "test_cmov_memoperand_in_group" after CMOV conversion
# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_in_group 0 1 2
.cfi_startproc
# %bb.0: # %entry
movl %edx, %eax
movl %edx, %r8d
cmpl %esi, %edi
ja LBB9_2
# %bb.1: # %entry
movl (%rcx), %r8d
movl %edi, %eax
movl %esi, %edx
LBB9_2: # %entry
addl %r8d, %eax
addl %edx, %eax
retq
Lfunc_end9:
.size test_cmov_memoperand_in_group, Lfunc_end9-test_cmov_memoperand_in_group
.cfi_endproc
# -- End function
.globl test_cmov_memoperand_in_group2 # -- Begin function test_cmov_memoperand_in_group2
.p2align 4, 0x90
.type test_cmov_memoperand_in_group2,@function
test_cmov_memoperand_in_group2: # @test_cmov_memoperand_in_group2
# CHECK-LABEL: Binary Function "test_cmov_memoperand_in_group2" after CMOV conversion
# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_in_group2 0 1 2
.cfi_startproc
# %bb.0: # %entry
movl %edx, %eax
movl %edx, %r8d
cmpl %esi, %edi
jbe LBB10_2
# %bb.1: # %entry
movl (%rcx), %r8d
movl %edi, %eax
movl %esi, %edx
LBB10_2: # %entry
addl %r8d, %eax
addl %edx, %eax
retq
Lfunc_end10:
.size test_cmov_memoperand_in_group2, Lfunc_end10-test_cmov_memoperand_in_group2
.cfi_endproc
# -- End function
.globl test_cmov_memoperand_conflicting_dir # -- Begin function test_cmov_memoperand_conflicting_dir
.p2align 4, 0x90
.type test_cmov_memoperand_conflicting_dir,@function
test_cmov_memoperand_conflicting_dir: # @test_cmov_memoperand_conflicting_dir
# CHECK-LABEL: Binary Function "test_cmov_memoperand_conflicting_dir" after CMOV conversion
# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_conflicting_dir 0 1 2
.cfi_startproc
# %bb.0: # %entry
cmpl %esi, %edi
movl (%rcx), %eax
cmoval %edx, %eax
cmoval (%r8), %edx
addl %edx, %eax
retq
Lfunc_end11:
.size test_cmov_memoperand_conflicting_dir, Lfunc_end11-test_cmov_memoperand_conflicting_dir
.cfi_endproc
# -- End function
.globl test_cmov_memoperand_in_group_reuse_for_addr # -- Begin function test_cmov_memoperand_in_group_reuse_for_addr
.p2align 4, 0x90
.type test_cmov_memoperand_in_group_reuse_for_addr,@function
test_cmov_memoperand_in_group_reuse_for_addr: # @test_cmov_memoperand_in_group_reuse_for_addr
# CHECK-LABEL: Binary Function "test_cmov_memoperand_in_group_reuse_for_addr" after CMOV conversion
# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_in_group_reuse_for_addr 0 1 2
.cfi_startproc
# %bb.0: # %entry
movl %edi, %eax
cmpl %esi, %edi
ja LBB12_2
# %bb.1: # %entry
movl (%rcx), %eax
LBB12_2: # %entry
retq
Lfunc_end12:
.size test_cmov_memoperand_in_group_reuse_for_addr, Lfunc_end12-test_cmov_memoperand_in_group_reuse_for_addr
.cfi_endproc
# -- End function
.globl test_cmov_memoperand_in_group_reuse_for_addr2 # -- Begin function test_cmov_memoperand_in_group_reuse_for_addr2
.p2align 4, 0x90
.type test_cmov_memoperand_in_group_reuse_for_addr2,@function
test_cmov_memoperand_in_group_reuse_for_addr2: # @test_cmov_memoperand_in_group_reuse_for_addr2
# CHECK-LABEL: Binary Function "test_cmov_memoperand_in_group_reuse_for_addr2" after CMOV conversion
# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_in_group_reuse_for_addr2 0 1 2
.cfi_startproc
# %bb.0: # %entry
movl %edi, %eax
cmpl %esi, %edi
ja LBB13_2
# %bb.1: # %entry
movq (%rcx), %rax
movl (%rax), %eax
LBB13_2: # %entry
retq
Lfunc_end13:
.size test_cmov_memoperand_in_group_reuse_for_addr2, Lfunc_end13-test_cmov_memoperand_in_group_reuse_for_addr2
.cfi_endproc
# -- End function
.globl test_cmov_memoperand_in_group_reuse_for_addr3 # -- Begin function test_cmov_memoperand_in_group_reuse_for_addr3
.p2align 4, 0x90
.type test_cmov_memoperand_in_group_reuse_for_addr3,@function
test_cmov_memoperand_in_group_reuse_for_addr3: # @test_cmov_memoperand_in_group_reuse_for_addr3
# CHECK-LABEL: Binary Function "test_cmov_memoperand_in_group_reuse_for_addr3" after CMOV conversion
# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_in_group_reuse_for_addr3 0 1 2
.cfi_startproc
# %bb.0: # %entry
movl %edi, %eax
cmpl %esi, %edi
ja LBB14_2
# %bb.1: # %entry
movl (%rcx), %eax
LBB14_2: # %entry
retq
Lfunc_end14:
.size test_cmov_memoperand_in_group_reuse_for_addr3, Lfunc_end14-test_cmov_memoperand_in_group_reuse_for_addr3
.cfi_endproc
# -- End function
.globl test_memoperand_loop # -- Begin function test_memoperand_loop
.p2align 4, 0x90
.type test_memoperand_loop,@function
test_memoperand_loop: # @test_memoperand_loop
# CHECK-LABEL: Binary Function "test_memoperand_loop" after CMOV conversion
# FDATA: 0 [unknown] 0 1 test_memoperand_loop 0 1 2
.cfi_startproc
# %bb.0: # %entry
movq begin@GOTPCREL(%rip), %r8
movq (%r8), %rax
movq end@GOTPCREL(%rip), %rcx
movq (%rcx), %rdx
xorl %esi, %esi
movq %rax, %rcx
LBB15_1: # %loop.body
addq $8, %rcx
cmpq %rdx, %rcx
ja LBB15_3
# %bb.2: # %loop.body
movq (%r8), %rcx
LBB15_3: # %loop.body
movl %edi, (%rcx)
addq $8, %rcx
cmpq %rdx, %rcx
# CHECK: movl %edi, (%rcx)
# CHECK-NEXT: addq $0x8, %rcx
# CHECK-NEXT: cmpq %rdx, %rcx
# CHECK-NEXT: cmovbeq %rax, %rcx
LBB15_3_br:
ja LBB15_5
# FDATA: 1 test_memoperand_loop #LBB15_3_br# 1 test_memoperand_loop #LBB15_4# 1 2
# FDATA: 1 test_memoperand_loop #LBB15_3_br# 1 test_memoperand_loop #LBB15_5# 1 2
# %bb.4: # %loop.body
LBB15_4:
movq %rax, %rcx
LBB15_5: # %loop.body
movl %edi, (%rcx)
addl $1, %esi
cmpl $1024, %esi # imm = 0x400
jl LBB15_1
# %bb.6: # %exit
retq
Lfunc_end15:
.size test_memoperand_loop, Lfunc_end15-test_memoperand_loop
.cfi_endproc
# -- End function
.globl CmovBackToBack # -- Begin function CmovBackToBack
.p2align 4, 0x90
.type CmovBackToBack,@function
CmovBackToBack: # @CmovBackToBack
# CHECK-LABEL: Binary Function "CmovBackToBack" after CMOV conversion
# FDATA: 0 [unknown] 0 1 CmovBackToBack 0 1 2
.cfi_startproc
testl %edi, %edi
jle LBB16_5
movl %edi, %r8d
xorl %edi, %edi
# FDATA: 0 [unknown] 0 1 CmovBackToBack #LBB16_2# 1 2
LBB16_2: # %for.body
movl (%rcx,%rdi,4), %eax
leal 1(%rax), %r9d
imull %esi, %eax
movl $10, %r10d
cmpl %edx, %eax
# CHECK: cmpl %edx, %eax
# CHECK-NEXT: cmovlel %r9d, %r10d
LBB16_2_br:
jg LBB16_4
# FDATA: 1 CmovBackToBack #LBB16_2_br# 1 CmovBackToBack #LBB16_3# 1 2
# FDATA: 1 CmovBackToBack #LBB16_2_br# 1 CmovBackToBack #LBB16_4# 1 2
LBB16_3:
movl %r9d, %r10d
LBB16_4: # %for.body
# CHECK-NEXT: cmovlel %r9d, %r10d
LBB16_6_br:
jg LBB16_8
# FDATA: 1 CmovBackToBack #LBB16_6_br# 1 CmovBackToBack #LBB16_7# 1 2
# FDATA: 1 CmovBackToBack #LBB16_6_br# 1 CmovBackToBack #LBB16_8# 1 2
LBB16_7:
movl %r9d, %r10d
LBB16_8: # %for.body
imull %r9d, %r10d
movl %r10d, (%rcx,%rdi,4)
addq $1, %rdi
cmpq %rdi, %r8
jne LBB16_2
LBB16_5: # %for.cond.cleanup
retq
Lfunc_end16:
.size CmovBackToBack, Lfunc_end16-CmovBackToBack
.cfi_endproc
# -- End function
.data
.globl begin
begin:
.quad 0xdeadbeef
.globl end
end:
.quad 0xfaceb00c