llvm/bolt/BinaryPasses.cpp
Bill Nell c27a6a5c63 Add verbosity level and clean up stream usage.
Summary:
I've added a verbosity level to help keep the BOLT spewage to a minimum.
The default level is pretty terse now, level 1 is closer to the original
output, and I've saved level 2 for the noisiest messages.  Error messages
should never be suppressed by the verbosity level; only warnings and info
messages are.

The rationale behind stream usage is as follows:
outs() for info and debugging controlled by command line flags.
errs() for errors and warnings.
dbgs() for output within DEBUG().

With the exception of a few of the level 2 messages, I don't have any strong feelings about the others.
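
For illustration, a minimal sketch of the convention, using only the streams
and the opts::Verbosity flag already referenced in this file (the message
text here is made up):

    if (opts::Verbosity >= 1)
      outs() << "BOLT-INFO: optional progress message\n";      // info: outs(), gated by verbosity
    if (opts::Verbosity >= 1)
      errs() << "BOLT-WARNING: suspicious but recoverable\n";  // warning: errs(), may be gated
    errs() << "BOLT-ERROR: unrecoverable problem\n";           // error: errs(), never gated
    DEBUG(dbgs() << "BOLT-DEBUG: developer-only detail\n");    // debug: dbgs() inside DEBUG()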

(cherry picked from FBD3814259)
2016-09-02 14:15:29 -07:00


//===--- BinaryPasses.cpp - Binary-level analysis/optimization passes -----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
#include "BinaryPasses.h"
#include "llvm/Support/Options.h"
#include <unordered_map>
#define DEBUG_TYPE "bolt"
namespace opts {
extern llvm::cl::opt<unsigned> Verbosity;
extern llvm::cl::opt<bool> PrintAll;
extern llvm::cl::opt<bool> DumpDotAll;
extern llvm::cl::opt<bool> PrintReordered;
extern llvm::cl::opt<bool> PrintEHRanges;
extern llvm::cl::opt<bool> PrintUCE;
extern llvm::cl::opt<bool> PrintPeepholes;
extern llvm::cl::opt<bool> PrintSimplifyROLoads;
extern llvm::cl::opt<bool> PrintICF;
extern llvm::cl::opt<llvm::bolt::BinaryFunction::SplittingType> SplitFunctions;
extern bool shouldProcess(const llvm::bolt::BinaryFunction &Function);
static llvm::cl::opt<llvm::bolt::BinaryFunction::LayoutType>
ReorderBlocks(
"reorder-blocks",
llvm::cl::desc("change layout of basic blocks in a function"),
llvm::cl::init(llvm::bolt::BinaryFunction::LT_NONE),
llvm::cl::values(clEnumValN(llvm::bolt::BinaryFunction::LT_NONE,
"none",
"do not reorder basic blocks"),
clEnumValN(llvm::bolt::BinaryFunction::LT_REVERSE,
"reverse",
"layout blocks in reverse order"),
clEnumValN(llvm::bolt::BinaryFunction::LT_OPTIMIZE,
"normal",
"perform optimal layout based on profile"),
clEnumValN(llvm::bolt::BinaryFunction::LT_OPTIMIZE_BRANCH,
"branch-predictor",
"perform optimal layout prioritizing branch "
"predictions"),
clEnumValN(llvm::bolt::BinaryFunction::LT_OPTIMIZE_CACHE,
"cache",
"perform optimal layout prioritizing I-cache "
"behavior"),
clEnumValEnd));
static llvm::cl::opt<bool>
MinBranchClusters(
"min-branch-clusters",
llvm::cl::desc("use a modified clustering algorithm geared towards "
"minimizing branches"),
llvm::cl::Hidden);
} // namespace opts
namespace llvm {
namespace bolt {
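// If a function's body is just a single tail call to another function, record
// that call target so calls to the function can later be redirected to it.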
void OptimizeBodylessFunctions::analyze(
BinaryFunction &BF,
BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs) {
if (BF.size() != 1 || (*BF.begin()).size() == 0)
return;
auto &BB = *BF.begin();
const auto &FirstInst = *BB.begin();
if (!BC.MIA->isTailCall(FirstInst))
return;
auto &Op1 = FirstInst.getOperand(0);
if (!Op1.isExpr())
return;
auto Expr = dyn_cast<MCSymbolRefExpr>(Op1.getExpr());
if (!Expr)
return;
const auto *Function = BC.getFunctionForSymbol(&Expr->getSymbol());
if (!Function)
return;
EquivalentCallTarget[BF.getSymbol()] = Function;
}
void OptimizeBodylessFunctions::optimizeCalls(BinaryFunction &BF,
BinaryContext &BC) {
for (auto BBIt = BF.begin(), BBEnd = BF.end(); BBIt != BBEnd; ++BBIt) {
for (auto InstIt = (*BBIt).begin(), InstEnd = (*BBIt).end();
InstIt != InstEnd; ++InstIt) {
auto &Inst = *InstIt;
if (!BC.MIA->isCall(Inst))
continue;
auto &Op1 = Inst.getOperand(0);
if (!Op1.isExpr())
continue;
auto Expr = dyn_cast<MCSymbolRefExpr>(Op1.getExpr());
if (!Expr)
continue;
auto *OriginalTarget = &Expr->getSymbol();
auto *Target = OriginalTarget;
// Iteratively update target since we could have f1() calling f2()
// calling f3() calling f4() and we want to output f1() directly
// calling f4().
while (EquivalentCallTarget.count(Target)) {
Target = EquivalentCallTarget.find(Target)->second->getSymbol();
}
if (Target == OriginalTarget)
continue;
DEBUG(dbgs() << "BOLT-DEBUG: Optimizing " << (*BBIt).getName()
<< " in " << BF
<< ": replacing call to " << OriginalTarget->getName()
<< " by call to " << Target->getName() << "\n");
BC.MIA->replaceCallTargetOperand(Inst, Target, BC.Ctx.get());
}
}
}
void OptimizeBodylessFunctions::runOnFunctions(
BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &) {
for (auto &It : BFs) {
auto &Function = It.second;
if (Function.isSimple() && opts::shouldProcess(Function)) {
analyze(Function, BC, BFs);
}
}
for (auto &It : BFs) {
auto &Function = It.second;
if (Function.isSimple() && opts::shouldProcess(Function)) {
optimizeCalls(Function, BC);
}
}
}
void InlineSmallFunctions::findInliningCandidates(
BinaryContext &BC,
const std::map<uint64_t, BinaryFunction> &BFs) {
for (const auto &BFIt : BFs) {
const auto &Function = BFIt.second;
if (!Function.isSimple() ||
!opts::shouldProcess(Function) ||
Function.size() != 1)
continue;
auto &BB = *Function.begin();
const auto &LastInstruction = *BB.rbegin();
// Check if the function is small enough and doesn't do a tail call.
// The size we use includes pseudo-instructions but here they shouldn't
// matter. So some opportunities may be missed because of this.
if (BB.size() > 0 &&
BB.size() <= kMaxInstructions &&
BC.MIA->isReturn(LastInstruction) &&
!BC.MIA->isTailCall(LastInstruction)) {
InliningCandidates.insert(&Function);
}
}
DEBUG(dbgs() << "BOLT-DEBUG: " << InliningCandidates.size()
<< " inlineable functions.\n");
}
void InlineSmallFunctions::findInliningCandidatesAggressive(
BinaryContext &BC,
const std::map<uint64_t, BinaryFunction> &BFs) {
std::set<std::string> OverwrittenFunctions = {
"_ZN4HPHP13hash_string_iEPKcj",
"_ZN4HPHP21hash_string_cs_unsafeEPKcj",
"_ZN4HPHP14hash_string_csEPKcj",
"_ZN4HPHP20hash_string_i_unsafeEPKcj",
"_ZNK4HPHP10StringData10hashHelperEv"
};
for (const auto &BFIt : BFs) {
const auto &Function = BFIt.second;
if (!Function.isSimple() ||
!opts::shouldProcess(Function) ||
OverwrittenFunctions.count(Function.getSymbol()->getName()) ||
Function.hasEHRanges())
continue;
uint64_t FunctionSize = 0;
for (const auto *BB : Function.layout()) {
FunctionSize += BC.computeCodeSize(BB->begin(), BB->end());
}
assert(FunctionSize > 0 && "found empty function");
if (FunctionSize > kMaxSize)
continue;
bool FoundCFI = false;
for (const auto BB : Function.layout()) {
for (const auto &Inst : *BB) {
if (BC.MIA->isEHLabel(Inst) || BC.MIA->isCFI(Inst)) {
FoundCFI = true;
break;
}
}
}
if (!FoundCFI)
InliningCandidates.insert(&Function);
}
DEBUG(dbgs() << "BOLT-DEBUG: " << InliningCandidates.size()
<< " inlineable functions.\n");
}
namespace {
/// Returns whether a function creates a stack frame for itself or not.
/// If so, we need to manipulate the stack pointer when calling this function.
/// Since we're only inlining very small functions, we return false for now, but
/// we could for instance check if the function starts with 'push ebp'.
/// TODO: generalize this.
bool createsStackFrame(const BinaryBasicBlock &) {
return false;
}
} // namespace
void InlineSmallFunctions::inlineCall(
BinaryContext &BC,
BinaryBasicBlock &BB,
MCInst *CallInst,
const BinaryBasicBlock &InlinedFunctionBB) {
assert(BC.MIA->isCall(*CallInst) && "Can only inline a call.");
assert(BC.MIA->isReturn(*InlinedFunctionBB.rbegin()) &&
"Inlined function should end with a return.");
std::vector<MCInst> InlinedInstance;
bool ShouldAdjustStack = createsStackFrame(InlinedFunctionBB);
// Move stack like 'call' would if needed.
if (ShouldAdjustStack) {
MCInst StackInc;
BC.MIA->createStackPointerIncrement(StackInc);
InlinedInstance.push_back(StackInc);
}
for (auto Instruction : InlinedFunctionBB) {
if (BC.MIA->isReturn(Instruction)) {
break;
}
if (!BC.MIA->isEHLabel(Instruction) &&
!BC.MIA->isCFI(Instruction)) {
InlinedInstance.push_back(Instruction);
}
}
// Move stack pointer like 'ret' would.
if (ShouldAdjustStack) {
MCInst StackDec;
BC.MIA->createStackPointerDecrement(StackDec);
InlinedInstance.push_back(StackDec);
}
BB.replaceInstruction(CallInst, InlinedInstance);
}
std::pair<BinaryBasicBlock *, unsigned>
InlineSmallFunctions::inlineCall(
BinaryContext &BC,
BinaryFunction &CallerFunction,
BinaryBasicBlock *CallerBB,
const unsigned CallInstIndex,
const BinaryFunction &InlinedFunction) {
// Get the instruction to be replaced with inlined code.
MCInst &CallInst = CallerBB->getInstructionAtIndex(CallInstIndex);
assert(BC.MIA->isCall(CallInst) && "Can only inline a call.");
// Point in the function after the inlined code.
BinaryBasicBlock *AfterInlinedBB = nullptr;
unsigned AfterInlinedIstrIndex = 0;
// In case of a tail call we should not remove any ret instructions from the
// inlined instance.
bool IsTailCall = BC.MIA->isTailCall(CallInst);
// The first block of the function to be inlined can be merged with the caller
// basic block. This cannot happen if there are jumps to the first block.
bool CanMergeFirstInlinedBlock = (*InlinedFunction.begin()).pred_size() == 0;
// If the call to be inlined is not at the end of its basic block and we have
// to inline more than one basic block (or even just one basic block that
// cannot be merged into the caller block), then the caller's basic block
// should be split.
bool ShouldSplitCallerBB =
CallInstIndex < CallerBB->size() - 1 &&
(InlinedFunction.size() > 1 || !CanMergeFirstInlinedBlock);
// Copy inlined function's basic blocks into a vector of basic blocks that
// will be inserted in the caller function (the inlined instance). Also, we
// keep a mapping from basic block index to the corresponding block in the
// inlined instance.
std::vector<std::unique_ptr<BinaryBasicBlock>> InlinedInstance;
std::vector<BinaryBasicBlock *>
BBIndexToInlinedInstanceBB(InlinedFunction.size(), nullptr);
for (const auto InlinedFunctionBB : InlinedFunction.layout()) {
InlinedInstance.emplace_back(CallerFunction.createBasicBlock(0));
BBIndexToInlinedInstanceBB[InlinedFunction.getIndex(InlinedFunctionBB)] =
InlinedInstance.back().get();
if (InlinedFunction.hasValidProfile())
InlinedInstance.back()->setExecutionCount(
InlinedFunctionBB->getExecutionCount());
}
if (ShouldSplitCallerBB) {
// Add one extra block to the inlined instance for the removed part of the
// caller block.
InlinedInstance.emplace_back(CallerFunction.createBasicBlock(0));
BBIndexToInlinedInstanceBB.push_back(InlinedInstance.back().get());
if (CallerFunction.hasValidProfile())
InlinedInstance.back()->setExecutionCount(CallerBB->getExecutionCount());
}
// Copy instructions to the basic blocks of the inlined instance.
unsigned InlinedInstanceBBIndex = 0;
for (const auto InlinedFunctionBB : InlinedFunction.layout()) {
// Get the corresponding block of the inlined instance.
auto *InlinedInstanceBB = InlinedInstance[InlinedInstanceBBIndex].get();
assert(InlinedInstanceBB ==
BBIndexToInlinedInstanceBB[InlinedFunction.getIndex(InlinedFunctionBB)]);
bool IsExitingBlock = false;
// Copy instructions into the inlined instance.
for (auto Instruction : *InlinedFunctionBB) {
if (!IsTailCall &&
BC.MIA->isReturn(Instruction) &&
!BC.MIA->isTailCall(Instruction)) {
// Skip returns when the caller does a normal call as opposed to a tail
// call.
IsExitingBlock = true;
continue;
}
if (!IsTailCall &&
BC.MIA->isTailCall(Instruction)) {
// Convert tail calls to normal calls when the caller does a normal
// call.
if (!BC.MIA->convertTailCallToCall(Instruction))
assert(false && "unexpected tail call opcode found");
IsExitingBlock = true;
}
if (BC.MIA->isBranch(Instruction) &&
!BC.MIA->isIndirectBranch(Instruction)) {
// Convert the branch targets in the branch instructions that will be
// added to the inlined instance.
const MCSymbol *OldTargetLabel = nullptr;
const MCSymbol *OldFTLabel = nullptr;
MCInst *CondBranch = nullptr;
MCInst *UncondBranch = nullptr;
assert(BC.MIA->analyzeBranch(Instruction, OldTargetLabel, OldFTLabel,
CondBranch, UncondBranch));
assert(OldTargetLabel);
const MCSymbol *NewTargetLabel = nullptr;
for (const auto SuccBB : InlinedFunctionBB->successors()) {
if (SuccBB->getLabel() == OldTargetLabel) {
const auto InlinedInstanceSuccBB =
BBIndexToInlinedInstanceBB[InlinedFunction.getIndex(SuccBB)];
NewTargetLabel = InlinedInstanceSuccBB->getLabel();
break;
}
}
assert(NewTargetLabel);
BC.MIA->replaceBranchTarget(Instruction, NewTargetLabel, BC.Ctx.get());
}
// TODO: Currently we simply ignore CFI instructions, but we need to
// address them for correctness.
if (!BC.MIA->isEHLabel(Instruction) &&
!BC.MIA->isCFI(Instruction)) {
InlinedInstanceBB->addInstruction(std::move(Instruction));
}
}
// Add CFG edges to the basic blocks of the inlined instance.
std::vector<BinaryBasicBlock *>
Successors(InlinedFunctionBB->succ_size(), nullptr);
std::transform(
InlinedFunctionBB->succ_begin(),
InlinedFunctionBB->succ_end(),
Successors.begin(),
[&InlinedFunction, &BBIndexToInlinedInstanceBB]
(const BinaryBasicBlock *BB) {
return BBIndexToInlinedInstanceBB[InlinedFunction.getIndex(BB)];
});
if (InlinedFunction.hasValidProfile()) {
InlinedInstanceBB->addSuccessors(
Successors.begin(),
Successors.end(),
InlinedFunctionBB->branch_info_begin(),
InlinedFunctionBB->branch_info_end());
} else {
InlinedInstanceBB->addSuccessors(
Successors.begin(),
Successors.end());
}
if (IsExitingBlock) {
assert(Successors.size() == 0);
if (ShouldSplitCallerBB) {
if (InlinedFunction.hasValidProfile()) {
InlinedInstanceBB->addSuccessor(
InlinedInstance.back().get(),
InlinedInstanceBB->getExecutionCount());
} else {
InlinedInstanceBB->addSuccessor(InlinedInstance.back().get());
}
MCInst ExitBranchInst;
const MCSymbol *ExitLabel = InlinedInstance.back().get()->getLabel();
BC.MIA->createUncondBranch(ExitBranchInst, ExitLabel, BC.Ctx.get());
InlinedInstanceBB->addInstruction(std::move(ExitBranchInst));
} else if (InlinedInstanceBBIndex > 0 || !CanMergeFirstInlinedBlock) {
assert(CallInstIndex == CallerBB->size() - 1);
assert(CallerBB->succ_size() <= 1);
if (CallerBB->succ_size() == 1) {
if (InlinedFunction.hasValidProfile()) {
InlinedInstanceBB->addSuccessor(
*CallerBB->succ_begin(),
InlinedInstanceBB->getExecutionCount());
} else {
InlinedInstanceBB->addSuccessor(*CallerBB->succ_begin());
}
MCInst ExitBranchInst;
const MCSymbol *ExitLabel = (*CallerBB->succ_begin())->getLabel();
BC.MIA->createUncondBranch(ExitBranchInst, ExitLabel, BC.Ctx.get());
InlinedInstanceBB->addInstruction(std::move(ExitBranchInst));
}
}
}
++InlinedInstanceBBIndex;
}
if (ShouldSplitCallerBB) {
// Split the basic block that contains the call and add the removed
// instructions in the last block of the inlined instance.
// (Is it OK to have a basic block with just CFI instructions?)
std::vector<MCInst> TrailInstructions =
std::move(CallerBB->splitInstructions(&CallInst));
assert(TrailInstructions.size() > 0);
InlinedInstance.back()->addInstructions(
TrailInstructions.begin(),
TrailInstructions.end());
// Add CFG edges for the block with the removed instructions.
if (CallerFunction.hasValidProfile()) {
InlinedInstance.back()->addSuccessors(
CallerBB->succ_begin(),
CallerBB->succ_end(),
CallerBB->branch_info_begin(),
CallerBB->branch_info_end());
} else {
InlinedInstance.back()->addSuccessors(
CallerBB->succ_begin(),
CallerBB->succ_end());
}
// Update the after-inlined point.
AfterInlinedBB = InlinedInstance.back().get();
AfterInlinedIstrIndex = 0;
}
assert(InlinedInstance.size() > 0 && "found function with no basic blocks");
assert(InlinedInstance.front()->size() > 0 &&
"found function with empty basic block");
// If the inlining cannot happen as a simple instruction insertion into
// CallerBB, we remove the outgoing CFG edges of the caller block.
if (InlinedInstance.size() > 1 || !CanMergeFirstInlinedBlock) {
CallerBB->removeSuccessors(CallerBB->succ_begin(), CallerBB->succ_end());
if (!ShouldSplitCallerBB) {
// Update the after-inlined point.
AfterInlinedBB = CallerFunction.getBasicBlockAfter(CallerBB);
AfterInlinedIstrIndex = 0;
}
} else {
assert(!ShouldSplitCallerBB);
// Update the after-inlined point.
if (CallInstIndex < CallerBB->size() - 1) {
AfterInlinedBB = CallerBB;
AfterInlinedIstrIndex =
CallInstIndex + InlinedInstance.front()->size();
} else {
AfterInlinedBB = CallerFunction.getBasicBlockAfter(CallerBB);
AfterInlinedIstrIndex = 0;
}
}
// Do the inlining by merging the first block of the inlined instance into
// the caller basic block if possible and adding the rest of the inlined
// instance basic blocks in the caller function.
if (CanMergeFirstInlinedBlock) {
CallerBB->replaceInstruction(
&CallInst,
InlinedInstance.front()->begin(),
InlinedInstance.front()->end());
if (InlinedInstance.size() > 1) {
auto FirstBB = InlinedInstance.begin()->get();
if (InlinedFunction.hasValidProfile()) {
CallerBB->addSuccessors(
FirstBB->succ_begin(),
FirstBB->succ_end(),
FirstBB->branch_info_begin(),
FirstBB->branch_info_end());
} else {
CallerBB->addSuccessors(
FirstBB->succ_begin(),
FirstBB->succ_end());
}
FirstBB->removeSuccessors(FirstBB->succ_begin(), FirstBB->succ_end());
}
InlinedInstance.erase(InlinedInstance.begin());
} else {
CallerBB->eraseInstruction(&CallInst);
if (CallerFunction.hasValidProfile()) {
CallerBB->addSuccessor(InlinedInstance.front().get(),
CallerBB->getExecutionCount());
} else {
CallerBB->addSuccessor(InlinedInstance.front().get());
}
}
unsigned NumBlocksToAdd = InlinedInstance.size();
CallerFunction.insertBasicBlocks(CallerBB, std::move(InlinedInstance));
CallerFunction.updateLayout(CallerBB, NumBlocksToAdd);
CallerFunction.fixBranches();
return std::make_pair(AfterInlinedBB, AfterInlinedIstrIndex);
}
bool InlineSmallFunctions::inlineCallsInFunction(
BinaryContext &BC,
BinaryFunction &Function) {
std::vector<BinaryBasicBlock *> Blocks(Function.layout().begin(),
Function.layout().end());
std::sort(Blocks.begin(), Blocks.end(),
[](const BinaryBasicBlock *BB1, const BinaryBasicBlock *BB2) {
return BB1->getExecutionCount() > BB2->getExecutionCount();
});
uint32_t ExtraSize = 0;
for (auto BB : Blocks) {
for (auto InstIt = BB->begin(), End = BB->end(); InstIt != End; ++InstIt) {
auto &Inst = *InstIt;
if (BC.MIA->isCall(Inst)) {
totalDynamicCalls += BB->getExecutionCount();
}
}
}
bool DidInlining = false;
for (auto BB : Blocks) {
if (BB->isCold())
continue;
for (auto InstIt = BB->begin(), End = BB->end(); InstIt != End; ) {
auto &Inst = *InstIt;
if (BC.MIA->isCall(Inst) &&
!BC.MIA->isTailCall(Inst) &&
Inst.size() == 1 &&
Inst.getOperand(0).isExpr()) {
auto Target = dyn_cast<MCSymbolRefExpr>(
Inst.getOperand(0).getExpr());
assert(Target && "Not MCSymbolRefExpr");
const auto *TargetFunction =
BC.getFunctionForSymbol(&Target->getSymbol());
if (TargetFunction) {
bool CallToInlineableFunction =
InliningCandidates.count(TargetFunction);
totalInlineableCalls +=
CallToInlineableFunction * BB->getExecutionCount();
if (CallToInlineableFunction &&
TargetFunction->getSize() + ExtraSize
+ Function.estimateHotSize() < Function.getMaxSize()) {
auto NextInstIt = std::next(InstIt);
inlineCall(BC, *BB, &Inst, *TargetFunction->begin());
DidInlining = true;
DEBUG(dbgs() << "BOLT-DEBUG: Inlining call to "
<< *TargetFunction << " in "
<< Function << "\n");
InstIt = NextInstIt;
ExtraSize += TargetFunction->getSize();
inlinedDynamicCalls += BB->getExecutionCount();
continue;
}
}
}
++InstIt;
}
}
return DidInlining;
}
bool InlineSmallFunctions::inlineCallsInFunctionAggressive(
BinaryContext &BC,
BinaryFunction &Function) {
std::vector<BinaryBasicBlock *> Blocks(Function.layout().begin(),
Function.layout().end());
std::sort(Blocks.begin(), Blocks.end(),
[](const BinaryBasicBlock *BB1, const BinaryBasicBlock *BB2) {
return BB1->getExecutionCount() > BB2->getExecutionCount();
});
uint32_t ExtraSize = 0;
for (auto BB : Blocks) {
for (auto InstIt = BB->begin(), End = BB->end(); InstIt != End; ++InstIt) {
auto &Inst = *InstIt;
if (BC.MIA->isCall(Inst)) {
totalDynamicCalls += BB->getExecutionCount();
}
}
}
bool DidInlining = false;
for (auto BB : Blocks) {
if (BB->isCold())
continue;
unsigned InstIndex = 0;
for (auto InstIt = BB->begin(); InstIt != BB->end(); ) {
auto &Inst = *InstIt;
if (BC.MIA->isCall(Inst) &&
Inst.size() == 1 &&
Inst.getOperand(0).isExpr()) {
assert(!BC.MIA->isInvoke(Inst));
auto Target = dyn_cast<MCSymbolRefExpr>(
Inst.getOperand(0).getExpr());
assert(Target && "Not MCSymbolRefExpr");
const auto *TargetFunction =
BC.getFunctionForSymbol(&Target->getSymbol());
if (TargetFunction) {
bool CallToInlineableFunction =
InliningCandidates.count(TargetFunction);
totalInlineableCalls +=
CallToInlineableFunction * BB->getExecutionCount();
if (CallToInlineableFunction &&
TargetFunction->getSize() + ExtraSize
+ Function.estimateHotSize() < Function.getMaxSize()) {
unsigned NextInstIndex = 0;
BinaryBasicBlock *NextBB = nullptr;
std::tie(NextBB, NextInstIndex) =
inlineCall(BC, Function, BB, InstIndex, *TargetFunction);
DidInlining = true;
DEBUG(dbgs() << "BOLT-DEBUG: Inlining call to "
<< *TargetFunction << " in "
<< Function << "\n");
InstIndex = NextBB == BB ? NextInstIndex : BB->size();
InstIt = NextBB == BB ? BB->begin() + NextInstIndex : BB->end();
ExtraSize += TargetFunction->getSize();
inlinedDynamicCalls += BB->getExecutionCount();
continue;
}
}
}
++InstIndex;
++InstIt;
}
}
return DidInlining;
}
void InlineSmallFunctions::runOnFunctions(
BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &) {
findInliningCandidates(BC, BFs);
std::vector<BinaryFunction *> ConsideredFunctions;
for (auto &It : BFs) {
auto &Function = It.second;
if (!Function.isSimple() || !opts::shouldProcess(Function))
continue;
if (Function.getExecutionCount() == BinaryFunction::COUNT_NO_PROFILE)
continue;
ConsideredFunctions.push_back(&Function);
}
std::sort(ConsideredFunctions.begin(), ConsideredFunctions.end(),
[](BinaryFunction *A, BinaryFunction *B) {
return B->getExecutionCount() < A->getExecutionCount();
});
unsigned ModifiedFunctions = 0;
for (unsigned i = 0; i < ConsideredFunctions.size() &&
ModifiedFunctions <= kMaxFunctions; ++i) {
auto &Function = *ConsideredFunctions[i];
if (inlineCallsInFunction(BC, Function))
++ModifiedFunctions;
}
DEBUG(dbgs() << "BOLT-DEBUG: Inlined " << inlinedDynamicCalls << " of "
<< totalDynamicCalls << " function calls in the profile.\n");
DEBUG(dbgs() << "BOLT-DEBUG: Inlined calls represent "
<< (100.0 * inlinedDynamicCalls / totalInlineableCalls)
<< "% of all inlineable calls in the profile.\n");
}
void EliminateUnreachableBlocks::runOnFunction(BinaryFunction& Function) {
if (!Function.isSimple() || !opts::shouldProcess(Function)) return;
// FIXME: this wouldn't work with C++ exceptions until we implement
// support for those as there will be "invisible" edges
// in the graph.
if (Function.layout_size() > 0) {
if (NagUser) {
if (opts::Verbosity >= 1) {
errs()
<< "BOLT-WARNING: Using -eliminate-unreachable is experimental and "
"unsafe for exceptions\n";
}
NagUser = false;
}
if (Function.hasEHRanges()) return;
std::stack<BinaryBasicBlock*> Stack;
std::map<BinaryBasicBlock *, bool> Reachable;
BinaryBasicBlock *Entry = *Function.layout_begin();
Stack.push(Entry);
Reachable[Entry] = true;
// Determine reachable BBs from the entry point
while (!Stack.empty()) {
auto BB = Stack.top();
Stack.pop();
for (auto Succ : BB->successors()) {
if (Reachable[Succ])
continue;
Reachable[Succ] = true;
Stack.push(Succ);
}
}
auto Count = Function.eraseDeadBBs(Reachable);
if (Count) {
DEBUG(dbgs() << "BOLT: Removed " << Count
<< " dead basic block(s) in function " << Function << '\n');
}
if (opts::PrintAll || opts::PrintUCE)
Function.print(outs(), "after unreachable code elimination", true);
if (opts::DumpDotAll)
Function.dumpGraphForPass("unreachable-code");
}
}
void EliminateUnreachableBlocks::runOnFunctions(
BinaryContext&,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &
) {
for (auto &It : BFs) {
runOnFunction(It.second);
}
}
void ReorderBasicBlocks::runOnFunctions(
BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &LargeFunctions) {
for (auto &It : BFs) {
auto &Function = It.second;
if (!Function.isSimple())
continue;
if (!opts::shouldProcess(Function))
continue;
if (opts::ReorderBlocks != BinaryFunction::LT_NONE) {
bool ShouldSplit =
(opts::SplitFunctions == BinaryFunction::ST_ALL) ||
(opts::SplitFunctions == BinaryFunction::ST_EH &&
Function.hasEHRanges()) ||
(LargeFunctions.find(It.first) != LargeFunctions.end());
Function.modifyLayout(opts::ReorderBlocks, opts::MinBranchClusters,
ShouldSplit);
if (opts::PrintAll || opts::PrintReordered)
Function.print(outs(), "after reordering blocks", true);
if (opts::DumpDotAll)
Function.dumpGraphForPass("reordering");
}
}
}
void FixupFunctions::runOnFunctions(
BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &
) {
for (auto &It : BFs) {
auto &Function = It.second;
if (!Function.isSimple())
continue;
if (!opts::shouldProcess(Function))
continue;
// Fix the CFI state.
if (!Function.fixCFIState()) {
if (opts::Verbosity >= 1) {
errs() << "BOLT-WARNING: unable to fix CFI state for function "
<< Function << ". Skipping.\n";
}
Function.setSimple(false);
continue;
}
// Update exception handling information.
Function.updateEHRanges();
if (opts::PrintAll || opts::PrintEHRanges)
Function.print(outs(), "after updating EH ranges", true);
if (opts::DumpDotAll)
Function.dumpGraphForPass("update-EH-ranges");
}
}
bool SimplifyConditionalTailCalls::fixTailCalls(BinaryContext &BC,
BinaryFunction &BF) {
if (BF.layout_size() == 0)
return false;
auto &MIA = BC.MIA;
uint64_t NumLocalTailCalls = 0;
uint64_t NumLocalPatchedTailCalls = 0;
for (auto* BB : BF.layout()) {
const MCSymbol *TBB = nullptr;
const MCSymbol *FBB = nullptr;
MCInst *CondBranch = nullptr;
MCInst *UncondBranch = nullptr;
// Determine the control flow at the end of each basic block
if (!BB->analyzeBranch(*MIA, TBB, FBB, CondBranch, UncondBranch)) {
continue;
}
// TODO: do we need to test for other branch patterns?
// For this particular case, the first basic block ends with
// a conditional branch and has two successors, one fall-through
// and one for when the condition is true.
// The target of the conditional is a basic block with a single
// unconditional branch (i.e. tail call) to another function.
// We don't care about the contents of the fall-through block.
// Note: this code makes the assumption that the fall-through
// block is the last successor.
if (CondBranch && !UncondBranch && BB->succ_size() == 2) {
// Find conditional branch target assuming the fall-through is
// always the last successor.
auto *CondTargetBB = *BB->succ_begin();
// Does the BB contain a single instruction?
if (CondTargetBB->size() - CondTargetBB->getNumPseudos() == 1) {
// Check to see if the sole instruction is a tail call.
auto const &Instr = *CondTargetBB->begin();
if (MIA->isTailCall(Instr)) {
++NumTailCallCandidates;
++NumLocalTailCalls;
auto const &TailTargetSymExpr =
cast<MCSymbolRefExpr>(Instr.getOperand(0).getExpr());
auto const &TailTarget = TailTargetSymExpr->getSymbol();
// Lookup the address for the tail call target.
auto const TailAddress = BC.GlobalSymbols.find(TailTarget.getName());
if (TailAddress == BC.GlobalSymbols.end())
continue;
// Check to make sure we would be doing a forward jump.
// This assumes the address range of the current BB and the
// tail call target address don't overlap.
if (BF.getAddress() < TailAddress->second) {
++NumTailCallsPatched;
++NumLocalPatchedTailCalls;
// Is the original jump forward or backward?
const bool isForward =
TailAddress->second > BF.getAddress() + BB->getOffset();
if (isForward) ++NumOrigForwardBranches;
// Patch the new target address into the conditional branch.
CondBranch->getOperand(0).setExpr(TailTargetSymExpr);
// Remove the unused successor which may be eliminated later
// if there are no other users.
BB->removeSuccessor(CondTargetBB);
DEBUG(dbgs() << "patched " << (isForward ? "(fwd)" : "(back)")
<< " tail call in " << BF << ".\n";);
}
}
}
}
}
DEBUG(dbgs() << "BOLT: patched " << NumLocalPatchedTailCalls
<< " tail calls (" << NumOrigForwardBranches << " forward)"
<< " from a total of " << NumLocalTailCalls
<< " in function " << BF << "\n";);
return NumLocalPatchedTailCalls > 0;
}
void SimplifyConditionalTailCalls::runOnFunctions(
BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &
) {
for (auto &It : BFs) {
auto &Function = It.second;
if (!Function.isSimple())
continue;
// Fix tail calls to reduce branch mispredictions.
if (fixTailCalls(BC, Function)) {
if (opts::PrintAll || opts::PrintReordered) {
Function.print(outs(), "after tail call patching", true);
}
if (opts::DumpDotAll) {
Function.dumpGraphForPass("tail-call-patching");
}
}
}
outs() << "BOLT-INFO: patched " << NumTailCallsPatched
<< " tail calls (" << NumOrigForwardBranches << " forward)"
<< " from a total of " << NumTailCallCandidates << "\n";
}
void Peepholes::shortenInstructions(BinaryContext &BC,
BinaryFunction &Function) {
for (auto &BB : Function) {
for (auto &Inst : BB) {
BC.MIA->shortenInstruction(Inst);
}
}
}
void Peepholes::runOnFunctions(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &LargeFunctions) {
for (auto &It : BFs) {
auto &Function = It.second;
if (Function.isSimple() && opts::shouldProcess(Function)) {
shortenInstructions(BC, Function);
if (opts::PrintAll || opts::PrintPeepholes) {
Function.print(outs(), "after peepholes", true);
}
if (opts::DumpDotAll) {
Function.dumpGraphForPass("peepholes");
}
}
}
}
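// Replace loads whose memory operand resolves to a statically known address in
// a read-only section with an immediate, where the target encoding allows it.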
bool SimplifyRODataLoads::simplifyRODataLoads(
BinaryContext &BC, BinaryFunction &BF) {
auto &MIA = BC.MIA;
uint64_t NumLocalLoadsSimplified = 0;
uint64_t NumDynamicLocalLoadsSimplified = 0;
uint64_t NumLocalLoadsFound = 0;
uint64_t NumDynamicLocalLoadsFound = 0;
for (auto *BB : BF.layout()) {
for (auto &Inst : *BB) {
unsigned Opcode = Inst.getOpcode();
const MCInstrDesc &Desc = BC.MII->get(Opcode);
// Skip instructions that do not load from memory.
if (!Desc.mayLoad())
continue;
// Try to statically evaluate the target memory address.
uint64_t TargetAddress;
if (MIA->hasRIPOperand(Inst)) {
// Try to find the symbol that corresponds to the rip-relative operand.
MCOperand DisplOp;
if (!MIA->getRIPOperandDisp(Inst, DisplOp))
continue;
assert(DisplOp.isExpr() &&
"found rip-relative with non-symbolic displacement");
// Get displacement symbol.
const MCSymbolRefExpr *DisplExpr;
if (!(DisplExpr = dyn_cast<MCSymbolRefExpr>(DisplOp.getExpr())))
continue;
const MCSymbol &DisplSymbol = DisplExpr->getSymbol();
// Look up the symbol address in the global symbols map of the binary
// context object.
auto GI = BC.GlobalSymbols.find(DisplSymbol.getName());
if (GI == BC.GlobalSymbols.end())
continue;
TargetAddress = GI->second;
} else if (!MIA->evaluateMemOperandTarget(Inst, TargetAddress)) {
continue;
}
// Get the contents of the section containing the target address of the
// memory operand. We are only interested in read-only sections.
ErrorOr<SectionRef> DataSectionOrErr =
BC.getSectionForAddress(TargetAddress);
if (!DataSectionOrErr)
continue;
SectionRef DataSection = DataSectionOrErr.get();
if (!DataSection.isReadOnly())
continue;
uint32_t Offset = TargetAddress - DataSection.getAddress();
StringRef ConstantData;
if (std::error_code EC = DataSection.getContents(ConstantData)) {
errs() << "BOLT-ERROR: 'cannot get section contents': "
<< EC.message() << ".\n";
exit(1);
}
++NumLocalLoadsFound;
if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE)
NumDynamicLocalLoadsFound += BB->getExecutionCount();
if (MIA->replaceMemOperandWithImm(Inst, ConstantData, Offset)) {
++NumLocalLoadsSimplified;
if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE)
NumDynamicLocalLoadsSimplified += BB->getExecutionCount();
}
}
}
NumLoadsFound += NumLocalLoadsFound;
NumDynamicLoadsFound += NumDynamicLocalLoadsFound;
NumLoadsSimplified += NumLocalLoadsSimplified;
NumDynamicLoadsSimplified += NumDynamicLocalLoadsSimplified;
return NumLocalLoadsSimplified > 0;
}
void SimplifyRODataLoads::runOnFunctions(
BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &
) {
for (auto &It : BFs) {
auto &Function = It.second;
if (!Function.isSimple())
continue;
if (simplifyRODataLoads(BC, Function)) {
if (opts::PrintAll || opts::PrintSimplifyROLoads) {
Function.print(outs(),
"after simplifying read-only section loads",
true);
}
if (opts::DumpDotAll) {
Function.dumpGraphForPass("simplify-rodata-loads");
}
}
}
outs() << "BOLT-INFO: simplified " << NumLoadsSimplified << " out of "
<< NumLoadsFound << " loads from a statically computed address.\n"
<< "BOLT-INFO: dynamic loads simplified: " << NumDynamicLoadsSimplified
<< "\n"
<< "BOLT-INFO: dynamic loads found: " << NumDynamicLoadsFound << "\n";
}
void IdenticalCodeFolding::discoverCallers(
BinaryContext &BC, std::map<uint64_t, BinaryFunction> &BFs) {
for (auto &I : BFs) {
BinaryFunction &Caller = I.second;
if (!Caller.isSimple())
continue;
for (BinaryBasicBlock &BB : Caller) {
unsigned BlockIndex = Caller.getIndex(&BB);
unsigned InstrIndex = 0;
for (MCInst &Inst : BB) {
if (!BC.MIA->isCall(Inst)) {
++InstrIndex;
continue;
}
const MCOperand &TargetOp = Inst.getOperand(0);
if (!TargetOp.isExpr()) {
// This is an indirect call, so we cannot record a target.
++InstrIndex;
continue;
}
// Find the target function for this call.
const auto *TargetExpr = TargetOp.getExpr();
assert(TargetExpr->getKind() == MCExpr::SymbolRef);
const auto &TargetSymbol =
dyn_cast<MCSymbolRefExpr>(TargetExpr)->getSymbol();
const auto *Function = BC.getFunctionForSymbol(&TargetSymbol);
if (!Function) {
// Call to a function without a BinaryFunction object.
++InstrIndex;
continue;
}
// Insert a tuple in the Callers map.
Callers[Function].emplace_back(
CallSite(&Caller, BlockIndex, InstrIndex));
++InstrIndex;
}
}
}
}
void IdenticalCodeFolding::foldFunction(
BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
BinaryFunction *BFToFold,
BinaryFunction *BFToReplaceWith,
std::set<BinaryFunction *> &Modified) {
// Mark BFToFold as identical with BFToReplaceWith.
BFToFold->setIdenticalFunctionAddress(BFToReplaceWith->getAddress());
// Add the size of BFToFold to the total size savings estimate.
BytesSavedEstimate += BFToFold->getSize();
// Get callers of BFToFold.
auto CI = Callers.find(BFToFold);
if (CI == Callers.end())
return;
std::vector<CallSite> &BFToFoldCallers = CI->second;
// Get callers of BFToReplaceWith.
std::vector<CallSite> &BFToReplaceWithCallers = Callers[BFToReplaceWith];
// Get MCSymbol for BFToReplaceWith.
MCSymbol *SymbolToReplaceWith =
BC.getOrCreateGlobalSymbol(BFToReplaceWith->getAddress(), "");
// Traverse callers of BFToFold and replace the calls with calls
// to BFToReplaceWith.
for (const CallSite &CS : BFToFoldCallers) {
// Get call instruction.
BinaryFunction *Caller = CS.Caller;
BinaryBasicBlock *CallBB = Caller->getBasicBlockAtIndex(CS.BlockIndex);
MCInst &CallInst = CallBB->getInstructionAtIndex(CS.InstrIndex);
// Replace call target with BFToReplaceWith.
auto Success = BC.MIA->replaceCallTargetOperand(CallInst,
SymbolToReplaceWith,
BC.Ctx.get());
assert(Success && "unexpected call target prevented the replacement");
// Add this call site to the callers of BFToReplaceWith.
BFToReplaceWithCallers.emplace_back(CS);
// Add caller to the set of modified functions.
Modified.insert(Caller);
// Update dynamic calls folded stat.
if (Caller->hasValidProfile() &&
CallBB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE)
NumDynamicCallsFolded += CallBB->getExecutionCount();
}
// Remove all callers of BFToFold.
BFToFoldCallers.clear();
++NumFunctionsFolded;
// Merge execution counts of BFToFold into those of BFToReplaceWith.
BFToFold->mergeProfileDataInto(*BFToReplaceWith);
}
void IdenticalCodeFolding::runOnFunctions(
BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &
) {
discoverCallers(BC, BFs);
// This hash table is used to identify identical functions. It maps
// a function to a bucket of functions identical to it.
struct KeyHash {
std::size_t operator()(const BinaryFunction *F) const { return F->hash(); }
};
struct KeyEqual {
bool operator()(const BinaryFunction *A, const BinaryFunction *B) const {
return A->isIdenticalWith(*B);
}
};
std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>,
KeyHash, KeyEqual> Buckets;
// Set that holds the functions that were modified by the last pass.
std::set<BinaryFunction *> Mod;
// Vector of all the candidate functions to be tested for being identical
// to each other. Initialized with all simple functions.
std::vector<BinaryFunction *> Cands;
for (auto &I : BFs) {
BinaryFunction *BF = &I.second;
if (BF->isSimple())
Cands.emplace_back(BF);
}
// We repeat the icf pass until no new modifications happen.
unsigned Iter = 1;
do {
Buckets.clear();
Mod.clear();
if (opts::Verbosity >= 1) {
outs() << "BOLT-INFO: icf pass " << Iter << "...\n";
}
uint64_t NumIdenticalFunctions = 0;
// Compare candidate functions using the Buckets hash table. Identical
// functions are efficiently discovered and added to the same bucket.
for (BinaryFunction *BF : Cands) {
Buckets[BF].emplace_back(BF);
}
Cands.clear();
// Go through the functions of each bucket and fold any references to them
// with the references to the hottest function among them.
for (auto &I : Buckets) {
std::vector<BinaryFunction *> &IFs = I.second;
std::sort(IFs.begin(), IFs.end(),
[](const BinaryFunction *A, const BinaryFunction *B) {
if (!A->hasValidProfile() && !B->hasValidProfile())
return false;
if (!A->hasValidProfile())
return false;
if (!B->hasValidProfile())
return true;
return B->getExecutionCount() < A->getExecutionCount();
}
);
BinaryFunction *Hottest = IFs[0];
// For the next pass, we consider only one function from each set of
// identical functions.
Cands.emplace_back(Hottest);
if (IFs.size() <= 1)
continue;
NumIdenticalFunctions += IFs.size() - 1;
for (unsigned i = 1; i < IFs.size(); ++i) {
BinaryFunction *BF = IFs[i];
foldFunction(BC, BFs, BF, Hottest, Mod);
}
}
if (opts::Verbosity >= 1) {
outs() << "BOLT-INFO: found " << NumIdenticalFunctions
<< " identical functions.\n"
<< "BOLT-INFO: modified " << Mod.size() << " functions.\n";
}
NumIdenticalFunctionsFound += NumIdenticalFunctions;
++Iter;
} while (!Mod.empty());
outs() << "BOLT-INFO: ICF pass found " << NumIdenticalFunctionsFound
<< " functions identical to some other function.\n"
<< "BOLT-INFO: ICF pass folded references to " << NumFunctionsFolded
<< " functions.\n"
<< "BOLT-INFO: ICF pass folded " << NumDynamicCallsFolded << " dynamic"
<< " function calls.\n"
<< "BOLT-INFO: Removing all identical functions could save "
<< format("%.2lf", (double) BytesSavedEstimate / 1024)
<< " KB of code space.\n";
if (opts::PrintAll || opts::PrintICF) {
for (auto &I : BFs) {
I.second.print(outs(), "after identical code folding", true);
}
}
}
} // namespace bolt
} // namespace llvm