c27a6a5c63
Summary: I've added a verbosity level to help keep the BOLT spewage to a minimum. The default level is pretty terse now, level 1 is closer to the original, and I've saved level 2 for the noisiest of messages. Error messages should never be suppressed by the verbosity level; only warnings and info messages are. The rationale behind stream usage is as follows: outs() for info and debugging controlled by command line flags; errs() for errors and warnings; dbgs() for output within DEBUG(). With the exception of a few of the level 2 messages, I don't have any strong feelings about the others. (cherry picked from FBD3814259)
1307 lines
44 KiB
C++
1307 lines
44 KiB
C++
//===--- BinaryPasses.cpp - Binary-level analysis/optimization passes -----===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "BinaryPasses.h"
|
|
#include "llvm/Support/Options.h"
|
|
#include <unordered_map>
|
|
|
|
#define DEBUG_TYPE "bolt"
|
|
|
|
namespace opts {

// Options declared in the driver; shared across all passes in this file.
extern llvm::cl::opt<unsigned> Verbosity;
extern llvm::cl::opt<bool> PrintAll;
extern llvm::cl::opt<bool> DumpDotAll;
extern llvm::cl::opt<bool> PrintReordered;
extern llvm::cl::opt<bool> PrintEHRanges;
extern llvm::cl::opt<bool> PrintUCE;
extern llvm::cl::opt<bool> PrintPeepholes;
extern llvm::cl::opt<bool> PrintSimplifyROLoads;
extern llvm::cl::opt<bool> PrintICF;
extern llvm::cl::opt<llvm::bolt::BinaryFunction::SplittingType> SplitFunctions;
// Predicate (defined in the driver) deciding whether a function is selected
// for processing, e.g. by -funcs/-skip-funcs filters.
extern bool shouldProcess(const llvm::bolt::BinaryFunction &Function);

// Basic block layout strategy used by the ReorderBasicBlocks pass.
static llvm::cl::opt<llvm::bolt::BinaryFunction::LayoutType>
ReorderBlocks(
    "reorder-blocks",
    llvm::cl::desc("change layout of basic blocks in a function"),
    llvm::cl::init(llvm::bolt::BinaryFunction::LT_NONE),
    llvm::cl::values(clEnumValN(llvm::bolt::BinaryFunction::LT_NONE,
                                "none",
                                "do not reorder basic blocks"),
                     clEnumValN(llvm::bolt::BinaryFunction::LT_REVERSE,
                                "reverse",
                                "layout blocks in reverse order"),
                     clEnumValN(llvm::bolt::BinaryFunction::LT_OPTIMIZE,
                                "normal",
                                "perform optimal layout based on profile"),
                     clEnumValN(llvm::bolt::BinaryFunction::LT_OPTIMIZE_BRANCH,
                                "branch-predictor",
                                "perform optimal layout prioritizing branch "
                                "predictions"),
                     clEnumValN(llvm::bolt::BinaryFunction::LT_OPTIMIZE_CACHE,
                                "cache",
                                "perform optimal layout prioritizing I-cache "
                                "behavior"),
                     clEnumValEnd));

// Alternative clustering heuristic for block layout that tries to minimize
// the number of emitted branches rather than fall-through distance.
static llvm::cl::opt<bool>
MinBranchClusters(
    "min-branch-clusters",
    llvm::cl::desc("use a modified clustering algorithm geared towards "
                   "minimizing branches"),
    llvm::cl::Hidden);

} // namespace opts
|
|
|
|
namespace llvm {
|
|
namespace bolt {
|
|
|
|
void OptimizeBodylessFunctions::analyze(
    BinaryFunction &BF,
    BinaryContext &BC,
    std::map<uint64_t, BinaryFunction> &BFs) {
  // A "bodyless" candidate is a single-block function whose first
  // instruction is a direct tail call to another known function.
  if (BF.size() != 1 || (*BF.begin()).size() == 0)
    return;

  const auto &OnlyBB = *BF.begin();
  const auto &LeadingInst = *OnlyBB.begin();
  if (!BC.MIA->isTailCall(LeadingInst))
    return;

  const auto &TargetOp = LeadingInst.getOperand(0);
  if (!TargetOp.isExpr())
    return;

  const auto *SymExpr = dyn_cast<MCSymbolRefExpr>(TargetOp.getExpr());
  if (!SymExpr)
    return;

  const auto *Callee = BC.getFunctionForSymbol(&SymExpr->getSymbol());
  if (!Callee)
    return;

  // Record that a call to BF is equivalent to a call to Callee.
  EquivalentCallTarget[BF.getSymbol()] = Callee;
}
|
|
|
|
/// Rewrite calls in \p BF that target bodyless functions so they call the
/// ultimate (transitively resolved) target directly.
void OptimizeBodylessFunctions::optimizeCalls(BinaryFunction &BF,
                                              BinaryContext &BC) {
  for (auto BBIt = BF.begin(), BBEnd = BF.end(); BBIt != BBEnd; ++BBIt) {
    for (auto InstIt = (*BBIt).begin(), InstEnd = (*BBIt).end();
         InstIt != InstEnd; ++InstIt) {
      auto &Inst = *InstIt;
      if (!BC.MIA->isCall(Inst))
        continue;
      auto &Op1 = Inst.getOperand(0);
      if (!Op1.isExpr())
        continue;
      auto Expr = dyn_cast<MCSymbolRefExpr>(Op1.getExpr());
      if (!Expr)
        continue;
      auto *OriginalTarget = &Expr->getSymbol();
      auto *Target = OriginalTarget;
      // Iteratively update target since we could have f1() calling f2()
      // calling f3() calling f4() and we want to output f1() directly
      // calling f4(). Bound the number of hops by the size of the map so
      // that a cycle of mutually tail-calling bodyless functions cannot
      // make this loop spin forever.
      size_t Hops = 0;
      const size_t MaxHops = EquivalentCallTarget.size();
      while (Hops++ < MaxHops && EquivalentCallTarget.count(Target)) {
        Target = EquivalentCallTarget.find(Target)->second->getSymbol();
      }
      if (Target == OriginalTarget)
        continue;
      DEBUG(dbgs() << "BOLT-DEBUG: Optimizing " << (*BBIt).getName()
                   << " in " << BF
                   << ": replacing call to " << OriginalTarget->getName()
                   << " by call to " << Target->getName() << "\n");
      BC.MIA->replaceCallTargetOperand(Inst, Target, BC.Ctx.get());
    }
  }
}
|
|
|
|
void OptimizeBodylessFunctions::runOnFunctions(
    BinaryContext &BC,
    std::map<uint64_t, BinaryFunction> &BFs,
    std::set<uint64_t> &) {
  // Phase 1: record, for every eligible function, the function that a call
  // to it is equivalent to.
  for (auto &Entry : BFs) {
    auto &Function = Entry.second;
    if (Function.isSimple() && opts::shouldProcess(Function))
      analyze(Function, BC, BFs);
  }
  // Phase 2: retarget calls through the recorded equivalences.
  for (auto &Entry : BFs) {
    auto &Function = Entry.second;
    if (Function.isSimple() && opts::shouldProcess(Function))
      optimizeCalls(Function, BC);
  }
}
|
|
|
|
void InlineSmallFunctions::findInliningCandidates(
    BinaryContext &BC,
    const std::map<uint64_t, BinaryFunction> &BFs) {
  for (const auto &Entry : BFs) {
    const auto &Function = Entry.second;
    // Only simple, selected, single-block functions are candidates.
    if (!Function.isSimple() || !opts::shouldProcess(Function) ||
        Function.size() != 1)
      continue;
    const auto &OnlyBB = *Function.begin();
    // Check if the function is small enough and doesn't do a tail call.
    // The size we use includes pseudo-instructions but here they shouldn't
    // matter. So some opportunities may be missed because of this.
    if (OnlyBB.size() == 0 || OnlyBB.size() > kMaxInstructions)
      continue;
    const auto &LastInst = *OnlyBB.rbegin();
    if (BC.MIA->isReturn(LastInst) && !BC.MIA->isTailCall(LastInst))
      InliningCandidates.insert(&Function);
  }

  DEBUG(dbgs() << "BOLT-DEBUG: " << InliningCandidates.size()
               << " inlineable functions.\n");
}
|
|
|
|
/// Populate InliningCandidates for the aggressive (multi-block) inliner.
/// A candidate must be simple, selected for processing, small enough,
/// free of EH ranges and CFI/EH-label instructions, and not in the
/// hand-maintained deny list below.
void InlineSmallFunctions::findInliningCandidatesAggressive(
    BinaryContext &BC,
    const std::map<uint64_t, BinaryFunction> &BFs) {
  // Functions known to be unsafe/unwanted to inline (HHVM hash helpers).
  std::set<std::string> OverwrittenFunctions = {
    "_ZN4HPHP13hash_string_iEPKcj",
    "_ZN4HPHP21hash_string_cs_unsafeEPKcj",
    "_ZN4HPHP14hash_string_csEPKcj",
    "_ZN4HPHP20hash_string_i_unsafeEPKcj",
    "_ZNK4HPHP10StringData10hashHelperEv"
  };
  for (const auto &BFIt : BFs) {
    const auto &Function = BFIt.second;
    if (!Function.isSimple() ||
        !opts::shouldProcess(Function) ||
        OverwrittenFunctions.count(Function.getSymbol()->getName()) ||
        Function.hasEHRanges())
      continue;
    // Compute the candidate's code size over its whole layout.
    uint64_t FunctionSize = 0;
    for (const auto *BB : Function.layout()) {
      FunctionSize += BC.computeCodeSize(BB->begin(), BB->end());
    }
    assert(FunctionSize > 0 && "found empty function");
    if (FunctionSize > kMaxSize)
      continue;
    // Reject functions containing EH labels or CFI instructions. Stop the
    // scan as soon as one is found: the previous inner-only break kept
    // uselessly scanning the remaining blocks after FoundCFI was set.
    bool FoundCFI = false;
    for (const auto BB : Function.layout()) {
      for (const auto &Inst : *BB) {
        if (BC.MIA->isEHLabel(Inst) || BC.MIA->isCFI(Inst)) {
          FoundCFI = true;
          break;
        }
      }
      if (FoundCFI)
        break;
    }
    if (!FoundCFI)
      InliningCandidates.insert(&Function);
  }

  DEBUG(dbgs() << "BOLT-DEBUG: " << InliningCandidates.size()
               << " inlineable functions.\n");
}
|
|
|
|
namespace {

/// Returns whether a function creates a stack frame for itself or not.
/// If so, we need to manipulate the stack pointer when calling this function.
/// Since we're only inlining very small functions, we return false for now, but
/// we could for instance check if the function starts with 'push ebp'.
/// TODO generalize this.
bool createsStackFrame(const BinaryBasicBlock &) {
  // Conservative stub: assume no frame is created (see TODO above).
  return false;
}

} // namespace
|
|
|
|
/// Replace the call instruction \p CallInst in \p BB with the body of the
/// single-block callee \p InlinedFunctionBB (minus its trailing return and
/// any EH-label/CFI pseudo-instructions).
void InlineSmallFunctions::inlineCall(
    BinaryContext &BC,
    BinaryBasicBlock &BB,
    MCInst *CallInst,
    const BinaryBasicBlock &InlinedFunctionBB) {
  assert(BC.MIA->isCall(*CallInst) && "Can only inline a call.");
  assert(BC.MIA->isReturn(*InlinedFunctionBB.rbegin()) &&
         "Inlined function should end with a return.");

  // Instruction sequence that will replace the call.
  std::vector<MCInst> InlinedInstance;

  const bool ShouldAdjustStack = createsStackFrame(InlinedFunctionBB);

  // Move stack like 'call' would if needed.
  if (ShouldAdjustStack) {
    MCInst StackInc;
    BC.MIA->createStackPointerIncrement(StackInc);
    InlinedInstance.push_back(StackInc);
  }

  // Copy the callee's body, stopping at its terminating return and
  // skipping EH labels and CFI pseudo-instructions.
  for (auto Inst : InlinedFunctionBB) {
    if (BC.MIA->isReturn(Inst))
      break;
    if (BC.MIA->isEHLabel(Inst) || BC.MIA->isCFI(Inst))
      continue;
    InlinedInstance.push_back(Inst);
  }

  // Move stack pointer like 'ret' would.
  if (ShouldAdjustStack) {
    MCInst StackDec;
    BC.MIA->createStackPointerDecrement(StackDec);
    InlinedInstance.push_back(StackDec);
  }

  BB.replaceInstruction(CallInst, InlinedInstance);
}
|
|
|
|
/// Inline the whole multi-block function \p InlinedFunction at the call site
/// identified by (\p CallerBB, \p CallInstIndex) inside \p CallerFunction.
/// Clones the callee's blocks into the caller, rewrites branch targets and
/// CFG edges, splits the caller block when the call is not its last
/// instruction, and returns the (block, instruction-index) position right
/// after the inlined code so the caller's scan can resume there.
std::pair<BinaryBasicBlock *, unsigned>
InlineSmallFunctions::inlineCall(
    BinaryContext &BC,
    BinaryFunction &CallerFunction,
    BinaryBasicBlock *CallerBB,
    const unsigned CallInstIndex,
    const BinaryFunction &InlinedFunction) {
  // Get the instruction to be replaced with inlined code.
  MCInst &CallInst = CallerBB->getInstructionAtIndex(CallInstIndex);
  assert(BC.MIA->isCall(CallInst) && "Can only inline a call.");

  // Point in the function after the inlined code.
  BinaryBasicBlock *AfterInlinedBB = nullptr;
  unsigned AfterInlinedIstrIndex = 0;

  // In case of a tail call we should not remove any ret instructions from the
  // inlined instance.
  bool IsTailCall = BC.MIA->isTailCall(CallInst);

  // The first block of the function to be inlined can be merged with the caller
  // basic block. This cannot happen if there are jumps to the first block.
  bool CanMergeFirstInlinedBlock = (*InlinedFunction.begin()).pred_size() == 0;

  // If the call to be inlined is not at the end of its basic block and we have
  // to inline more than one basic blocks (or even just one basic block that
  // cannot be merged into the caller block), then the caller's basic block
  // should be split.
  bool ShouldSplitCallerBB =
    CallInstIndex < CallerBB->size() - 1 &&
    (InlinedFunction.size() > 1 || !CanMergeFirstInlinedBlock);

  // Copy inlined function's basic blocks into a vector of basic blocks that
  // will be inserted in the caller function (the inlined instance). Also, we
  // keep a mapping from basic block index to the corresponding block in the
  // inlined instance.
  std::vector<std::unique_ptr<BinaryBasicBlock>> InlinedInstance;
  std::vector<BinaryBasicBlock *>
    BBIndexToInlinedInstanceBB(InlinedFunction.size(), nullptr);
  for (const auto InlinedFunctionBB : InlinedFunction.layout()) {
    InlinedInstance.emplace_back(CallerFunction.createBasicBlock(0));
    BBIndexToInlinedInstanceBB[InlinedFunction.getIndex(InlinedFunctionBB)] =
      InlinedInstance.back().get();
    if (InlinedFunction.hasValidProfile())
      InlinedInstance.back()->setExecutionCount(
          InlinedFunctionBB->getExecutionCount());
  }
  if (ShouldSplitCallerBB) {
    // Add one extra block at the inlined instance for the removed part of the
    // caller block.
    InlinedInstance.emplace_back(CallerFunction.createBasicBlock(0));
    BBIndexToInlinedInstanceBB.push_back(InlinedInstance.back().get());
    if (CallerFunction.hasValidProfile())
      InlinedInstance.back()->setExecutionCount(CallerBB->getExecutionCount());
  }

  // Copy instructions to the basic blocks of the inlined instance.
  unsigned InlinedInstanceBBIndex = 0;
  for (const auto InlinedFunctionBB : InlinedFunction.layout()) {
    // Get the corresponding block of the inlined instance.
    auto *InlinedInstanceBB = InlinedInstance[InlinedInstanceBBIndex].get();
    assert(InlinedInstanceBB ==
           BBIndexToInlinedInstanceBB[InlinedFunction.getIndex(InlinedFunctionBB)]);

    bool IsExitingBlock = false;

    // Copy instructions into the inlined instance.
    for (auto Instruction : *InlinedFunctionBB) {
      if (!IsTailCall &&
          BC.MIA->isReturn(Instruction) &&
          !BC.MIA->isTailCall(Instruction)) {
        // Skip returns when the caller does a normal call as opposed to a tail
        // call.
        IsExitingBlock = true;
        continue;
      }
      if (!IsTailCall &&
          BC.MIA->isTailCall(Instruction)) {
        // Convert tail calls to normal calls when the caller does a normal
        // call.
        if (!BC.MIA->convertTailCallToCall(Instruction))
          assert(false && "unexpected tail call opcode found");
        IsExitingBlock = true;
      }
      if (BC.MIA->isBranch(Instruction) &&
          !BC.MIA->isIndirectBranch(Instruction)) {
        // Convert the branch targets in the branch instructions that will be
        // added to the inlined instance.
        const MCSymbol *OldTargetLabel = nullptr;
        const MCSymbol *OldFTLabel = nullptr;
        MCInst *CondBranch = nullptr;
        MCInst *UncondBranch = nullptr;
        assert(BC.MIA->analyzeBranch(Instruction, OldTargetLabel, OldFTLabel,
                                     CondBranch, UncondBranch));
        assert(OldTargetLabel);
        const MCSymbol *NewTargetLabel = nullptr;
        for (const auto SuccBB : InlinedFunctionBB->successors()) {
          if (SuccBB->getLabel() == OldTargetLabel) {
            const auto InlinedInstanceSuccBB =
              BBIndexToInlinedInstanceBB[InlinedFunction.getIndex(SuccBB)];
            NewTargetLabel = InlinedInstanceSuccBB->getLabel();
            break;
          }
        }
        assert(NewTargetLabel);
        BC.MIA->replaceBranchTarget(Instruction, NewTargetLabel, BC.Ctx.get());
      }
      // TODO; Currently we simply ignore CFI instructions but we need to
      // address them for correctness.
      if (!BC.MIA->isEHLabel(Instruction) &&
          !BC.MIA->isCFI(Instruction)) {
        InlinedInstanceBB->addInstruction(std::move(Instruction));
      }
    }

    // Add CFG edges to the basic blocks of the inlined instance.
    std::vector<BinaryBasicBlock *>
      Successors(InlinedFunctionBB->succ_size(), nullptr);
    // Map each successor of the original callee block to its clone.
    std::transform(
        InlinedFunctionBB->succ_begin(),
        InlinedFunctionBB->succ_end(),
        Successors.begin(),
        [&InlinedFunction, &BBIndexToInlinedInstanceBB]
        (const BinaryBasicBlock *BB) {
          return BBIndexToInlinedInstanceBB[InlinedFunction.getIndex(BB)];
        });
    if (InlinedFunction.hasValidProfile()) {
      InlinedInstanceBB->addSuccessors(
          Successors.begin(),
          Successors.end(),
          InlinedFunctionBB->branch_info_begin(),
          InlinedFunctionBB->branch_info_end());
    } else {
      InlinedInstanceBB->addSuccessors(
          Successors.begin(),
          Successors.end());
    }

    // Exiting blocks (those that contained the callee's return) must jump
    // to the code that follows the call site in the caller.
    if (IsExitingBlock) {
      assert(Successors.size() == 0);
      if (ShouldSplitCallerBB) {
        if (InlinedFunction.hasValidProfile()) {
          InlinedInstanceBB->addSuccessor(
              InlinedInstance.back().get(),
              InlinedInstanceBB->getExecutionCount());
        } else {
          InlinedInstanceBB->addSuccessor(InlinedInstance.back().get());
        }
        MCInst ExitBranchInst;
        const MCSymbol *ExitLabel = InlinedInstance.back().get()->getLabel();
        BC.MIA->createUncondBranch(ExitBranchInst, ExitLabel, BC.Ctx.get());
        InlinedInstanceBB->addInstruction(std::move(ExitBranchInst));
      } else if (InlinedInstanceBBIndex > 0 || !CanMergeFirstInlinedBlock) {
        assert(CallInstIndex == CallerBB->size() - 1);
        assert(CallerBB->succ_size() <= 1);
        if (CallerBB->succ_size() == 1) {
          if (InlinedFunction.hasValidProfile()) {
            InlinedInstanceBB->addSuccessor(
                *CallerBB->succ_begin(),
                InlinedInstanceBB->getExecutionCount());
          } else {
            InlinedInstanceBB->addSuccessor(*CallerBB->succ_begin());
          }
          MCInst ExitBranchInst;
          const MCSymbol *ExitLabel = (*CallerBB->succ_begin())->getLabel();
          BC.MIA->createUncondBranch(ExitBranchInst, ExitLabel, BC.Ctx.get());
          InlinedInstanceBB->addInstruction(std::move(ExitBranchInst));
        }
      }
    }

    ++InlinedInstanceBBIndex;
  }

  if (ShouldSplitCallerBB) {
    // Split the basic block that contains the call and add the removed
    // instructions in the last block of the inlined instance.
    // (Is it OK to have a basic block with just CFI instructions?)
    std::vector<MCInst> TrailInstructions =
        std::move(CallerBB->splitInstructions(&CallInst));
    assert(TrailInstructions.size() > 0);
    InlinedInstance.back()->addInstructions(
        TrailInstructions.begin(),
        TrailInstructions.end());
    // Add CFG edges for the block with the removed instructions.
    if (CallerFunction.hasValidProfile()) {
      InlinedInstance.back()->addSuccessors(
          CallerBB->succ_begin(),
          CallerBB->succ_end(),
          CallerBB->branch_info_begin(),
          CallerBB->branch_info_end());
    } else {
      InlinedInstance.back()->addSuccessors(
          CallerBB->succ_begin(),
          CallerBB->succ_end());
    }
    // Update the after-inlined point.
    AfterInlinedBB = InlinedInstance.back().get();
    AfterInlinedIstrIndex = 0;
  }

  assert(InlinedInstance.size() > 0 && "found function with no basic blocks");
  assert(InlinedInstance.front()->size() > 0 &&
         "found function with empty basic block");

  // If the inlining cannot happen as a simple instruction insertion into
  // CallerBB, we remove the outgoing CFG edges of the caller block.
  if (InlinedInstance.size() > 1 || !CanMergeFirstInlinedBlock) {
    CallerBB->removeSuccessors(CallerBB->succ_begin(), CallerBB->succ_end());
    if (!ShouldSplitCallerBB) {
      // Update the after-inlined point.
      AfterInlinedBB = CallerFunction.getBasicBlockAfter(CallerBB);
      AfterInlinedIstrIndex = 0;
    }
  } else {
    assert(!ShouldSplitCallerBB);
    // Update the after-inlined point.
    if (CallInstIndex < CallerBB->size() - 1) {
      AfterInlinedBB = CallerBB;
      AfterInlinedIstrIndex =
        CallInstIndex + InlinedInstance.front()->size();
    } else {
      AfterInlinedBB = CallerFunction.getBasicBlockAfter(CallerBB);
      AfterInlinedIstrIndex = 0;
    }
  }

  // Do the inlining by merging the first block of the inlined instance into
  // the caller basic block if possible and adding the rest of the inlined
  // instance basic blocks in the caller function.
  if (CanMergeFirstInlinedBlock) {
    CallerBB->replaceInstruction(
        &CallInst,
        InlinedInstance.front()->begin(),
        InlinedInstance.front()->end());
    if (InlinedInstance.size() > 1) {
      auto FirstBB = InlinedInstance.begin()->get();
      if (InlinedFunction.hasValidProfile()) {
        CallerBB->addSuccessors(
            FirstBB->succ_begin(),
            FirstBB->succ_end(),
            FirstBB->branch_info_begin(),
            FirstBB->branch_info_end());
      } else {
        CallerBB->addSuccessors(
            FirstBB->succ_begin(),
            FirstBB->succ_end());
      }
      FirstBB->removeSuccessors(FirstBB->succ_begin(), FirstBB->succ_end());
    }
    InlinedInstance.erase(InlinedInstance.begin());
  } else {
    CallerBB->eraseInstruction(&CallInst);
    // NOTE(review): both branches below are identical — the no-profile case
    // probably should not pass an execution count (compare the
    // hasValidProfile patterns used elsewhere in this function). Confirm.
    if (CallerFunction.hasValidProfile()) {
      CallerBB->addSuccessor(InlinedInstance.front().get(),
                             CallerBB->getExecutionCount());
    } else {
      CallerBB->addSuccessor(InlinedInstance.front().get(),
                             CallerBB->getExecutionCount());
    }
  }
  unsigned NumBlocksToAdd = InlinedInstance.size();
  CallerFunction.insertBasicBlocks(CallerBB, std::move(InlinedInstance));
  CallerFunction.updateLayout(CallerBB, NumBlocksToAdd);
  CallerFunction.fixBranches();

  return std::make_pair(AfterInlinedBB, AfterInlinedIstrIndex);
}
|
|
|
|
/// Inline eligible single-block callees at direct, non-tail call sites in
/// \p Function, hottest blocks first, while the estimated hot size stays
/// within the function's maximum size. Returns true if anything was inlined.
bool InlineSmallFunctions::inlineCallsInFunction(
    BinaryContext &BC,
    BinaryFunction &Function) {
  // Visit hot blocks first so the size budget is spent where it matters.
  std::vector<BinaryBasicBlock *> Blocks(Function.layout().begin(),
                                         Function.layout().end());
  std::sort(Blocks.begin(), Blocks.end(),
            [](const BinaryBasicBlock *BB1, const BinaryBasicBlock *BB2) {
              return BB1->getExecutionCount() > BB2->getExecutionCount();
            });
  uint32_t ExtraSize = 0;

  // Accumulate dynamic-call statistics reported by runOnFunctions.
  for (auto BB : Blocks) {
    for (auto InstIt = BB->begin(), End = BB->end(); InstIt != End; ++InstIt) {
      auto &Inst = *InstIt;
      if (BC.MIA->isCall(Inst)) {
        totalDynamicCalls += BB->getExecutionCount();
      }
    }
  }

  bool DidInlining = false;

  for (auto BB : Blocks) {
    if (BB->isCold())
      continue;

    for (auto InstIt = BB->begin(), End = BB->end(); InstIt != End; ) {
      auto &Inst = *InstIt;
      // Candidate sites: direct (symbolic) non-tail calls with a single
      // operand.
      if (BC.MIA->isCall(Inst) &&
          !BC.MIA->isTailCall(Inst) &&
          Inst.size() == 1 &&
          Inst.getOperand(0).isExpr()) {
        auto Target = dyn_cast<MCSymbolRefExpr>(
            Inst.getOperand(0).getExpr());
        assert(Target && "Not MCSymbolRefExpr");
        const auto *TargetFunction =
            BC.getFunctionForSymbol(&Target->getSymbol());
        if (TargetFunction) {
          bool CallToInlineableFunction =
              InliningCandidates.count(TargetFunction);

          totalInlineableCalls +=
              CallToInlineableFunction * BB->getExecutionCount();

          // Inline only while the estimated hot size plus inlined code
          // still fits within the function's maximum size.
          if (CallToInlineableFunction &&
              TargetFunction->getSize() + ExtraSize
              + Function.estimateHotSize() < Function.getMaxSize()) {
            // NOTE(review): NextInstIt is computed before inlineCall
            // mutates BB; this assumes replaceInstruction keeps iterators
            // past the call site valid — confirm.
            auto NextInstIt = std::next(InstIt);
            inlineCall(BC, *BB, &Inst, *TargetFunction->begin());
            DidInlining = true;
            DEBUG(dbgs() << "BOLT-DEBUG: Inlining call to "
                         << *TargetFunction << " in "
                         << Function << "\n");
            InstIt = NextInstIt;
            ExtraSize += TargetFunction->getSize();
            inlinedDynamicCalls += BB->getExecutionCount();
            continue;
          }
        }
      }

      ++InstIt;
    }
  }

  return DidInlining;
}
|
|
|
|
/// Aggressive variant of inlineCallsInFunction: inlines whole multi-block
/// callees (including at tail-call sites) via the CFG-rewriting inlineCall
/// overload, resuming iteration from the position that overload returns.
/// Returns true if anything was inlined.
bool InlineSmallFunctions::inlineCallsInFunctionAggressive(
    BinaryContext &BC,
    BinaryFunction &Function) {
  // Visit hot blocks first so the size budget is spent where it matters.
  std::vector<BinaryBasicBlock *> Blocks(Function.layout().begin(),
                                         Function.layout().end());
  std::sort(Blocks.begin(), Blocks.end(),
            [](const BinaryBasicBlock *BB1, const BinaryBasicBlock *BB2) {
              return BB1->getExecutionCount() > BB2->getExecutionCount();
            });
  uint32_t ExtraSize = 0;

  // Accumulate dynamic-call statistics reported by runOnFunctions.
  for (auto BB : Blocks) {
    for (auto InstIt = BB->begin(), End = BB->end(); InstIt != End; ++InstIt) {
      auto &Inst = *InstIt;
      if (BC.MIA->isCall(Inst)) {
        totalDynamicCalls += BB->getExecutionCount();
      }
    }
  }

  bool DidInlining = false;

  for (auto BB : Blocks) {
    if (BB->isCold())
      continue;

    // Track the numeric index alongside the iterator: inlineCall needs the
    // index, and both must be re-derived after each inlining mutates BB.
    unsigned InstIndex = 0;
    for (auto InstIt = BB->begin(); InstIt != BB->end(); ) {
      auto &Inst = *InstIt;
      if (BC.MIA->isCall(Inst) &&
          Inst.size() == 1 &&
          Inst.getOperand(0).isExpr()) {
        assert(!BC.MIA->isInvoke(Inst));
        auto Target = dyn_cast<MCSymbolRefExpr>(
            Inst.getOperand(0).getExpr());
        assert(Target && "Not MCSymbolRefExpr");
        const auto *TargetFunction =
            BC.getFunctionForSymbol(&Target->getSymbol());
        if (TargetFunction) {
          bool CallToInlineableFunction =
              InliningCandidates.count(TargetFunction);

          totalInlineableCalls +=
              CallToInlineableFunction * BB->getExecutionCount();

          // Inline only while the estimated hot size plus inlined code
          // still fits within the function's maximum size.
          if (CallToInlineableFunction &&
              TargetFunction->getSize() + ExtraSize
              + Function.estimateHotSize() < Function.getMaxSize()) {
            unsigned NextInstIndex = 0;
            BinaryBasicBlock *NextBB = nullptr;
            std::tie(NextBB, NextInstIndex) =
                inlineCall(BC, Function, BB, InstIndex, *TargetFunction);
            DidInlining = true;
            DEBUG(dbgs() << "BOLT-DEBUG: Inlining call to "
                         << *TargetFunction << " in "
                         << Function << "\n");
            // Resume inside BB if the after-inlined point stayed in it;
            // otherwise finish this block (later blocks are visited by the
            // outer loop).
            InstIndex = NextBB == BB ? NextInstIndex : BB->size();
            InstIt = NextBB == BB ? BB->begin() + NextInstIndex : BB->end();
            ExtraSize += TargetFunction->getSize();
            inlinedDynamicCalls += BB->getExecutionCount();
            continue;
          }
        }
      }

      ++InstIndex;
      ++InstIt;
    }
  }

  return DidInlining;
}
|
|
|
|
/// Drive the small-function inliner: gather candidates, then process the
/// hottest profiled functions until the modification budget is exhausted.
void InlineSmallFunctions::runOnFunctions(
    BinaryContext &BC,
    std::map<uint64_t, BinaryFunction> &BFs,
    std::set<uint64_t> &) {
  findInliningCandidates(BC, BFs);

  // Collect simple, selected functions that have profile data.
  std::vector<BinaryFunction *> ConsideredFunctions;
  for (auto &It : BFs) {
    auto &Function = It.second;
    if (!Function.isSimple() || !opts::shouldProcess(Function))
      continue;
    if (Function.getExecutionCount() == BinaryFunction::COUNT_NO_PROFILE)
      continue;
    ConsideredFunctions.push_back(&Function);
  }
  // Hottest functions first.
  std::sort(ConsideredFunctions.begin(), ConsideredFunctions.end(),
            [](BinaryFunction *A, BinaryFunction *B) {
              return B->getExecutionCount() < A->getExecutionCount();
            });
  // NOTE(review): the '<=' bound lets kMaxFunctions + 1 functions be
  // modified; confirm whether the limit is meant to be inclusive.
  unsigned ModifiedFunctions = 0;
  for (unsigned i = 0; i < ConsideredFunctions.size() &&
       ModifiedFunctions <= kMaxFunctions; ++i) {
    auto &Function = *ConsideredFunctions[i];
    if (inlineCallsInFunction(BC, Function))
      ++ModifiedFunctions;
  }

  DEBUG(dbgs() << "BOLT-DEBUG: Inlined " << inlinedDynamicCalls << " of "
               << totalDynamicCalls << " function calls in the profile.\n");
  // Guard the percentage against division by zero when no inlineable calls
  // were seen in the profile.
  if (totalInlineableCalls > 0) {
    DEBUG(dbgs() << "BOLT-DEBUG: Inlined calls represent "
                 << (100.0 * inlinedDynamicCalls / totalInlineableCalls)
                 << "% of all inlineable calls in the profile.\n");
  }
}
|
|
|
|
void EliminateUnreachableBlocks::runOnFunction(BinaryFunction& Function) {
  if (!Function.isSimple() || !opts::shouldProcess(Function)) return;

  // FIXME: this wouldn't work with C++ exceptions until we implement
  // support for those as there will be "invisible" edges
  // in the graph.
  if (Function.layout_size() == 0)
    return;

  // Warn once per run that this pass is unsafe with exceptions.
  if (NagUser) {
    if (opts::Verbosity >= 1) {
      errs()
          << "BOLT-WARNING: Using -eliminate-unreachable is experimental and "
             "unsafe for exceptions\n";
    }
    NagUser = false;
  }

  if (Function.hasEHRanges()) return;

  // Depth-first search from the entry block to mark everything reachable.
  std::stack<BinaryBasicBlock*> Worklist;
  std::map<BinaryBasicBlock *, bool> Reachable;
  BinaryBasicBlock *Entry = *Function.layout_begin();
  Reachable[Entry] = true;
  Worklist.push(Entry);
  while (!Worklist.empty()) {
    auto *Block = Worklist.top();
    Worklist.pop();
    for (auto *Succ : Block->successors()) {
      if (!Reachable[Succ]) {
        Reachable[Succ] = true;
        Worklist.push(Succ);
      }
    }
  }

  // Drop every block the DFS never reached.
  const auto Count = Function.eraseDeadBBs(Reachable);
  if (Count) {
    DEBUG(dbgs() << "BOLT: Removed " << Count
                 << " dead basic block(s) in function " << Function << '\n');
  }

  if (opts::PrintAll || opts::PrintUCE)
    Function.print(outs(), "after unreachable code elimination", true);

  if (opts::DumpDotAll)
    Function.dumpGraphForPass("unreachable-code");
}
|
|
|
|
void EliminateUnreachableBlocks::runOnFunctions(
    BinaryContext&,
    std::map<uint64_t, BinaryFunction> &BFs,
    std::set<uint64_t> &
) {
  // Apply the per-function elimination to every binary function.
  for (auto &Entry : BFs)
    runOnFunction(Entry.second);
}
|
|
|
|
void ReorderBasicBlocks::runOnFunctions(
    BinaryContext &BC,
    std::map<uint64_t, BinaryFunction> &BFs,
    std::set<uint64_t> &LargeFunctions) {
  for (auto &It : BFs) {
    auto &Function = It.second;

    // Skip functions we cannot or were asked not to touch.
    if (!Function.isSimple() || !opts::shouldProcess(Function))
      continue;

    if (opts::ReorderBlocks == BinaryFunction::LT_NONE)
      continue;

    // Split when splitting is requested for all functions, for functions
    // with EH ranges, or for functions known to be too large.
    const bool ShouldSplit =
        opts::SplitFunctions == BinaryFunction::ST_ALL ||
        (opts::SplitFunctions == BinaryFunction::ST_EH &&
         Function.hasEHRanges()) ||
        LargeFunctions.find(It.first) != LargeFunctions.end();
    Function.modifyLayout(opts::ReorderBlocks, opts::MinBranchClusters,
                          ShouldSplit);
    if (opts::PrintAll || opts::PrintReordered)
      Function.print(outs(), "after reordering blocks", true);
    if (opts::DumpDotAll)
      Function.dumpGraphForPass("reordering");
  }
}
|
|
|
|
void FixupFunctions::runOnFunctions(
    BinaryContext &BC,
    std::map<uint64_t, BinaryFunction> &BFs,
    std::set<uint64_t> &
) {
  for (auto &It : BFs) {
    auto &Function = It.second;

    if (!Function.isSimple() || !opts::shouldProcess(Function))
      continue;

    // Fix the CFI state; on failure demote the function to non-simple so
    // subsequent passes leave it alone.
    if (!Function.fixCFIState()) {
      if (opts::Verbosity >= 1) {
        errs() << "BOLT-WARNING: unable to fix CFI state for function "
               << Function << ". Skipping.\n";
      }
      Function.setSimple(false);
      continue;
    }

    // Update exception handling information.
    Function.updateEHRanges();
    if (opts::PrintAll || opts::PrintEHRanges)
      Function.print(outs(), "after updating EH ranges", true);
    if (opts::DumpDotAll)
      Function.dumpGraphForPass("update-EH-ranges");
  }
}
|
|
|
|
/// Redirect conditional branches whose taken target is a block containing
/// nothing but a tail call, so the conditional branch tail-calls directly.
/// Only forward jumps (by address) are patched. Updates the pass-wide
/// counters and returns true if anything was patched in \p BF.
bool SimplifyConditionalTailCalls::fixTailCalls(BinaryContext &BC,
                                                BinaryFunction &BF) {
  if (BF.layout_size() == 0)
    return false;

  auto &MIA = BC.MIA;
  uint64_t NumLocalTailCalls = 0;
  uint64_t NumLocalPatchedTailCalls = 0;

  for (auto* BB : BF.layout()) {
    const MCSymbol *TBB = nullptr;
    const MCSymbol *FBB = nullptr;
    MCInst *CondBranch = nullptr;
    MCInst *UncondBranch = nullptr;

    // Determine the control flow at the end of each basic block
    if (!BB->analyzeBranch(*MIA, TBB, FBB, CondBranch, UncondBranch)) {
      continue;
    }

    // TODO: do we need to test for other branch patterns?

    // For this particular case, the first basic block ends with
    // a conditional branch and has two successors, one fall-through
    // and one for when the condition is true.
    // The target of the conditional is a basic block with a single
    // unconditional branch (i.e. tail call) to another function.
    // We don't care about the contents of the fall-through block.
    // Note: this code makes the assumption that the fall-through
    // block is the last successor.
    if (CondBranch && !UncondBranch && BB->succ_size() == 2) {
      // Find conditional branch target assuming the fall-through is
      // always the last successor.
      // NOTE(review): assumes the taken target is the FIRST successor —
      // confirm that successor ordering guarantees this.
      auto *CondTargetBB = *BB->succ_begin();

      // Does the BB contain a single instruction?
      if (CondTargetBB->size() - CondTargetBB->getNumPseudos() == 1) {
        // Check to see if the sole instruction is a tail call.
        auto const &Instr = *CondTargetBB->begin();

        if (MIA->isTailCall(Instr)) {
          ++NumTailCallCandidates;
          ++NumLocalTailCalls;

          auto const &TailTargetSymExpr =
            cast<MCSymbolRefExpr>(Instr.getOperand(0).getExpr());
          auto const &TailTarget = TailTargetSymExpr->getSymbol();

          // Lookup the address for the tail call target.
          auto const TailAddress = BC.GlobalSymbols.find(TailTarget.getName());
          if (TailAddress == BC.GlobalSymbols.end())
            continue;

          // Check to make sure we would be doing a forward jump.
          // This assumes the address range of the current BB and the
          // tail call target address don't overlap.
          if (BF.getAddress() < TailAddress->second) {
            ++NumTailCallsPatched;
            ++NumLocalPatchedTailCalls;

            // Is the original jump forward or backward?
            const bool isForward =
              TailAddress->second > BF.getAddress() + BB->getOffset();

            if (isForward) ++NumOrigForwardBranches;

            // Patch the new target address into the conditional branch.
            CondBranch->getOperand(0).setExpr(TailTargetSymExpr);
            // Remove the unused successor which may be eliminated later
            // if there are no other users.
            BB->removeSuccessor(CondTargetBB);
            DEBUG(dbgs() << "patched " << (isForward ? "(fwd)" : "(back)")
                         << " tail call in " << BF << ".\n";);
          }
        }
      }
    }
  }

  DEBUG(dbgs() << "BOLT: patched " << NumLocalPatchedTailCalls
               << " tail calls (" << NumOrigForwardBranches << " forward)"
               << " from a total of " << NumLocalTailCalls
               << " in function " << BF << "\n";);

  return NumLocalPatchedTailCalls > 0;
}
|
|
|
|
// Run conditional-tail-call simplification over every simple function,
// optionally printing or dumping each patched function, then report the
// aggregate statistics on outs().
void SimplifyConditionalTailCalls::runOnFunctions(
    BinaryContext &BC,
    std::map<uint64_t, BinaryFunction> &BFs,
    std::set<uint64_t> &
) {
  for (auto &BFI : BFs) {
    auto &Function = BFI.second;

    if (!Function.isSimple())
      continue;

    // Fix tail calls to reduce branch mispredictions.
    const bool Modified = fixTailCalls(BC, Function);
    if (!Modified)
      continue;

    if (opts::PrintAll || opts::PrintReordered)
      Function.print(outs(), "after tail call patching", true);

    if (opts::DumpDotAll)
      Function.dumpGraphForPass("tail-call-patching");
  }

  outs() << "BOLT-INFO: patched " << NumTailCallsPatched
         << " tail calls (" << NumOrigForwardBranches << " forward)"
         << " from a total of " << NumTailCallCandidates << "\n";
}
|
|
|
|
// Apply the instruction-shortening peephole to every instruction of every
// basic block in Function, in place.
void Peepholes::shortenInstructions(BinaryContext &BC,
                                    BinaryFunction &Function) {
  for (auto &BB : Function)
    for (auto &Inst : BB)
      BC.MIA->shortenInstruction(Inst);
}
|
|
|
|
// Run the peephole pass over every simple function that the user asked us
// to process, with optional per-function printing/dumping afterwards.
void Peepholes::runOnFunctions(BinaryContext &BC,
                               std::map<uint64_t, BinaryFunction> &BFs,
                               std::set<uint64_t> &LargeFunctions) {
  for (auto &BFI : BFs) {
    auto &Function = BFI.second;

    // Skip functions we cannot or should not touch.
    if (!Function.isSimple() || !opts::shouldProcess(Function))
      continue;

    shortenInstructions(BC, Function);

    if (opts::PrintAll || opts::PrintPeepholes)
      Function.print(outs(), "after peepholes", true);

    if (opts::DumpDotAll)
      Function.dumpGraphForPass("peepholes");
  }
}
|
|
|
|
// Scan every instruction of BF looking for loads whose target memory
// address can be computed statically and lies in a read-only section;
// where the target ISA allows it, replace the memory operand with the
// constant value read from the section. Updates the pass-wide counters
// and returns true if at least one load in BF was simplified.
bool SimplifyRODataLoads::simplifyRODataLoads(
    BinaryContext &BC, BinaryFunction &BF) {
  auto &MIA = BC.MIA;

  // Per-function counters; folded into the pass-wide totals at the end.
  // "Dynamic" counters are weighted by basic block execution counts.
  uint64_t NumLocalLoadsSimplified = 0;
  uint64_t NumDynamicLocalLoadsSimplified = 0;
  uint64_t NumLocalLoadsFound = 0;
  uint64_t NumDynamicLocalLoadsFound = 0;

  for (auto *BB : BF.layout()) {
    for (auto &Inst : *BB) {
      unsigned Opcode = Inst.getOpcode();
      const MCInstrDesc &Desc = BC.MII->get(Opcode);

      // Skip instructions that do not load from memory.
      if (!Desc.mayLoad())
        continue;

      // Try to statically evaluate the target memory address.
      uint64_t TargetAddress;

      if (MIA->hasRIPOperand(Inst)) {
        // Try to find the symbol that corresponds to the rip-relative operand.
        MCOperand DisplOp;
        if (!MIA->getRIPOperandDisp(Inst, DisplOp))
          continue;

        assert(DisplOp.isExpr() &&
              "found rip-relative with non-symbolic displacement");

        // Get displacement symbol. Skip displacements that are not plain
        // symbol references (e.g. more complex expressions).
        const MCSymbolRefExpr *DisplExpr;
        if (!(DisplExpr = dyn_cast<MCSymbolRefExpr>(DisplOp.getExpr())))
          continue;
        const MCSymbol &DisplSymbol = DisplExpr->getSymbol();

        // Look up the symbol address in the global symbols map of the binary
        // context object.
        auto GI = BC.GlobalSymbols.find(DisplSymbol.getName());
        if (GI == BC.GlobalSymbols.end())
          continue;
        TargetAddress = GI->second;
      } else if (!MIA->evaluateMemOperandTarget(Inst, TargetAddress)) {
        // Non-RIP memory operand whose target cannot be computed statically.
        continue;
      }

      // Get the contents of the section containing the target addresss of the
      // memory operand. We are only interested in read-only sections.
      ErrorOr<SectionRef> DataSectionOrErr =
        BC.getSectionForAddress(TargetAddress);
      if (!DataSectionOrErr)
        continue;
      SectionRef DataSection = DataSectionOrErr.get();
      if (!DataSection.isReadOnly())
        continue;
      uint32_t Offset = TargetAddress - DataSection.getAddress();
      StringRef ConstantData;
      if (std::error_code EC = DataSection.getContents(ConstantData)) {
        // Errors reading the binary are fatal; never suppressed by verbosity.
        errs() << "BOLT-ERROR: 'cannot get section contents': "
               << EC.message() << ".\n";
        exit(1);
      }

      // At this point the load qualifies as a candidate; count it before
      // attempting the actual rewrite.
      ++NumLocalLoadsFound;
      if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE)
        NumDynamicLocalLoadsFound += BB->getExecutionCount();

      // The rewrite itself may still fail (e.g. unsupported instruction
      // form); only successful replacements are counted as simplified.
      if (MIA->replaceMemOperandWithImm(Inst, ConstantData, Offset)) {
        ++NumLocalLoadsSimplified;
        if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE)
          NumDynamicLocalLoadsSimplified += BB->getExecutionCount();
      }
    }
  }

  // Accumulate this function's results into the pass-wide statistics.
  NumLoadsFound += NumLocalLoadsFound;
  NumDynamicLoadsFound += NumDynamicLocalLoadsFound;
  NumLoadsSimplified += NumLocalLoadsSimplified;
  NumDynamicLoadsSimplified += NumDynamicLocalLoadsSimplified;

  return NumLocalLoadsSimplified > 0;
}
|
|
|
|
// Drive read-only-data load simplification over all simple functions and
// print the aggregate statistics once the whole pass has run.
void SimplifyRODataLoads::runOnFunctions(
    BinaryContext &BC,
    std::map<uint64_t, BinaryFunction> &BFs,
    std::set<uint64_t> &
) {
  for (auto &BFI : BFs) {
    auto &Function = BFI.second;

    // Only simple functions are candidates; print/dump only when the
    // function was actually modified.
    if (Function.isSimple() && simplifyRODataLoads(BC, Function)) {
      if (opts::PrintAll || opts::PrintSimplifyROLoads)
        Function.print(outs(), "after simplifying read-only section loads",
                       true);
      if (opts::DumpDotAll)
        Function.dumpGraphForPass("simplify-rodata-loads");
    }
  }

  outs() << "BOLT-INFO: simplified " << NumLoadsSimplified << " out of "
         << NumLoadsFound << " loads from a statically computed address.\n"
         << "BOLT-INFO: dynamic loads simplified: " << NumDynamicLoadsSimplified
         << "\n"
         << "BOLT-INFO: dynamic loads found: " << NumDynamicLoadsFound << "\n";
}
|
|
|
|
// Populate the Callers map: for every simple function, record each direct
// call site as a (caller, basic-block index, instruction index) tuple under
// the BinaryFunction it targets. Indirect calls and calls to targets with
// no BinaryFunction object are skipped.
//
// Fix: the original duplicated "++InstrIndex" on four separate early-continue
// paths, which is easy to get wrong when the loop is modified; the index is
// now advanced exactly once per instruction at the bottom of the loop. Also
// use cast<> instead of dyn_cast<> after asserting the expression kind, so a
// release build cannot silently dereference a null pointer.
void IdenticalCodeFolding::discoverCallers(
    BinaryContext &BC, std::map<uint64_t, BinaryFunction> &BFs) {
  for (auto &I : BFs) {
    BinaryFunction &Caller = I.second;

    if (!Caller.isSimple())
      continue;

    for (BinaryBasicBlock &BB : Caller) {
      unsigned BlockIndex = Caller.getIndex(&BB);
      unsigned InstrIndex = 0;

      for (MCInst &Inst : BB) {
        if (BC.MIA->isCall(Inst)) {
          const MCOperand &TargetOp = Inst.getOperand(0);
          // A non-expression target means an indirect call: we cannot
          // record a target for it.
          if (TargetOp.isExpr()) {
            // Find the target function for this call.
            const auto *TargetExpr = TargetOp.getExpr();
            assert(TargetExpr->getKind() == MCExpr::SymbolRef);
            const auto &TargetSymbol =
              cast<MCSymbolRefExpr>(TargetExpr)->getSymbol();
            const auto *Function = BC.getFunctionForSymbol(&TargetSymbol);
            if (Function) {
              // Insert a tuple in the Callers map.
              Callers[Function].emplace_back(
                CallSite(&Caller, BlockIndex, InstrIndex));
            }
            // Otherwise: call to a function without a BinaryFunction object.
          }
        }
        // Advance exactly once per instruction, regardless of the path taken.
        ++InstrIndex;
      }
    }
  }
}
|
|
|
|
// Fold BFToFold into BFToReplaceWith: mark the folded function identical to
// the survivor, redirect every recorded call site of BFToFold to
// BFToReplaceWith, transfer those call sites to the survivor's caller list,
// record modified callers in Modified, and merge profile data. Also updates
// the pass-wide statistics (bytes saved, functions folded, dynamic calls).
//
// Fix: "Success" was consumed only by assert(), causing an unused-variable
// warning under NDEBUG; it is now explicitly voided.
void IdenticalCodeFolding::foldFunction(
    BinaryContext &BC,
    std::map<uint64_t, BinaryFunction> &BFs,
    BinaryFunction *BFToFold,
    BinaryFunction *BFToReplaceWith,
    std::set<BinaryFunction *> &Modified) {

  // Mark BFToFold as identical with BFToReplaceWith.
  BFToFold->setIdenticalFunctionAddress(BFToReplaceWith->getAddress());

  // Add the size of BFToFold to the total size savings estimate.
  BytesSavedEstimate += BFToFold->getSize();

  // Get callers of BFToFold. If nothing calls it, there are no call sites
  // to rewrite.
  auto CI = Callers.find(BFToFold);
  if (CI == Callers.end())
    return;
  std::vector<CallSite> &BFToFoldCallers = CI->second;

  // Get callers of BFToReplaceWith.
  std::vector<CallSite> &BFToReplaceWithCallers = Callers[BFToReplaceWith];

  // Get MCSymbol for BFToReplaceWith.
  MCSymbol *SymbolToReplaceWith =
    BC.getOrCreateGlobalSymbol(BFToReplaceWith->getAddress(), "");

  // Traverse callers of BFToFold and replace the calls with calls
  // to BFToReplaceWith.
  for (const CallSite &CS : BFToFoldCallers) {
    // Get call instruction.
    BinaryFunction *Caller = CS.Caller;
    BinaryBasicBlock *CallBB = Caller->getBasicBlockAtIndex(CS.BlockIndex);
    MCInst &CallInst = CallBB->getInstructionAtIndex(CS.InstrIndex);

    // Replace call target with BFToReplaceWith.
    auto Success = BC.MIA->replaceCallTargetOperand(CallInst,
                                                    SymbolToReplaceWith,
                                                    BC.Ctx.get());
    (void)Success; // Silence unused-variable warning in NDEBUG builds.
    assert(Success && "unexpected call target prevented the replacement");

    // Add this call site to the callers of BFToReplaceWith.
    BFToReplaceWithCallers.emplace_back(CS);

    // Add caller to the set of modified functions.
    Modified.insert(Caller);

    // Update dynamic calls folded stat.
    if (Caller->hasValidProfile() &&
        CallBB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE)
      NumDynamicCallsFolded += CallBB->getExecutionCount();
  }

  // Remove all callers of BFToFold.
  BFToFoldCallers.clear();

  ++NumFunctionsFolded;

  // Merge execution counts of BFToFold into those of BFToReplaceWith.
  BFToFold->mergeProfileDataInto(*BFToReplaceWith);
}
|
|
|
|
// Identical code folding (ICF) driver. Hashes all simple functions into
// buckets of mutually identical functions, folds each bucket into its
// hottest member, and repeats until a fixed point is reached (folding can
// make previously distinct functions identical). Prints summary stats on
// outs(); per-iteration chatter is gated behind verbosity >= 1.
void IdenticalCodeFolding::runOnFunctions(
    BinaryContext &BC,
    std::map<uint64_t, BinaryFunction> &BFs,
    std::set<uint64_t> &
) {

  // Record every direct call site so folding can redirect them.
  discoverCallers(BC, BFs);

  // This hash table is used to identify identical functions. It maps
  // a function to a bucket of functions identical to it.
  struct KeyHash {
    std::size_t operator()(const BinaryFunction *F) const { return F->hash(); }
  };
  struct KeyEqual {
    bool operator()(const BinaryFunction *A, const BinaryFunction *B) const {
      return A->isIdenticalWith(*B);
    }
  };
  std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>,
                     KeyHash, KeyEqual> Buckets;

  // Set that holds the functions that were modified by the last pass.
  std::set<BinaryFunction *> Mod;

  // Vector of all the candidate functions to be tested for being identical
  // to each other. Initialized with all simple functions.
  std::vector<BinaryFunction *> Cands;
  for (auto &I : BFs) {
    BinaryFunction *BF = &I.second;
    if (BF->isSimple())
      Cands.emplace_back(BF);
  }

  // We repeat the icf pass until no new modifications happen.
  unsigned Iter = 1;
  do {
    Buckets.clear();
    Mod.clear();

    if (opts::Verbosity >= 1) {
      outs() << "BOLT-INFO: icf pass " << Iter << "...\n";
    }

    uint64_t NumIdenticalFunctions = 0;

    // Compare candidate functions using the Buckets hash table. Identical
    // functions are efficiently discovered and added to the same bucket.
    for (BinaryFunction *BF : Cands) {
      Buckets[BF].emplace_back(BF);
    }

    Cands.clear();

    // Go through the functions of each bucket and fold any references to them
    // with the references to the hottest function among them.
    for (auto &I : Buckets) {
      std::vector<BinaryFunction *> &IFs = I.second;
      // Sort so the function with the highest execution count comes first;
      // functions without a valid profile sort last.
      std::sort(IFs.begin(), IFs.end(),
                [](const BinaryFunction *A, const BinaryFunction *B) {
                  if (!A->hasValidProfile() && !B->hasValidProfile())
                    return false;

                  if (!A->hasValidProfile())
                    return false;

                  if (!B->hasValidProfile())
                    return true;

                  return B->getExecutionCount() < A->getExecutionCount();
                }
      );
      BinaryFunction *Hottest = IFs[0];

      // For the next pass, we consider only one function from each set of
      // identical functions.
      Cands.emplace_back(Hottest);

      if (IFs.size() <= 1)
        continue;

      // Fold every other member of the bucket into the hottest one.
      NumIdenticalFunctions += IFs.size() - 1;
      for (unsigned i = 1; i < IFs.size(); ++i) {
        BinaryFunction *BF = IFs[i];
        foldFunction(BC, BFs, BF, Hottest, Mod);
      }
    }

    if (opts::Verbosity >= 1) {
      outs() << "BOLT-INFO: found " << NumIdenticalFunctions
             << " identical functions.\n"
             << "BOLT-INFO: modified " << Mod.size() << " functions.\n";
    }

    NumIdenticalFunctionsFound += NumIdenticalFunctions;

    ++Iter;
  } while (!Mod.empty());

  // Final, always-printed summary for the whole pass.
  outs() << "BOLT-INFO: ICF pass found " << NumIdenticalFunctionsFound
         << " functions identical to some other function.\n"
         << "BOLT-INFO: ICF pass folded references to " << NumFunctionsFolded
         << " functions.\n"
         << "BOLT-INFO: ICF pass folded " << NumDynamicCallsFolded << " dynamic"
         << " function calls.\n"
         << "BOLT-INFO: Removing all identical functions could save "
         << format("%.2lf", (double) BytesSavedEstimate / 1024)
         << " KB of code space.\n";

  if (opts::PrintAll || opts::PrintICF) {
    for (auto &I : BFs) {
      I.second.print(outs(), "after identical code folding", true);
    }
  }
}
|
|
|
|
} // namespace bolt
|
|
} // namespace llvm
|