Indirect call optimization.

(cherry picked from FBD28110629)
This commit is contained in:
Bill Nell 2016-06-07 16:27:52 -07:00 committed by Maksim Panchenko
parent 45e2219ae4
commit 8bcfd9a392
9 changed files with 434 additions and 12 deletions

View file

@ -290,6 +290,11 @@ public:
return Offset;
}
/// Update this block's offset, measured from the start of the function.
void setOffset(uint64_t newOffset) { Offset = newOffset; }
/// Adds block to successor list, and also updates predecessor list for
/// successor block.
/// Set branch info for this path.

View file

@ -220,7 +220,11 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
OS << "\n";
return;
}
BC.InstPrinter->printInst(&Instruction, OS, "", *BC.STI);
if (!BC.MIA->isUnsupported(Instruction)) {
BC.InstPrinter->printInst(&Instruction, OS, "", *BC.STI);
} else {
OS << "unsupported (probably jmpr)";
}
if (BC.MIA->isCall(Instruction)) {
if (BC.MIA->isTailCall(Instruction))
OS << " # TAILCALL ";
@ -542,12 +546,6 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
}
}
Instruction.clear();
Instruction.addOperand(
MCOperand::createExpr(
MCSymbolRefExpr::create(TargetSymbol,
MCSymbolRefExpr::VK_None,
*Ctx)));
if (!IsCall) {
// Add local branch info.
LocalBranches.push_back({Offset, TargetOffset});
@ -556,15 +554,54 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
// Add fallthrough branch info.
FTBranches.push_back({Offset, Offset + Size});
}
if (IsCall || !IsCondBranch) {
if (MIA->isIndirectBranch(Instruction)) {
#if 0
dbgs() << "Indirect call/branch @ "
<< Twine::utohexstr(Offset) << "\n";
#endif
NonLocalIndirectBranches.push_back(Offset);
}
}
Instruction.clear();
Instruction.addOperand(
MCOperand::createExpr(
MCSymbolRefExpr::create(TargetSymbol,
MCSymbolRefExpr::VK_None,
*Ctx)));
} else {
if (MIA->isCall(Instruction)) {
#if 0
dbgs() << getName() << ": indirect call/branch @ "
<< Twine::utohexstr(Offset) << "\n";
#endif
NonLocalIndirectBranches.push_back(Offset);
}
// Should be an indirect call or an indirect branch. Bail out on the
// latter case.
if (MIA->isIndirectBranch(Instruction)) {
DEBUG(dbgs() << "BOLT-WARNING: indirect branch detected at 0x"
<< Twine::utohexstr(AbsoluteInstrAddr)
<< ". Skipping function " << getName() << ".\n");
IsSimple = false;
if (!MIA->isConditionalBranch(Instruction)) {
#if 0
dbgs() << getName() << ": indirect call/branch @ "
<< Twine::utohexstr(Offset) << "\n";
#endif
NonLocalIndirectBranches.push_back(Offset);
MCInst tmp(Instruction);
if (1 || !MIA->isTerminator(tmp) || !MIA->convertJmpToTailCall(tmp)) {
IsSimple = false;
}
} else {
IsSimple = false;
}
}
// Indirect call. We only need to fix it if the operand is RIP-relative
if (MIA->hasRIPOperand(Instruction)) {
if (!handleRIPOperand(Instruction, AbsoluteInstrAddr, Size)) {

View file

@ -19,6 +19,7 @@
#include "BinaryBasicBlock.h"
#include "BinaryContext.h"
#include "DataReader.h"
#include "DebugData.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/ilist.h"
@ -36,6 +37,7 @@
#include "llvm/Support/raw_ostream.h"
#include <limits>
#include <map>
#include <unordered_map>
#include <vector>
using namespace llvm::object;
@ -235,6 +237,10 @@ private:
using LandingPadsMapType = std::map<const MCSymbol *, std::vector<unsigned> >;
LandingPadsMapType LPToBBIndex;
/// Offsets of the indirect calls and indirect branches recorded while this
/// function is disassembled. Iterated through nlibs().
using NonLocalIndirectBranchesListType = std::vector<uint32_t>;
NonLocalIndirectBranchesListType NonLocalIndirectBranches;
/// Map offset in the function to a local label.
using LabelsMapType = std::map<uint32_t, MCSymbol *>;
LabelsMapType Labels;
@ -288,7 +294,7 @@ private:
// Map that keeps track of the index of each basic block in the BasicBlocks
// vector. Used to make getIndex fast.
std::map<const BinaryBasicBlock*, unsigned> BasicBlockIndices;
std::unordered_map<const BinaryBasicBlock*, unsigned> BasicBlockIndices;
// At each basic block entry we attach a CFI state to detect if reordering
// corrupts the CFI state for a block. The CFI state is simply the index in
@ -336,6 +342,28 @@ public:
typedef BasicBlockOrderType::iterator order_iterator;
typedef BasicBlockOrderType::const_iterator const_order_iterator;
/// Iterators over the recorded indirect call/branch offsets
/// (NonLocalIndirectBranches).
using nlib_iterator = NonLocalIndirectBranchesListType::iterator;
using const_nlib_iterator = NonLocalIndirectBranchesListType::const_iterator;

nlib_iterator begin_nlibs() { return NonLocalIndirectBranches.begin(); }
const_nlib_iterator begin_nlibs() const {
  return NonLocalIndirectBranches.begin();
}

nlib_iterator end_nlibs() { return NonLocalIndirectBranches.end(); }
const_nlib_iterator end_nlibs() const {
  return NonLocalIndirectBranches.end();
}

/// Range form of begin_nlibs()/end_nlibs(), usable in a range-based for.
iterator_range<nlib_iterator> nlibs() {
  return iterator_range<nlib_iterator>(NonLocalIndirectBranches.begin(),
                                       NonLocalIndirectBranches.end());
}
iterator_range<const_nlib_iterator> nlibs() const {
  return iterator_range<const_nlib_iterator>(NonLocalIndirectBranches.begin(),
                                             NonLocalIndirectBranches.end());
}
// CFG iterators.
iterator begin() { return BasicBlocks.begin(); }
const_iterator begin() const { return BasicBlocks.begin(); }
@ -525,7 +553,8 @@ public:
/// Returns NULL if basic block already exists at the \p Offset.
BinaryBasicBlock *addBasicBlock(uint64_t Offset, MCSymbol *Label,
bool DeriveAlignment = false) {
assert(!getBasicBlockAtOffset(Offset) && "basic block already exists");
assert(CurrentState == State::CFG ||
(!getBasicBlockAtOffset(Offset) && "basic block already exists"));
assert(BC.Ctx && "cannot be called with empty context");
if (!Label)
Label = BC.Ctx->createTempSymbol("BB", true);
@ -556,6 +585,13 @@ public:
return nullptr;
}
/// Insert \p newBBs into the block layout immediately after \p start.
///
/// The insertion point is found by searching the layout itself. getIndex()
/// reports a block's position in the BasicBlocks vector, which no longer
/// matches its position in BasicBlocksLayout once blocks have been reordered,
/// so it must not be used to index the layout.
///
/// \param start   block after which the new blocks are placed; expected to be
///                present in the current layout.
/// \param newBBs  blocks to insert, in the order they should appear.
void updateLayout(BinaryBasicBlock* start,
                  const std::vector<BinaryBasicBlock*>& newBBs) {
  auto InsertPos = BasicBlocksLayout.begin();
  while (InsertPos != BasicBlocksLayout.end() && *InsertPos != start)
    ++InsertPos;
  assert(InsertPos != BasicBlocksLayout.end() &&
         "start block is not part of the layout");

  // Step past `start` so the new blocks follow it. If `start` was not found
  // (release build), append at the end instead of walking past end().
  if (InsertPos != BasicBlocksLayout.end())
    ++InsertPos;
  BasicBlocksLayout.insert(InsertPos, newBBs.begin(), newBBs.end());
}
/// Return basic block that originally contained offset \p Offset
/// from the function start.
BinaryBasicBlock *getBasicBlockContainingOffset(uint64_t Offset);

View file

@ -24,6 +24,13 @@ OptimizeBodylessFunctions(
llvm::cl::desc("optimize functions that just do a tail call"),
llvm::cl::Optional);
// -optimize-indirect-branches: handed to registerPass() together with the
// OptimizeIndirectBranches pass in runAllPasses(). Initialised to true, so
// the option is on unless the user overrides it.
static llvm::cl::opt<bool>
OptimizeIndirectBranches(
"optimize-indirect-branches",
llvm::cl::desc("optimize indirect branches"),
llvm::cl::init(true),
llvm::cl::Optional);
static llvm::cl::opt<bool>
InlineSmallFunctions(
"inline-small-functions",
@ -65,6 +72,9 @@ void BinaryFunctionPassManager::runAllPasses(
Manager.registerPass(std::move(llvm::make_unique<ReorderBasicBlocks>()));
Manager.registerPass(llvm::make_unique<OptimizeIndirectBranches>(),
opts::OptimizeIndirectBranches);
Manager.registerPass(llvm::make_unique<SimplifyConditionalTailCalls>(),
opts::SimplifyConditionalTailCalls);

View file

@ -66,7 +66,7 @@ private:
/// Runs all enabled implemented passes on all functions.
static void runAllPasses(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &Functions,
std::set<uint64_t> &largeFunctions);
std::set<uint64_t> &LargeFunctions);
};

View file

@ -23,6 +23,21 @@ extern llvm::cl::opt<bool> PrintUCE;
extern llvm::cl::opt<llvm::bolt::BinaryFunction::SplittingType> SplitFunctions;
extern bool shouldProcess(const llvm::bolt::BinaryFunction &Function);
// -optimize-indirect-branches-threshold: percentage (default 90) that the
// combined frequency of the top-N targets of an indirect call site must reach
// before OptimizeIndirectBranches treats the site as a candidate.
static llvm::cl::opt<int>
OptimizeIndirectBranchesThreshold(
"optimize-indirect-branches-threshold",
llvm::cl::desc("threshold for optimizing a frequently taken indirect call"),
llvm::cl::init(90),
llvm::cl::Optional);
// -optimize-indirect-branches-topn: upper bound (default 2) on how many of the
// most frequent targets of an indirect call site OptimizeIndirectBranches
// looks at; the actual count is capped by the number of profiled targets.
static llvm::cl::opt<int>
OptimizeIndirectBranchesTopN(
"optimize-indirect-branches-topn",
llvm::cl::desc("number of targets to consider when doing indirect "
"branch optimization"),
llvm::cl::init(2),
llvm::cl::Optional);
static llvm::cl::opt<llvm::bolt::BinaryFunction::LayoutType>
ReorderBlocks(
"reorder-blocks",
@ -527,5 +542,296 @@ void SimplifyConditionalTailCalls::runOnFunctions(
<< " from a total of " << NumTailCallCandidates << "\n";
}
namespace {
/// Write a one-line rendering of \p Instruction to \p OS.
///
/// Instructions that BC.MIA reports as unsupported are replaced by a fixed
/// placeholder instead of being handed to the instruction printer. When
/// \p printMCInst is set, the MCInst's pretty dump follows on its own line.
template <typename S>
void printInstruction(S& OS, BinaryContext& BC, const MCInst &Instruction, bool printMCInst = false) {
  if (BC.MIA->isUnsupported(Instruction))
    OS << "unsupported (probably jmpr)";
  else
    BC.InstPrinter->printInst(&Instruction, OS, "", *BC.STI);
  OS << "\n";

  if (!printMCInst)
    return;

  Instruction.dump_pretty(OS, BC.InstPrinter.get());
  OS << "\n";
}
/// Return the total encoded size, in bytes, of the instructions in
/// [\p beg, \p end).
///
/// Each instruction is encoded into a scratch buffer with BC.MCE and the
/// buffer lengths are summed.
/// Note: this is imprecise since it happens prior to relaxation.
template <typename Itr>
uint64_t computeCodeSize(BinaryContext& BC, Itr beg, Itr end) {
  uint64_t size = 0;
  for (; beg != end; ++beg) {
    SmallString<256> Code;
    SmallVector<MCFixup, 4> Fixups;
    raw_svector_ostream VecOS(Code);
    // Dumping every measured instruction is a debugging aid only; keep it out
    // of regular output.
    DEBUG(printInstruction(dbgs(), BC, *beg, false));
    BC.MCE->encodeInstruction(*beg, VecOS, Fixups, *BC.STI);
    size += Code.size();
  }
  return size;
}
}
/// Promote frequently taken indirect calls/branches.
///
/// For every simple function that has branch profile data, each recorded
/// indirect call/branch site (Function.nlibs()) is examined. Its profiled
/// targets are ranked by branch count; when the top
/// -optimize-indirect-branches-topn targets account for at least
/// -optimize-indirect-branches-threshold percent of the branches taken *from
/// that site*, the instruction is replaced by the code returned from
/// BC.MIA->indirectCallPromotion(), split into new basic blocks that are
/// wired into the CFG and inserted into the layout after the original block.
///
/// \param BC              binary context (profile reader, MC objects).
/// \param BFs             functions to process, keyed by address.
/// \param LargeFunctions  not used by this pass.
void OptimizeIndirectBranches::runOnFunctions(
    BinaryContext &BC,
    std::map<uint64_t, BinaryFunction> &BFs,
    std::set<uint64_t> &LargeFunctions) {
  uint64_t TotalBranches = 0;
  uint64_t TotalIndirectCalls = 0;
  uint64_t TotalIndirectCallsites = 0;
  uint64_t TotalIndirectCandidateCalls = 0;

  for (auto &BFIt : BFs) {
    auto &Function = BFIt.second;

    if (!Function.isSimple() || !opts::shouldProcess(Function))
      continue;

    auto BranchDataOrErr = BC.DR.getFuncBranchData(Function.getName());
    if (BranchDataOrErr.getError()) {
      DEBUG(dbgs() << "no branch data found for \""
                   << Function.getName() << "\"\n");
      continue;
    }
    const FuncBranchData &BranchData = BranchDataOrErr.get();

    // Note: this is not just counting calls.
    TotalBranches += BranchData.ExecutionCount;

    for (auto &nlib : Function.nlibs()) {
      ++TotalIndirectCallsites;

      // Gather the profiled targets of this site together with the number of
      // branches taken from it. The frequency computations below must be
      // relative to this site only, not to all sites seen so far.
      uint64_t SiteTotal = 0;
      std::vector<BranchInfo> targets;
      for (auto &BInfo : BranchData.getBranchRange(nlib)) {
        SiteTotal += BInfo.Branches;
        targets.push_back(BInfo);
      }
      TotalIndirectCalls += SiteTotal;

      // No profile for this site (or only zero counts): nothing to rank.
      if (targets.empty() || SiteTotal == 0)
        continue;

      // Hottest targets first.
      std::sort(targets.begin(), targets.end(),
                [](const BranchInfo& a, const BranchInfo& b) {
                  return a.Branches > b.Branches;
                });

      const int NumTargets = static_cast<int>(targets.size());
      const int N = std::min(int(opts::OptimizeIndirectBranchesTopN),
                             NumTargets);
      // A non-positive top-N setting leaves no target to promote to.
      if (N <= 0)
        continue;

      uint64_t TopNBranches = 0;
      for (int i = 0; i < N; ++i)
        TopNBranches += targets[i].Branches;

      const double TopNFrequency = 100.0 * TopNBranches / SiteTotal;
      if (TopNFrequency < opts::OptimizeIndirectBranchesThreshold)
        continue;

      // Report the candidate. Targets are listed until their cumulative
      // frequency has covered the threshold.
      double Threshold = double(opts::OptimizeIndirectBranchesThreshold);
      bool Separator = false;
      dbgs() << "BOLT: candidate branch info: "
             << Function.getName() << " @ " << nlib
             << " -> ";
      for (int i = 0; i < N && Threshold > 0; i++) {
        const auto Frequency = 100.0 * targets[i].Branches / SiteTotal;
        if (Separator) {
          dbgs() << " | ";
        }
        Separator = true;
        dbgs() << targets[i].To.Name
               << ", count = " << targets[i].Branches
               << ", mispreds = " << targets[i].Mispreds
               << ", freq = " << (int)Frequency << "%";
        TotalIndirectCandidateCalls += targets[i].Branches;
        Threshold -= Frequency;
      }
      dbgs() << "\n";

      auto IndCallBlock =
        Function.getBasicBlockContainingOffset(targets[0].From.Offset);
      // NOTE(review): presumably a null result means no block covers the
      // offset; skip the site rather than dereference it.
      if (!IndCallBlock)
        continue;

      // Walk the block, re-encoding each instruction to reconstruct its
      // offset, and pick the instruction located at the profiled offset.
      // The walk deliberately runs to the end of the block: InstOffset is
      // afterwards used as the offset just past the block.
      // Note: sizes are imprecise since this happens prior to relaxation.
      MCInst* CallInst = nullptr;
      uint64_t InstOffset{RoundUpToAlignment(IndCallBlock->getOffset(),
                                             IndCallBlock->getAlignment())};
      for (auto &Instr : *IndCallBlock) {
        SmallString<256> Code;
        SmallVector<MCFixup, 4> Fixups;
        raw_svector_ostream VecOS(Code);
        BC.MCE->encodeInstruction(Instr, VecOS, Fixups, *BC.STI);
        if (InstOffset == targets[0].From.Offset) {
          CallInst = &Instr;
        }
        InstOffset += Code.size();
      }
      assert(CallInst && "no instruction found at profiled offset");
      if (!CallInst)
        continue;

      std::vector<MCSymbol*> Targets;
      for (int i = 0; i < N; ++i) {
        assert(targets[i].To.IsSymbol);
        // Is this right? lookupSym doesn't always return a result
        auto Symbol = BC.Ctx->getOrCreateSymbol(targets[i].To.Name);
        assert(Symbol);
        Targets.push_back(Symbol);
      }

      // The instruction that computes the call target is not tracked yet, so
      // the call instruction itself is passed as the source instruction.
      auto ICPcode = BC.MIA->indirectCallPromotion(
        *CallInst,
        *CallInst,
        Targets,
        BC.Ctx.get());
      if (ICPcode.empty())
        continue;

      DEBUG({
        for (auto &entry : ICPcode) {
          if (entry.first) dbgs() << entry.first->getName() << ":\n";
          for (auto &Instr : entry.second) {
            printInstruction(dbgs(), BC, Instr, false);
          }
        }
      });

      // Create new basic blocks with the promoted code in each one.
      auto oldSuccRange = IndCallBlock->successors();
      std::vector<BinaryBasicBlock*> oldSucc(oldSuccRange.begin(),
                                             oldSuccRange.end());
      BinaryBasicBlock* LastBlock = IndCallBlock;
      BinaryBasicBlock* MergeBlock = nullptr;
      std::vector<BinaryBasicBlock*> newBBs;

      assert(!BC.MIA->isTailCall(*CallInst) || oldSucc.empty());

      // Remove all successors from block doing the indirect call.
      for (auto succ : oldSucc) {
        IndCallBlock->removeSuccessor(succ);
      }
      assert(IndCallBlock->succ_empty());

      DEBUG(dbgs() << "IndCallBlock = " << IndCallBlock << "\n");

      // A trailing entry without instructions names the merge block.
      if (ICPcode.back().second.empty()) {
        // Create BB for merge block following old call.
        uint64_t total = 0;
        for (auto &entry : ICPcode) {
          total += computeCodeSize(BC, entry.second.begin(), entry.second.end());
        }

        // Shift every block located after the call block by the size of the
        // inserted code.
        for (auto &BB : Function) {
          if (BB.getOffset() > IndCallBlock->getOffset()) {
            BB.setOffset(BB.getOffset() + total);
          }
        }

        MergeBlock = Function.addBasicBlock(total + InstOffset,
                                            ICPcode.back().first);
        assert(MergeBlock && "failed to create merge block");
        newBBs.push_back(MergeBlock);

        for (auto succ : oldSucc) {
          MergeBlock->addSuccessor(succ);
        }

        DEBUG(dbgs() << "MergeBlock = " << MergeBlock << "\n");

        // Move instructions from the tail of the original call block
        // to the merge block. They are popped back-to-front, hence the
        // reverse iteration when re-adding them.
        std::vector<MCInst> MovedInst;
        while (&IndCallBlock->back() != CallInst) {
          auto &lastInst = IndCallBlock->back();
          MovedInst.push_back(lastInst);
          IndCallBlock->eraseInstruction(&lastInst);
        }
        IndCallBlock->eraseInstruction(CallInst);

        for (auto itr = MovedInst.rbegin(); itr != MovedInst.rend(); ++itr) {
          MergeBlock->addInstruction(*itr);
        }

        ICPcode.pop_back(); // remove merge block
      }

      // Entries that carry a symbol start a new block chained after the
      // previous one; entries without a symbol extend the current block.
      for (auto &entry : ICPcode) {
        auto &Sym = entry.first;
        auto &Insts = entry.second;
        if (Sym) {
          auto TBB = Function.addBasicBlock(InstOffset, Sym);
          newBBs.push_back(TBB);
          LastBlock->addSuccessor(TBB);
          LastBlock = TBB;
          InstOffset += computeCodeSize(BC, Insts.begin(), Insts.end());
          DEBUG(dbgs() << "TBB = " << TBB << "\n");
        }
        for (auto &Inst : Insts) {
          LastBlock->addInstruction(Inst);
        }
        if (MergeBlock) LastBlock->addSuccessor(MergeBlock);
      }

      // Place the new blocks right after the original call block.
      Function.updateLayout(IndCallBlock, newBBs);
    }
  }

  dbgs() << "BOLT: total indirect callsites/candidate calls/calls/branches = "
         << TotalIndirectCallsites << "/"
         << TotalIndirectCandidateCalls << "/"
         << TotalIndirectCalls << "/"
         << TotalBranches << "\n";
}
} // namespace bolt
} // namespace llvm

View file

@ -148,6 +148,14 @@ class SimplifyConditionalTailCalls : public BinaryFunctionPass {
std::set<uint64_t> &LargeFunctions) override;
};
/// Optimize indirect calls.
///
/// Function pass whose runOnFunctions() walks the indirect call/branch sites
/// recorded for each function and, using the branch profile, rewrites sites
/// dominated by a few targets.
class OptimizeIndirectBranches : public BinaryFunctionPass {
public:
/// Run the optimization over every function in \p BFs.
void runOnFunctions(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &LargeFunctions) override;
};
} // namespace bolt
} // namespace llvm

View file

@ -18,6 +18,21 @@
namespace llvm {
namespace bolt {
/// Return the contiguous run of entries in Data whose source offset equals
/// \p From. Data has to be sorted for the binary search to be valid; this is
/// checked in assert-enabled builds.
iterator_range<FuncBranchData::ContainerTy::const_iterator>
FuncBranchData::getBranchRange(uint64_t From) const {
  assert(std::is_sorted(Data.begin(), Data.end()));

  // First entry whose source offset is not below From...
  const auto First = std::lower_bound(
      Data.begin(), Data.end(), From,
      [](const BranchInfo &BI, const uint64_t Offset) {
        return BI.From.Offset < Offset;
      });
  // ...and first entry after it whose source offset is above From.
  const auto Last = std::upper_bound(
      First, Data.end(), From,
      [](const uint64_t Offset, const BranchInfo &BI) {
        return Offset < BI.From.Offset;
      });

  return iterator_range<ContainerTy::const_iterator>(First, Last);
}
ErrorOr<const BranchInfo &> FuncBranchData::getBranch(uint64_t From,
uint64_t To) const {
for (const auto &I : Data) {
@ -195,8 +210,12 @@ std::error_code DataReader::parse() {
I = GetOrCreateFuncEntry(BI.To.Name);
I->getValue().ExecutionCount += BI.Branches;
}
}
for (auto &FuncBranches : FuncsMap) {
std::sort(FuncBranches.second.Data.begin(), FuncBranches.second.Data.end());
}
return std::error_code();
}

View file

@ -96,6 +96,7 @@ struct FuncBranchData {
FuncBranchData(StringRef Name, ContainerTy Data, ContainerTy EntryData)
: Name(Name), Data(std::move(Data)), EntryData(std::move(EntryData)) {}
iterator_range<ContainerTy::const_iterator> getBranchRange(uint64_t From) const;
ErrorOr<const BranchInfo &> getBranch(uint64_t From, uint64_t To) const;
};