Make FLO work on hhvm binary.

Summary: Fixes several issues that prevented us from running hhvm binary.

(cherry picked from FBD2543057)
This commit is contained in:
Maksim Panchenko 2015-10-14 15:35:14 -07:00
parent ec22caff1e
commit b4ed5cc942
8 changed files with 188 additions and 113 deletions

View file

@ -9,6 +9,8 @@
//
//===----------------------------------------------------------------------===//
#include "BinaryBasicBlock.h"
#include "BinaryFunction.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@ -17,14 +19,10 @@
#include <limits>
#include <string>
#include "BinaryBasicBlock.h"
#include "BinaryFunction.h"
#undef DEBUG_TYPE
#define DEBUG_TYPE "flo"
namespace llvm {
namespace flo {
bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS) {
@ -66,5 +64,4 @@ void BinaryBasicBlock::removePredecessor(BinaryBasicBlock *Pred) {
}
} // namespace flo
} // namespace llvm

View file

@ -28,7 +28,6 @@
#include <limits>
namespace llvm {
namespace flo {
class BinaryFunction;
@ -222,7 +221,6 @@ bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS);
} // namespace flo
} // namespace llvm
#endif

47
bolt/BinaryContext.cpp Normal file
View file

@ -0,0 +1,47 @@
//===--- BinaryContext.cpp - Interface for machine-level context ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
#include "BinaryContext.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
namespace llvm {
namespace flo {
MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address,
Twine Prefix) {
MCSymbol *Symbol{nullptr};
std::string Name;
auto NI = GlobalAddresses.find(Address);
if (NI != GlobalAddresses.end()) {
// Even though there could be multiple names registered at the address,
// we only use the first one.
Name = NI->second;
} else {
Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
assert(GlobalSymbols.find(Name) == GlobalSymbols.end() &&
"created name is not unique");
GlobalAddresses.emplace(std::make_pair(Address, Name));
}
Symbol = Ctx->lookupSymbol(Name);
if (Symbol)
return Symbol;
Symbol = Ctx->getOrCreateSymbol(Name);
GlobalSymbols[Name] = Address;
return Symbol;
}
} // namespace flo
} // namespace llvm

View file

@ -7,6 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
// Context for processing binary executables in files and/or memory.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_FLO_BINARY_CONTEXT_H
@ -24,27 +26,25 @@
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/TargetRegistry.h"
#include <functional>
#include <map>
#include <string>
#include <system_error>
namespace llvm {
namespace flo {
class DataReader;
/// Everything that's needed to process binaries lives here.
class BinaryContext {
BinaryContext() = delete;
public:
// [name] -> [address]
// [name] -> [address] map used for global symbol resolution.
typedef std::map<std::string, uint64_t> SymbolMapType;
SymbolMapType GlobalSymbols;
@ -111,10 +111,15 @@ public:
DR(DR) {}
~BinaryContext() {}
/// Return a global symbol registered at a given \p Address. If no symbol
/// exists, create one with unique name using \p Prefix.
/// If there are multiple symbols registered at the \p Address, then
/// return the first one.
MCSymbol *getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix);
};
} // namespace flo
} // namespace llvm
#endif

View file

@ -10,6 +10,9 @@
//===----------------------------------------------------------------------===//
#include "BinaryBasicBlock.h"
#include "BinaryFunction.h"
#include "DataReader.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@ -23,15 +26,10 @@
#include <queue>
#include <string>
#include "BinaryBasicBlock.h"
#include "BinaryFunction.h"
#include "DataReader.h"
#undef DEBUG_TYPE
#define DEBUG_TYPE "flo"
namespace llvm {
namespace flo {
BinaryBasicBlock *
@ -181,7 +179,16 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
break;
}
if (MIA->isUnsupported(Instruction)) {
DEBUG(dbgs() << "FLO: unsupported instruction seen. Skipping function "
<< getName() << ".\n");
IsSimple = false;
break;
}
if (MIA->isIndirectBranch(Instruction)) {
DEBUG(dbgs() << "FLO: indirect branch seen. Skipping function "
<< getName() << ".\n");
IsSimple = false;
break;
}
@ -231,21 +238,8 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
} else {
// This is a call regardless of the opcode (e.g. tail call).
IsCall = true;
// Check if we already have a symbol at this address.
std::string Name;
auto NI = BC.GlobalAddresses.find(InstructionTarget);
if (NI != BC.GlobalAddresses.end()) {
// Any registered name will do.
Name = NI->second;
} else {
// Create a new symbol at the destination.
Name = (Twine("FUNCat0x") +
Twine::utohexstr(InstructionTarget)).str();
BC.GlobalAddresses.emplace(std::make_pair(InstructionTarget,
Name));
}
TargetSymbol = Ctx->getOrCreateSymbol(Name);
BC.GlobalSymbols[Name] = InstructionTarget;
TargetSymbol = BC.getOrCreateGlobalSymbol(InstructionTarget,
"FUNCat");
}
}
@ -282,19 +276,8 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
IsSimple = false;
break;
}
std::string Name;
auto NI = BC.GlobalAddresses.find(TargetAddress);
if (NI != BC.GlobalAddresses.end()) {
Name = NI->second;
} else {
// Register new "data" symbol at the destination.
Name = (Twine("DATAat0x") + Twine::utohexstr(TargetAddress)).str();
BC.GlobalAddresses.emplace(std::make_pair(TargetAddress,
Name));
}
TargetSymbol = Ctx->getOrCreateSymbol(Name);
BC.GlobalSymbols[Name] = TargetAddress;
// FIXME: check that the address is in data, not in code.
TargetSymbol = BC.getOrCreateGlobalSymbol(TargetAddress, "DATAat");
MIA->replaceRIPOperandDisp(
Instruction,
MCOperand::createExpr(
@ -806,5 +789,4 @@ void BinaryFunction::solveOptimalLayout(bool DumpLayout) {
}
} // namespace flo
} // namespace llvm

View file

@ -17,6 +17,8 @@
#ifndef LLVM_TOOLS_LLVM_FLO_BINARY_FUNCTION_H
#define LLVM_TOOLS_LLVM_FLO_BINARY_FUNCTION_H
#include "BinaryBasicBlock.h"
#include "BinaryContext.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/ilist.h"
#include "llvm/MC/MCCodeEmitter.h"
@ -31,13 +33,9 @@
#include "llvm/Support/raw_ostream.h"
#include <limits>
#include "BinaryBasicBlock.h"
#include "BinaryContext.h"
using namespace llvm::object;
namespace llvm {
namespace flo {
/// BinaryFunction is a representation of machine-level function.
@ -184,10 +182,10 @@ public:
BasicBlocksLayout.end());
}
BinaryFunction(StringRef Name, SymbolRef Symbol, SectionRef Section,
BinaryFunction(std::string Name, SymbolRef Symbol, SectionRef Section,
uint64_t Address, uint64_t Size, BinaryContext &BC) :
Name(Name), Symbol(Symbol), Section(Section), Address(Address),
Size(Size), BC(BC), CodeSectionName((".text." + Name).str()) {}
Size(Size), BC(BC), CodeSectionName(".text." + Name) {}
/// Perform optimal code layout based on edge frequencies making necessary
/// adjustments to instructions at the end of basic blocks.
@ -411,7 +409,6 @@ inline raw_ostream &operator<<(raw_ostream &OS,
}
} // namespace flo
} // namespace llvm
#endif

View file

@ -13,6 +13,7 @@ set(LLVM_LINK_COMPONENTS
add_llvm_tool(llvm-flo
llvm-flo.cpp
BinaryBasicBlock.cpp
BinaryContext.cpp
BinaryFunction.cpp
DataReader.cpp
)

View file

@ -13,6 +13,10 @@
//
//===----------------------------------------------------------------------===//
#include "BinaryBasicBlock.h"
#include "BinaryContext.h"
#include "BinaryFunction.h"
#include "DataReader.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
@ -45,12 +49,6 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Target/TargetMachine.h"
#include "BinaryBasicBlock.h"
#include "BinaryContext.h"
#include "BinaryFunction.h"
#include "DataReader.h"
#include <algorithm>
#include <map>
#include <system_error>
@ -62,7 +60,8 @@ using namespace llvm;
using namespace object;
using namespace flo;
// Tool options.
namespace opts {
static cl::opt<std::string>
InputFilename(cl::Positional, cl::desc("<executable>"), cl::Required);
@ -78,6 +77,17 @@ FunctionNames("funcs",
cl::desc("list of functions to optimize"),
cl::value_desc("func1,func2,func3,..."));
static cl::list<std::string>
SkipFunctionNames("skip_funcs",
cl::CommaSeparated,
cl::desc("list of functions to skip"),
cl::value_desc("func1,func2,func3,..."));
static cl::opt<unsigned>
MaxFunctions("max_funcs",
cl::desc("maximum # of functions to overwrite"),
cl::Optional);
static cl::opt<bool>
EliminateUnreachable("eliminate-unreachable",
cl::desc("eliminate unreachable code"),
@ -99,6 +109,7 @@ DumpFunctions("dump-functions", cl::desc("dump parsed functions (debugging)"),
static cl::opt<bool>
DumpLayout("dump-layout", cl::desc("dump parsed flo data (debugging)"),
cl::Hidden);
} // namespace opts
static StringRef ToolName;
@ -283,7 +294,7 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) {
return;
}
// Store all non-zero file symbols in this map for quick address lookup.
// Store all non-zero symbols in this map for a quick address lookup.
std::map<uint64_t, SymbolRef> FileSymRefs;
// Entry point to the binary.
@ -298,7 +309,7 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) {
// For local symbols we want to keep track of associated FILE symbol for
// disambiguation by name.
std::map<uint64_t, BinaryFunction> BinaryFunctions;
StringRef FileSymbolName;
std::string FileSymbolName;
for (const SymbolRef &Symbol : File->symbols()) {
// Keep undefined symbols for pretty printing?
if (Symbol.getFlags() & SymbolRef::SF_Undefined)
@ -329,7 +340,36 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) {
if (Name->empty())
continue;
BC->GlobalAddresses.emplace(std::make_pair(Address, *Name));
// Disambiguate all local symbols before adding to symbol table.
// Since we don't know if we'll see a global with the same name,
// always modify the local name.
std::string UniqueName;
if (Symbol.getFlags() & SymbolRef::SF_Global) {
assert(BC->GlobalSymbols.find(*Name) == BC->GlobalSymbols.end() &&
"global name not unique");
UniqueName = *Name;
} else {
unsigned LocalCount = 1;
std::string LocalName = (*Name).str() + "/" + FileSymbolName + "/";
while (BC->GlobalSymbols.find(LocalName + std::to_string(LocalCount)) !=
BC->GlobalSymbols.end()) {
++LocalCount;
}
UniqueName = LocalName + std::to_string(LocalCount);
}
/// It's possible we are seeing a globalized local. Even though
/// we've made the name unique, LLVM might still treat it as local
/// if it has a "private global" prefix, e.g. ".L". Thus we have to
/// change the prefix to enforce global scope of the symbol.
if (StringRef(UniqueName).startswith(BC->AsmInfo->getPrivateGlobalPrefix()))
UniqueName = "PG." + UniqueName;
// Add the name to global symbols map.
BC->GlobalSymbols[UniqueName] = Address;
// Add to the reverse map. There could multiple names at the same address.
BC->GlobalAddresses.emplace(std::make_pair(Address, UniqueName));
// Only consider ST_Function symbols for functions. Although this
// assumption could be broken by assembly functions for which the type
@ -353,35 +393,12 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) {
continue;
}
// Disambiguate local function name. Since we don't know if we'll see
// a global with the same name, always modify the local function name.
std::string UniqueFunctionName;
if (!(Symbol.getFlags() & SymbolRef::SF_Global)) {
unsigned LocalCount = 1;
auto LocalName = *Name + "/" + FileSymbolName + "/";
while (BC->GlobalSymbols.find((LocalName + Twine(LocalCount)).str()) !=
BC->GlobalSymbols.end()) {
++LocalCount;
}
UniqueFunctionName = (LocalName + Twine(LocalCount)).str();
} else {
auto I = BC->GlobalSymbols.find(*Name);
assert(I == BC->GlobalSymbols.end() && "global name not unique");
UniqueFunctionName = *Name;
}
// Create the function and add to the map.
BinaryFunctions.emplace(
Address,
BinaryFunction(UniqueFunctionName, Symbol, *Section, Address,
BinaryFunction(UniqueName, Symbol, *Section, Address,
SymbolSize, *BC)
);
// Add the name to global symbols map.
BC->GlobalSymbols[UniqueFunctionName] = Address;
// Add to the reverse map.
BC->GlobalAddresses.emplace(std::make_pair(Address, UniqueFunctionName));
}
// Disassemble every function and build it's control flow graph.
@ -404,8 +421,12 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) {
auto SymRefI = FileSymRefs.upper_bound(Function.getAddress());
if (SymRefI != FileSymRefs.end()) {
auto MaxSize = SymRefI->first - Function.getAddress();
assert(MaxSize >= Function.getSize() &&
"symbol seen in the middle of the function");
if (MaxSize < Function.getSize()) {
DEBUG(dbgs() << "FLO: symbol seen in the middle of the function "
<< Function.getName() << ". Skipping.\n");
Function.setSimple(false);
continue;
}
Function.setMaxSize(MaxSize);
}
@ -434,11 +455,11 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) {
if (!Function.buildCFG())
continue;
if (DumpFunctions)
if (opts::DumpFunctions)
Function.print(errs(), true);
} // Iterate over all functions
if (DumpFunctions)
if (opts::DumpFunctions)
return;
// Run optimization passes.
@ -452,7 +473,7 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) {
// FIXME: this wouldn't work with C++ exceptions until we implement
// support for those as there will be "invisible" edges
// in the graph.
if (EliminateUnreachable) {
if (opts::EliminateUnreachable) {
bool IsFirst = true;
for (auto &BB : Function) {
if (!IsFirst && BB.pred_empty()) {
@ -465,8 +486,8 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) {
DEBUG(dbgs() << "*** After unreachable block elimination ***\n");
DEBUG(Function.print(dbgs(), /* PrintInstructions = */ true));
}
if (ReorderBlocks) {
BFI.second.optimizeLayout(DumpLayout);
if (opts::ReorderBlocks) {
BFI.second.optimizeLayout(opts::DumpLayout);
}
}
@ -475,12 +496,14 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) {
// This is an object file, which we keep for debugging purposes.
// Once we decide it's useless, we should create it in memory.
std::unique_ptr<tool_output_file> Out =
llvm::make_unique<tool_output_file>(OutputFilename + ".o",
llvm::make_unique<tool_output_file>(opts::OutputFilename + ".o",
EC, sys::fs::F_None);
check_error(EC, "cannot create output object file");
std::unique_ptr<tool_output_file> RealOut =
llvm::make_unique<tool_output_file>(OutputFilename, EC, sys::fs::F_None,
llvm::make_unique<tool_output_file>(opts::OutputFilename,
EC,
sys::fs::F_None,
0777);
check_error(EC, "cannot create output executable file");
@ -513,18 +536,31 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) {
if (!Function.isSimple())
continue;
// Only overwrite functions from the list if non-empty.
if (!FunctionNames.empty()) {
bool IsValid = false;
for (auto &Name : FunctionNames) {
// Check against lists of functions from options if we should
// optimize the function.
bool IsValid = true;
if (!opts::FunctionNames.empty()) {
IsValid = false;
for (auto &Name : opts::FunctionNames) {
if (Function.getName() == Name) {
IsValid = true;
break;
}
}
if (!IsValid)
continue;
}
if (!IsValid)
continue;
if (!opts::SkipFunctionNames.empty()) {
for (auto &Name : opts::SkipFunctionNames) {
if (Function.getName() == Name) {
IsValid = false;
break;
}
}
}
if (!IsValid)
continue;
DEBUG(dbgs() << "FLO: generating code for function \""
<< Function.getName() << "\"\n");
@ -635,6 +671,7 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) {
Writer.setStream(RealOut->os());
// Overwrite function in the output file.
uint64_t CountOverwrittenFunctions = 0;
for (auto &BFI : BinaryFunctions) {
auto &Function = BFI.second;
@ -663,6 +700,13 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) {
BC->MAB->writeNopData(Function.getMaxSize() - Function.getImageSize(),
&Writer);
RealOut->os().seek(Pos);
++CountOverwrittenFunctions;
if (opts::MaxFunctions && CountOverwrittenFunctions == opts::MaxFunctions) {
outs() << "FLO: maximum number of functions reached\n";
break;
}
}
if (EntryPointFunction) {
@ -672,6 +716,9 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) {
DEBUG(dbgs() << "FLO: no entry point function was set\n");
}
outs() << "FLO: " << CountOverwrittenFunctions
<< " out of " << BinaryFunctions.size()
<< " functions were overwritten.\n";
// TODO: we should find a way to mark the binary as optimized by us.
Out->keep();
@ -702,35 +749,36 @@ int main(int argc, char **argv) {
ToolName = argv[0];
if (!sys::fs::exists(InputFilename))
report_error(InputFilename, errc::no_such_file_or_directory);
if (!sys::fs::exists(opts::InputFilename))
report_error(opts::InputFilename, errc::no_such_file_or_directory);
std::unique_ptr<flo::DataReader> DR(new DataReader(errs()));
if (!InputDataFilename.empty()) {
if (!sys::fs::exists(InputDataFilename))
report_error(InputDataFilename, errc::no_such_file_or_directory);
if (!opts::InputDataFilename.empty()) {
if (!sys::fs::exists(opts::InputDataFilename))
report_error(opts::InputDataFilename, errc::no_such_file_or_directory);
// Attempt to read input flo data
auto ReaderOrErr = flo::DataReader::readPerfData(InputDataFilename, errs());
auto ReaderOrErr =
flo::DataReader::readPerfData(opts::InputDataFilename, errs());
if (std::error_code EC = ReaderOrErr.getError())
report_error(InputDataFilename, EC);
report_error(opts::InputDataFilename, EC);
DR.reset(ReaderOrErr.get().release());
if (DumpData) {
if (opts::DumpData) {
DR->dump();
return EXIT_SUCCESS;
}
}
// Attempt to open the binary.
ErrorOr<OwningBinary<Binary>> BinaryOrErr = createBinary(InputFilename);
ErrorOr<OwningBinary<Binary>> BinaryOrErr = createBinary(opts::InputFilename);
if (std::error_code EC = BinaryOrErr.getError())
report_error(InputFilename, EC);
report_error(opts::InputFilename, EC);
Binary &Binary = *BinaryOrErr.get().getBinary();
if (ELFObjectFileBase *e = dyn_cast<ELFObjectFileBase>(&Binary)) {
OptimizeFile(e, *DR.get());
} else {
report_error(InputFilename, object_error::invalid_file_type);
report_error(opts::InputFilename, object_error::invalid_file_type);
}
return EXIT_SUCCESS;