From b4ed5cc942ee8453ff699b7317c449d9c72e553e Mon Sep 17 00:00:00 2001 From: Maksim Panchenko Date: Wed, 14 Oct 2015 15:35:14 -0700 Subject: [PATCH] Make FLO work on hhvm binary. Summary: Fixes several issues that prevented us from running hhvm binary. (cherry picked from FBD2543057) --- bolt/BinaryBasicBlock.cpp | 7 +- bolt/BinaryBasicBlock.h | 2 - bolt/BinaryContext.cpp | 47 +++++++++++ bolt/BinaryContext.h | 15 ++-- bolt/BinaryFunction.cpp | 50 ++++-------- bolt/BinaryFunction.h | 11 +-- bolt/CMakeLists.txt | 1 + bolt/llvm-flo.cpp | 168 ++++++++++++++++++++++++-------------- 8 files changed, 188 insertions(+), 113 deletions(-) create mode 100644 bolt/BinaryContext.cpp diff --git a/bolt/BinaryBasicBlock.cpp b/bolt/BinaryBasicBlock.cpp index 70a84280215f..225b04f02eaa 100644 --- a/bolt/BinaryBasicBlock.cpp +++ b/bolt/BinaryBasicBlock.cpp @@ -9,6 +9,8 @@ // //===----------------------------------------------------------------------===// +#include "BinaryBasicBlock.h" +#include "BinaryFunction.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -17,14 +19,10 @@ #include #include -#include "BinaryBasicBlock.h" -#include "BinaryFunction.h" - #undef DEBUG_TYPE #define DEBUG_TYPE "flo" namespace llvm { - namespace flo { bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS) { @@ -66,5 +64,4 @@ void BinaryBasicBlock::removePredecessor(BinaryBasicBlock *Pred) { } } // namespace flo - } // namespace llvm diff --git a/bolt/BinaryBasicBlock.h b/bolt/BinaryBasicBlock.h index 9903710007be..11d94ee1fe9b 100644 --- a/bolt/BinaryBasicBlock.h +++ b/bolt/BinaryBasicBlock.h @@ -28,7 +28,6 @@ #include namespace llvm { - namespace flo { class BinaryFunction; @@ -222,7 +221,6 @@ bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS); } // namespace flo - } // namespace llvm #endif diff --git a/bolt/BinaryContext.cpp b/bolt/BinaryContext.cpp new file mode 100644 index 000000000000..eb0f6dd6ef72 --- /dev/null +++ b/bolt/BinaryContext.cpp @@ -0,0 +1,47 @@ +//===--- BinaryContext.cpp - Interface for machine-level context ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + +#include "BinaryContext.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" + +namespace llvm { +namespace flo { + +MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, + Twine Prefix) { + MCSymbol *Symbol{nullptr}; + std::string Name; + auto NI = GlobalAddresses.find(Address); + if (NI != GlobalAddresses.end()) { + // Even though there could be multiple names registered at the address, + // we only use the first one. + Name = NI->second; + } else { + Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); + assert(GlobalSymbols.find(Name) == GlobalSymbols.end() && + "created name is not unique"); + GlobalAddresses.emplace(std::make_pair(Address, Name)); + } + + Symbol = Ctx->lookupSymbol(Name); + if (Symbol) + return Symbol; + + Symbol = Ctx->getOrCreateSymbol(Name); + GlobalSymbols[Name] = Address; + + return Symbol; +} + +} // namespace flo +} // namespace llvm diff --git a/bolt/BinaryContext.h b/bolt/BinaryContext.h index 0e6f3ca7724f..ab0e9888941a 100644 --- a/bolt/BinaryContext.h +++ b/bolt/BinaryContext.h @@ -7,6 +7,8 @@ // //===----------------------------------------------------------------------===// // +// Context for processing binary executables in files and/or memory. +// //===----------------------------------------------------------------------===// #ifndef LLVM_TOOLS_LLVM_FLO_BINARY_CONTEXT_H @@ -24,27 +26,25 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/TargetRegistry.h" - #include #include #include #include namespace llvm { - namespace flo { class DataReader; -/// Everything that's needed to process binaries lives here. class BinaryContext { BinaryContext() = delete; public: - // [name] -> [address] + // [name] -> [address] map used for global symbol resolution. typedef std::map SymbolMapType; SymbolMapType GlobalSymbols; @@ -111,10 +111,15 @@ public: DR(DR) {} ~BinaryContext() {} + + /// Return a global symbol registered at a given \p Address. If no symbol + /// exists, create one with unique name using \p Prefix. + /// If there are multiple symbols registered at the \p Address, then + /// return the first one. + MCSymbol *getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix); }; } // namespace flo - } // namespace llvm #endif diff --git a/bolt/BinaryFunction.cpp b/bolt/BinaryFunction.cpp index 5ac5f9692a01..cdda810ae15c 100644 --- a/bolt/BinaryFunction.cpp +++ b/bolt/BinaryFunction.cpp @@ -10,6 +10,9 @@ //===----------------------------------------------------------------------===// +#include "BinaryBasicBlock.h" +#include "BinaryFunction.h" +#include "DataReader.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -23,15 +26,10 @@ #include #include -#include "BinaryBasicBlock.h" -#include "BinaryFunction.h" -#include "DataReader.h" - #undef DEBUG_TYPE #define DEBUG_TYPE "flo" namespace llvm { - namespace flo { BinaryBasicBlock * @@ -181,7 +179,16 @@ bool BinaryFunction::disassemble(ArrayRef FunctionData) { break; } + if (MIA->isUnsupported(Instruction)) { + DEBUG(dbgs() << "FLO: unsupported instruction seen. Skipping function " + << getName() << ".\n"); + IsSimple = false; + break; + } + if (MIA->isIndirectBranch(Instruction)) { + DEBUG(dbgs() << "FLO: indirect branch seen. Skipping function " + << getName() << ".\n"); IsSimple = false; break; } @@ -231,21 +238,8 @@ bool BinaryFunction::disassemble(ArrayRef FunctionData) { } else { // This is a call regardless of the opcode (e.g. tail call). IsCall = true; - // Check if we already have a symbol at this address. - std::string Name; - auto NI = BC.GlobalAddresses.find(InstructionTarget); - if (NI != BC.GlobalAddresses.end()) { - // Any registered name will do. - Name = NI->second; - } else { - // Create a new symbol at the destination. - Name = (Twine("FUNCat0x") + - Twine::utohexstr(InstructionTarget)).str(); - BC.GlobalAddresses.emplace(std::make_pair(InstructionTarget, - Name)); - } - TargetSymbol = Ctx->getOrCreateSymbol(Name); - BC.GlobalSymbols[Name] = InstructionTarget; + TargetSymbol = BC.getOrCreateGlobalSymbol(InstructionTarget, + "FUNCat"); } } @@ -282,19 +276,8 @@ bool BinaryFunction::disassemble(ArrayRef FunctionData) { IsSimple = false; break; } - std::string Name; - auto NI = BC.GlobalAddresses.find(TargetAddress); - if (NI != BC.GlobalAddresses.end()) { - Name = NI->second; - } else { - // Register new "data" symbol at the destination. - Name = (Twine("DATAat0x") + Twine::utohexstr(TargetAddress)).str(); - BC.GlobalAddresses.emplace(std::make_pair(TargetAddress, - Name)); - } - TargetSymbol = Ctx->getOrCreateSymbol(Name); - BC.GlobalSymbols[Name] = TargetAddress; - + // FIXME: check that the address is in data, not in code. + TargetSymbol = BC.getOrCreateGlobalSymbol(TargetAddress, "DATAat"); MIA->replaceRIPOperandDisp( Instruction, MCOperand::createExpr( @@ -806,5 +789,4 @@ void BinaryFunction::solveOptimalLayout(bool DumpLayout) { } } // namespace flo - } // namespace llvm diff --git a/bolt/BinaryFunction.h b/bolt/BinaryFunction.h index f6cb9a882b10..32af380e1dc0 100644 --- a/bolt/BinaryFunction.h +++ b/bolt/BinaryFunction.h @@ -17,6 +17,8 @@ #ifndef LLVM_TOOLS_LLVM_FLO_BINARY_FUNCTION_H #define LLVM_TOOLS_LLVM_FLO_BINARY_FUNCTION_H +#include "BinaryBasicBlock.h" +#include "BinaryContext.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/ilist.h" #include "llvm/MC/MCCodeEmitter.h" @@ -31,13 +33,9 @@ #include "llvm/Support/raw_ostream.h" #include -#include "BinaryBasicBlock.h" -#include "BinaryContext.h" - using namespace llvm::object; namespace llvm { - namespace flo { /// BinaryFunction is a representation of machine-level function. @@ -184,10 +182,10 @@ public: BasicBlocksLayout.end()); } - BinaryFunction(StringRef Name, SymbolRef Symbol, SectionRef Section, + BinaryFunction(std::string Name, SymbolRef Symbol, SectionRef Section, uint64_t Address, uint64_t Size, BinaryContext &BC) : Name(Name), Symbol(Symbol), Section(Section), Address(Address), - Size(Size), BC(BC), CodeSectionName((".text." + Name).str()) {} + Size(Size), BC(BC), CodeSectionName(".text." + Name) {} /// Perform optimal code layout based on edge frequencies making necessary /// adjustments to instructions at the end of basic blocks. @@ -411,7 +409,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, } } // namespace flo - } // namespace llvm #endif diff --git a/bolt/CMakeLists.txt b/bolt/CMakeLists.txt index b8720c3d9a64..a66505c5d097 100644 --- a/bolt/CMakeLists.txt +++ b/bolt/CMakeLists.txt @@ -13,6 +13,7 @@ set(LLVM_LINK_COMPONENTS add_llvm_tool(llvm-flo llvm-flo.cpp BinaryBasicBlock.cpp + BinaryContext.cpp BinaryFunction.cpp DataReader.cpp ) diff --git a/bolt/llvm-flo.cpp b/bolt/llvm-flo.cpp index 2035b1003c76..657ebb50b14e 100644 --- a/bolt/llvm-flo.cpp +++ b/bolt/llvm-flo.cpp @@ -13,6 +13,10 @@ // //===----------------------------------------------------------------------===// +#include "BinaryBasicBlock.h" +#include "BinaryContext.h" +#include "BinaryFunction.h" +#include "DataReader.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ExecutionEngine/Orc/LambdaResolver.h" #include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" @@ -45,12 +49,6 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Target/TargetMachine.h" - -#include "BinaryBasicBlock.h" -#include "BinaryContext.h" -#include "BinaryFunction.h" -#include "DataReader.h" - #include #include #include @@ -62,7 +60,8 @@ using namespace llvm; using namespace object; using namespace flo; -// Tool options. +namespace opts { + static cl::opt InputFilename(cl::Positional, cl::desc(""), cl::Required); @@ -78,6 +77,17 @@ FunctionNames("funcs", cl::desc("list of functions to optimize"), cl::value_desc("func1,func2,func3,...")); +static cl::list +SkipFunctionNames("skip_funcs", + cl::CommaSeparated, + cl::desc("list of functions to skip"), + cl::value_desc("func1,func2,func3,...")); + +static cl::opt +MaxFunctions("max_funcs", + cl::desc("maximum # of functions to overwrite"), + cl::Optional); + static cl::opt EliminateUnreachable("eliminate-unreachable", cl::desc("eliminate unreachable code"), @@ -99,6 +109,7 @@ DumpFunctions("dump-functions", cl::desc("dump parsed functions (debugging)"), static cl::opt DumpLayout("dump-layout", cl::desc("dump parsed flo data (debugging)"), cl::Hidden); +} // namespace opts static StringRef ToolName; @@ -283,7 +294,7 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) { return; } - // Store all non-zero file symbols in this map for quick address lookup. + // Store all non-zero symbols in this map for a quick address lookup. std::map FileSymRefs; // Entry point to the binary. @@ -298,7 +309,7 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) { // For local symbols we want to keep track of associated FILE symbol for // disambiguation by name. std::map BinaryFunctions; - StringRef FileSymbolName; + std::string FileSymbolName; for (const SymbolRef &Symbol : File->symbols()) { // Keep undefined symbols for pretty printing? if (Symbol.getFlags() & SymbolRef::SF_Undefined) @@ -329,7 +340,36 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) { if (Name->empty()) continue; - BC->GlobalAddresses.emplace(std::make_pair(Address, *Name)); + // Disambiguate all local symbols before adding to symbol table. + // Since we don't know if we'll see a global with the same name, + // always modify the local name. + std::string UniqueName; + if (Symbol.getFlags() & SymbolRef::SF_Global) { + assert(BC->GlobalSymbols.find(*Name) == BC->GlobalSymbols.end() && + "global name not unique"); + UniqueName = *Name; + } else { + unsigned LocalCount = 1; + std::string LocalName = (*Name).str() + "/" + FileSymbolName + "/"; + while (BC->GlobalSymbols.find(LocalName + std::to_string(LocalCount)) != + BC->GlobalSymbols.end()) { + ++LocalCount; + } + UniqueName = LocalName + std::to_string(LocalCount); + } + + /// It's possible we are seeing a globalized local. Even though + /// we've made the name unique, LLVM might still treat it as local + /// if it has a "private global" prefix, e.g. ".L". Thus we have to + /// change the prefix to enforce global scope of the symbol. + if (StringRef(UniqueName).startswith(BC->AsmInfo->getPrivateGlobalPrefix())) + UniqueName = "PG." + UniqueName; + + // Add the name to global symbols map. + BC->GlobalSymbols[UniqueName] = Address; + + // Add to the reverse map. There could multiple names at the same address. + BC->GlobalAddresses.emplace(std::make_pair(Address, UniqueName)); // Only consider ST_Function symbols for functions. Although this // assumption could be broken by assembly functions for which the type @@ -353,35 +393,12 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) { continue; } - // Disambiguate local function name. Since we don't know if we'll see - // a global with the same name, always modify the local function name. - std::string UniqueFunctionName; - if (!(Symbol.getFlags() & SymbolRef::SF_Global)) { - unsigned LocalCount = 1; - auto LocalName = *Name + "/" + FileSymbolName + "/"; - while (BC->GlobalSymbols.find((LocalName + Twine(LocalCount)).str()) != - BC->GlobalSymbols.end()) { - ++LocalCount; - } - UniqueFunctionName = (LocalName + Twine(LocalCount)).str(); - } else { - auto I = BC->GlobalSymbols.find(*Name); - assert(I == BC->GlobalSymbols.end() && "global name not unique"); - UniqueFunctionName = *Name; - } - // Create the function and add to the map. BinaryFunctions.emplace( Address, - BinaryFunction(UniqueFunctionName, Symbol, *Section, Address, + BinaryFunction(UniqueName, Symbol, *Section, Address, SymbolSize, *BC) ); - - // Add the name to global symbols map. - BC->GlobalSymbols[UniqueFunctionName] = Address; - - // Add to the reverse map. - BC->GlobalAddresses.emplace(std::make_pair(Address, UniqueFunctionName)); } // Disassemble every function and build it's control flow graph. @@ -404,8 +421,12 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) { auto SymRefI = FileSymRefs.upper_bound(Function.getAddress()); if (SymRefI != FileSymRefs.end()) { auto MaxSize = SymRefI->first - Function.getAddress(); - assert(MaxSize >= Function.getSize() && - "symbol seen in the middle of the function"); + if (MaxSize < Function.getSize()) { + DEBUG(dbgs() << "FLO: symbol seen in the middle of the function " + << Function.getName() << ". Skipping.\n"); + Function.setSimple(false); + continue; + } Function.setMaxSize(MaxSize); } @@ -434,11 +455,11 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) { if (!Function.buildCFG()) continue; - if (DumpFunctions) + if (opts::DumpFunctions) Function.print(errs(), true); } // Iterate over all functions - if (DumpFunctions) + if (opts::DumpFunctions) return; // Run optimization passes. @@ -452,7 +473,7 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) { // FIXME: this wouldn't work with C++ exceptions until we implement // support for those as there will be "invisible" edges // in the graph. - if (EliminateUnreachable) { + if (opts::EliminateUnreachable) { bool IsFirst = true; for (auto &BB : Function) { if (!IsFirst && BB.pred_empty()) { @@ -465,8 +486,8 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) { DEBUG(dbgs() << "*** After unreachable block elimination ***\n"); DEBUG(Function.print(dbgs(), /* PrintInstructions = */ true)); } - if (ReorderBlocks) { - BFI.second.optimizeLayout(DumpLayout); + if (opts::ReorderBlocks) { + BFI.second.optimizeLayout(opts::DumpLayout); } } @@ -475,12 +496,14 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) { // This is an object file, which we keep for debugging purposes. // Once we decide it's useless, we should create it in memory. std::unique_ptr Out = - llvm::make_unique(OutputFilename + ".o", + llvm::make_unique(opts::OutputFilename + ".o", EC, sys::fs::F_None); check_error(EC, "cannot create output object file"); std::unique_ptr RealOut = - llvm::make_unique(OutputFilename, EC, sys::fs::F_None, + llvm::make_unique(opts::OutputFilename, + EC, + sys::fs::F_None, 0777); check_error(EC, "cannot create output executable file"); @@ -513,18 +536,31 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) { if (!Function.isSimple()) continue; - // Only overwrite functions from the list if non-empty. - if (!FunctionNames.empty()) { - bool IsValid = false; - for (auto &Name : FunctionNames) { + // Check against lists of functions from options if we should + // optimize the function. + bool IsValid = true; + if (!opts::FunctionNames.empty()) { + IsValid = false; + for (auto &Name : opts::FunctionNames) { if (Function.getName() == Name) { IsValid = true; break; } } - if (!IsValid) - continue; } + if (!IsValid) + continue; + + if (!opts::SkipFunctionNames.empty()) { + for (auto &Name : opts::SkipFunctionNames) { + if (Function.getName() == Name) { + IsValid = false; + break; + } + } + } + if (!IsValid) + continue; DEBUG(dbgs() << "FLO: generating code for function \"" << Function.getName() << "\"\n"); @@ -635,6 +671,7 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) { Writer.setStream(RealOut->os()); // Overwrite function in the output file. + uint64_t CountOverwrittenFunctions = 0; for (auto &BFI : BinaryFunctions) { auto &Function = BFI.second; @@ -663,6 +700,13 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) { BC->MAB->writeNopData(Function.getMaxSize() - Function.getImageSize(), &Writer); RealOut->os().seek(Pos); + + ++CountOverwrittenFunctions; + + if (opts::MaxFunctions && CountOverwrittenFunctions == opts::MaxFunctions) { + outs() << "FLO: maximum number of functions reached\n"; + break; + } } if (EntryPointFunction) { @@ -672,6 +716,9 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) { DEBUG(dbgs() << "FLO: no entry point function was set\n"); } + outs() << "FLO: " << CountOverwrittenFunctions + << " out of " << BinaryFunctions.size() + << " functions were overwritten.\n"; // TODO: we should find a way to mark the binary as optimized by us. Out->keep(); @@ -702,35 +749,36 @@ int main(int argc, char **argv) { ToolName = argv[0]; - if (!sys::fs::exists(InputFilename)) - report_error(InputFilename, errc::no_such_file_or_directory); + if (!sys::fs::exists(opts::InputFilename)) + report_error(opts::InputFilename, errc::no_such_file_or_directory); std::unique_ptr DR(new DataReader(errs())); - if (!InputDataFilename.empty()) { - if (!sys::fs::exists(InputDataFilename)) - report_error(InputDataFilename, errc::no_such_file_or_directory); + if (!opts::InputDataFilename.empty()) { + if (!sys::fs::exists(opts::InputDataFilename)) + report_error(opts::InputDataFilename, errc::no_such_file_or_directory); // Attempt to read input flo data - auto ReaderOrErr = flo::DataReader::readPerfData(InputDataFilename, errs()); + auto ReaderOrErr = + flo::DataReader::readPerfData(opts::InputDataFilename, errs()); if (std::error_code EC = ReaderOrErr.getError()) - report_error(InputDataFilename, EC); + report_error(opts::InputDataFilename, EC); DR.reset(ReaderOrErr.get().release()); - if (DumpData) { + if (opts::DumpData) { DR->dump(); return EXIT_SUCCESS; } } // Attempt to open the binary. - ErrorOr> BinaryOrErr = createBinary(InputFilename); + ErrorOr> BinaryOrErr = createBinary(opts::InputFilename); if (std::error_code EC = BinaryOrErr.getError()) - report_error(InputFilename, EC); + report_error(opts::InputFilename, EC); Binary &Binary = *BinaryOrErr.get().getBinary(); if (ELFObjectFileBase *e = dyn_cast(&Binary)) { OptimizeFile(e, *DR.get()); } else { - report_error(InputFilename, object_error::invalid_file_type); + report_error(opts::InputFilename, object_error::invalid_file_type); } return EXIT_SUCCESS;