llvm/bolt/lib/Core/Exceptions.cpp
serge-sans-paille 290e482342 Cleanup LLVMDWARFDebugInfo
As usual with that header cleanup series, some implicit dependencies now need to
be explicit:

llvm/DebugInfo/DWARF/DWARFContext.h no longer includes:
- "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
- "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
- "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
- "llvm/DebugInfo/DWARF/DWARFDebugAranges.h"
- "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
- "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
- "llvm/DebugInfo/DWARF/DWARFDebugMacro.h"
- "llvm/DebugInfo/DWARF/DWARFGdbIndex.h"
- "llvm/DebugInfo/DWARF/DWARFSection.h"
- "llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
- "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"

Plus llvm/Support/Errc.h not included by a bunch of llvm/DebugInfo/DWARF/DWARF*.h files

Preprocessed lines to build llvm on my setup:
after: 1065629059
before: 1066621848

Which is a great diff!

Discourse thread: https://discourse.llvm.org/t/include-what-you-use-include-cleanup
Differential Revision: https://reviews.llvm.org/D119723
2022-02-15 09:16:03 +01:00

893 lines
32 KiB
C++

//===- bolt/Core/Exceptions.cpp - Helpers for C++ exceptions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements functions for handling C++ exception meta data.
//
// Some of the code is taken from examples/ExceptionDemo
//
//===----------------------------------------------------------------------===//
#include "bolt/Core/Exceptions.h"
#include "bolt/Core/BinaryFunction.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt-exceptions"
using namespace llvm::dwarf;
namespace opts {
extern llvm::cl::OptionCategory BoltCategory;
extern llvm::cl::opt<unsigned> Verbosity;
static llvm::cl::opt<bool>
PrintExceptions("print-exceptions",
llvm::cl::desc("print exception handling data"),
llvm::cl::ZeroOrMore,
llvm::cl::Hidden,
llvm::cl::cat(BoltCategory));
} // namespace opts
namespace llvm {
namespace bolt {
// Read and dump the .gcc_exception_table section entry.
//
// .gcc_except_table section contains a set of Language-Specific Data Areas -
// a fancy name for exception handling tables. There's one LSDA entry per
// function. However, we can't actually tell which function LSDA refers to
// unless we parse .eh_frame entry that refers to the LSDA.
// Then inside LSDA most addresses are encoded relative to the function start,
// so we need the function context in order to get to real addresses.
//
// The best visual representation of the tables comprising LSDA and
// relationships between them is illustrated at:
// https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf
// Keep in mind that GCC implementation deviates slightly from that document.
//
// To summarize, there are 4 tables in LSDA: call site table, actions table,
// types table, and types index table (for indirection). The main table contains
// call site entries. Each call site includes a PC range that can throw an
// exception, a handler (landing pad), and a reference to an entry in the action
// table. The handler and/or action could be 0. The action entry is a head
// of a list of actions associated with a call site. The action table contains
// all such lists (it could be optimized to share list tails). Each action could
// be either to catch an exception of a given type, to perform a cleanup, or to
// propagate the exception after filtering it out (e.g. to make sure function
// exception specification is not violated). Catch action contains a reference
// to an entry in the type table, and filter action refers to an entry in the
// type index table to encode a set of types to filter.
//
// Call site table follows LSDA header. Action table immediately follows the
// call site table.
//
// Both types table and type index table start at the same location, but they
// grow in opposite directions (types go up, indices go down). The beginning of
// these tables is encoded in LSDA header. Sizes for both of the tables are not
// included anywhere.
//
// We have to parse all of the tables to determine their sizes. Then we have
// to parse the call site table and associate discovered information with
// actual call instructions and landing pad blocks.
//
// For the purpose of rewriting exception handling tables, we can reuse action,
// and type index tables in their original binary format.
//
// Type table could be encoded using position-independent references, and thus
// may require relocation.
//
// Ideally we should be able to re-write LSDA in-place, without the need to
// allocate a new space for it. Sadly there's no guarantee that the new call
// site table will be the same size as GCC uses uleb encodings for PC offsets.
//
// Note: some functions have LSDA entries with 0 call site entries.
void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
uint64_t LSDASectionAddress) {
assert(CurrentState == State::Disassembled && "unexpected function state");
if (!getLSDAAddress())
return;
DWARFDataExtractor Data(
StringRef(reinterpret_cast<const char *>(LSDASectionData.data()),
LSDASectionData.size()),
BC.DwCtx->getDWARFObj().isLittleEndian(), 8);
uint64_t Offset = getLSDAAddress() - LSDASectionAddress;
assert(Data.isValidOffset(Offset) && "wrong LSDA address");
uint8_t LPStartEncoding = Data.getU8(&Offset);
uint64_t LPStart = 0;
if (Optional<uint64_t> MaybeLPStart = Data.getEncodedPointer(
&Offset, LPStartEncoding, Offset + LSDASectionAddress))
LPStart = *MaybeLPStart;
assert(LPStart == 0 && "support for split functions not implemented");
const uint8_t TTypeEncoding = Data.getU8(&Offset);
size_t TTypeEncodingSize = 0;
uintptr_t TTypeEnd = 0;
if (TTypeEncoding != DW_EH_PE_omit) {
TTypeEnd = Data.getULEB128(&Offset);
TTypeEncodingSize = BC.getDWARFEncodingSize(TTypeEncoding);
}
if (opts::PrintExceptions) {
outs() << "[LSDA at 0x" << Twine::utohexstr(getLSDAAddress())
<< " for function " << *this << "]:\n";
outs() << "LPStart Encoding = 0x" << Twine::utohexstr(LPStartEncoding)
<< '\n';
outs() << "LPStart = 0x" << Twine::utohexstr(LPStart) << '\n';
outs() << "TType Encoding = 0x" << Twine::utohexstr(TTypeEncoding) << '\n';
outs() << "TType End = " << TTypeEnd << '\n';
}
// Table to store list of indices in type table. Entries are uleb128 values.
const uint64_t TypeIndexTableStart = Offset + TTypeEnd;
// Offset past the last decoded index.
uint64_t MaxTypeIndexTableOffset = 0;
// Max positive index used in type table.
unsigned MaxTypeIndex = 0;
// The actual type info table starts at the same location, but grows in
// opposite direction. TTypeEncoding is used to encode stored values.
const uint64_t TypeTableStart = Offset + TTypeEnd;
uint8_t CallSiteEncoding = Data.getU8(&Offset);
uint32_t CallSiteTableLength = Data.getULEB128(&Offset);
uint64_t CallSiteTableStart = Offset;
uint64_t CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength;
uint64_t CallSitePtr = CallSiteTableStart;
uint64_t ActionTableStart = CallSiteTableEnd;
if (opts::PrintExceptions) {
outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n';
outs() << "CallSite table length = " << CallSiteTableLength << '\n';
outs() << '\n';
}
this->HasEHRanges = CallSitePtr < CallSiteTableEnd;
const uint64_t RangeBase = getAddress();
while (CallSitePtr < CallSiteTableEnd) {
uint64_t Start = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding,
CallSitePtr + LSDASectionAddress);
uint64_t Length = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding,
CallSitePtr + LSDASectionAddress);
uint64_t LandingPad = *Data.getEncodedPointer(
&CallSitePtr, CallSiteEncoding, CallSitePtr + LSDASectionAddress);
uint64_t ActionEntry = Data.getULEB128(&CallSitePtr);
if (opts::PrintExceptions) {
outs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start)
<< ", 0x" << Twine::utohexstr(RangeBase + Start + Length)
<< "); landing pad: 0x" << Twine::utohexstr(LPStart + LandingPad)
<< "; action entry: 0x" << Twine::utohexstr(ActionEntry) << "\n";
outs() << " current offset is " << (CallSitePtr - CallSiteTableStart)
<< '\n';
}
// Create a handler entry if necessary.
MCSymbol *LPSymbol = nullptr;
if (LandingPad) {
if (!getInstructionAtOffset(LandingPad)) {
if (opts::Verbosity >= 1)
errs() << "BOLT-WARNING: landing pad " << Twine::utohexstr(LandingPad)
<< " not pointing to an instruction in function " << *this
<< " - ignoring.\n";
} else {
auto Label = Labels.find(LandingPad);
if (Label != Labels.end()) {
LPSymbol = Label->second;
} else {
LPSymbol = BC.Ctx->createNamedTempSymbol("LP");
Labels[LandingPad] = LPSymbol;
}
}
}
// Mark all call instructions in the range.
auto II = Instructions.find(Start);
auto IE = Instructions.end();
assert(II != IE && "exception range not pointing to an instruction");
do {
MCInst &Instruction = II->second;
if (BC.MIB->isCall(Instruction) &&
!BC.MIB->getConditionalTailCall(Instruction)) {
assert(!BC.MIB->isInvoke(Instruction) &&
"overlapping exception ranges detected");
// Add extra operands to a call instruction making it an invoke from
// now on.
BC.MIB->addEHInfo(Instruction,
MCPlus::MCLandingPad(LPSymbol, ActionEntry));
}
++II;
} while (II != IE && II->first < Start + Length);
if (ActionEntry != 0) {
auto printType = [&](int Index, raw_ostream &OS) {
assert(Index > 0 && "only positive indices are valid");
uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
uint64_t TypeAddress =
*Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
if ((TTypeEncoding & DW_EH_PE_pcrel) && TypeAddress == TTEntryAddress)
TypeAddress = 0;
if (TypeAddress == 0) {
OS << "<all>";
return;
}
if (TTypeEncoding & DW_EH_PE_indirect) {
ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress);
assert(PointerOrErr && "failed to decode indirect address");
TypeAddress = *PointerOrErr;
}
if (BinaryData *TypeSymBD = BC.getBinaryDataAtAddress(TypeAddress))
OS << TypeSymBD->getName();
else
OS << "0x" << Twine::utohexstr(TypeAddress);
};
if (opts::PrintExceptions)
outs() << " actions: ";
uint64_t ActionPtr = ActionTableStart + ActionEntry - 1;
int64_t ActionType;
int64_t ActionNext;
const char *Sep = "";
do {
ActionType = Data.getSLEB128(&ActionPtr);
const uint32_t Self = ActionPtr;
ActionNext = Data.getSLEB128(&ActionPtr);
if (opts::PrintExceptions)
outs() << Sep << "(" << ActionType << ", " << ActionNext << ") ";
if (ActionType == 0) {
if (opts::PrintExceptions)
outs() << "cleanup";
} else if (ActionType > 0) {
// It's an index into a type table.
MaxTypeIndex =
std::max(MaxTypeIndex, static_cast<unsigned>(ActionType));
if (opts::PrintExceptions) {
outs() << "catch type ";
printType(ActionType, outs());
}
} else { // ActionType < 0
if (opts::PrintExceptions)
outs() << "filter exception types ";
const char *TSep = "";
// ActionType is a negative *byte* offset into *uleb128-encoded* table
// of indices with base 1.
// E.g. -1 means offset 0, -2 is offset 1, etc. The indices are
// encoded using uleb128 thus we cannot directly dereference them.
uint64_t TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1;
while (uint64_t Index = Data.getULEB128(&TypeIndexTablePtr)) {
MaxTypeIndex = std::max(MaxTypeIndex, static_cast<unsigned>(Index));
if (opts::PrintExceptions) {
outs() << TSep;
printType(Index, outs());
TSep = ", ";
}
}
MaxTypeIndexTableOffset = std::max(
MaxTypeIndexTableOffset, TypeIndexTablePtr - TypeIndexTableStart);
}
Sep = "; ";
ActionPtr = Self + ActionNext;
} while (ActionNext);
if (opts::PrintExceptions)
outs() << '\n';
}
}
if (opts::PrintExceptions)
outs() << '\n';
assert(TypeIndexTableStart + MaxTypeIndexTableOffset <=
Data.getData().size() &&
"LSDA entry has crossed section boundary");
if (TTypeEnd) {
LSDAActionTable = LSDASectionData.slice(
ActionTableStart, TypeIndexTableStart -
MaxTypeIndex * TTypeEncodingSize -
ActionTableStart);
for (unsigned Index = 1; Index <= MaxTypeIndex; ++Index) {
uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
uint64_t TypeAddress =
*Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
if ((TTypeEncoding & DW_EH_PE_pcrel) && (TypeAddress == TTEntryAddress))
TypeAddress = 0;
if (TTypeEncoding & DW_EH_PE_indirect) {
LSDATypeAddressTable.emplace_back(TypeAddress);
if (TypeAddress) {
ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress);
assert(PointerOrErr && "failed to decode indirect address");
TypeAddress = *PointerOrErr;
}
}
LSDATypeTable.emplace_back(TypeAddress);
}
LSDATypeIndexTable =
LSDASectionData.slice(TypeIndexTableStart, MaxTypeIndexTableOffset);
}
}
void BinaryFunction::updateEHRanges() {
if (getSize() == 0)
return;
assert(CurrentState == State::CFG_Finalized && "unexpected state");
// Build call sites table.
struct EHInfo {
const MCSymbol *LP; // landing pad
uint64_t Action;
};
// If previous call can throw, this is its exception handler.
EHInfo PreviousEH = {nullptr, 0};
// Marker for the beginning of exceptions range.
const MCSymbol *StartRange = nullptr;
// Indicates whether the start range is located in a cold part.
bool IsStartInCold = false;
// Have we crossed hot/cold border for split functions?
bool SeenCold = false;
// Sites to update - either regular or cold.
CallSitesType *Sites = &CallSites;
for (BinaryBasicBlock *&BB : BasicBlocksLayout) {
if (BB->isCold() && !SeenCold) {
SeenCold = true;
// Close the range (if any) and change the target call sites.
if (StartRange) {
Sites->emplace_back(CallSite{StartRange, getFunctionEndLabel(),
PreviousEH.LP, PreviousEH.Action});
}
Sites = &ColdCallSites;
// Reset the range.
StartRange = nullptr;
PreviousEH = {nullptr, 0};
}
for (auto II = BB->begin(); II != BB->end(); ++II) {
if (!BC.MIB->isCall(*II))
continue;
// Instruction can throw an exception that should be handled.
const bool Throws = BC.MIB->isInvoke(*II);
// Ignore the call if it's a continuation of a no-throw gap.
if (!Throws && !StartRange)
continue;
// Extract exception handling information from the instruction.
const MCSymbol *LP = nullptr;
uint64_t Action = 0;
if (const Optional<MCPlus::MCLandingPad> EHInfo = BC.MIB->getEHInfo(*II))
std::tie(LP, Action) = *EHInfo;
// No action if the exception handler has not changed.
if (Throws && StartRange && PreviousEH.LP == LP &&
PreviousEH.Action == Action)
continue;
// Same symbol is used for the beginning and the end of the range.
const MCSymbol *EHSymbol;
MCInst EHLabel;
{
std::unique_lock<std::shared_timed_mutex> Lock(BC.CtxMutex);
EHSymbol = BC.Ctx->createNamedTempSymbol("EH");
BC.MIB->createEHLabel(EHLabel, EHSymbol, BC.Ctx.get());
}
II = std::next(BB->insertPseudoInstr(II, EHLabel));
// At this point we could be in one of the following states:
//
// I. Exception handler has changed and we need to close previous range
// and start a new one.
//
// II. Start a new exception range after the gap.
//
// III. Close current exception range and start a new gap.
const MCSymbol *EndRange;
if (StartRange) {
// I, III:
EndRange = EHSymbol;
} else {
// II:
StartRange = EHSymbol;
IsStartInCold = SeenCold;
EndRange = nullptr;
}
// Close the previous range.
if (EndRange) {
Sites->emplace_back(
CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action});
}
if (Throws) {
// I, II:
StartRange = EHSymbol;
IsStartInCold = SeenCold;
PreviousEH = EHInfo{LP, Action};
} else {
StartRange = nullptr;
}
}
}
// Check if we need to close the range.
if (StartRange) {
assert((!isSplit() || Sites == &ColdCallSites) && "sites mismatch");
const MCSymbol *EndRange =
IsStartInCold ? getFunctionColdEndLabel() : getFunctionEndLabel();
Sites->emplace_back(
CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action});
}
}
const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
CFIReaderWriter::CFIReaderWriter(const DWARFDebugFrame &EHFrame) {
// Prepare FDEs for fast lookup
for (const dwarf::FrameEntry &Entry : EHFrame.entries()) {
const auto *CurFDE = dyn_cast<dwarf::FDE>(&Entry);
// Skip CIEs.
if (!CurFDE)
continue;
// There could me multiple FDEs with the same initial address, and perhaps
// different sizes (address ranges). Use the first entry with non-zero size.
auto FDEI = FDEs.lower_bound(CurFDE->getInitialLocation());
if (FDEI != FDEs.end() && FDEI->first == CurFDE->getInitialLocation()) {
if (CurFDE->getAddressRange()) {
if (FDEI->second->getAddressRange() == 0) {
FDEI->second = CurFDE;
} else if (opts::Verbosity > 0) {
errs() << "BOLT-WARNING: different FDEs for function at 0x"
<< Twine::utohexstr(FDEI->first)
<< " detected; sizes: " << FDEI->second->getAddressRange()
<< " and " << CurFDE->getAddressRange() << '\n';
}
}
} else {
FDEs.emplace_hint(FDEI, CurFDE->getInitialLocation(), CurFDE);
}
}
}
bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
uint64_t Address = Function.getAddress();
auto I = FDEs.find(Address);
// Ignore zero-length FDE ranges.
if (I == FDEs.end() || !I->second->getAddressRange())
return true;
const FDE &CurFDE = *I->second;
Optional<uint64_t> LSDA = CurFDE.getLSDAAddress();
Function.setLSDAAddress(LSDA ? *LSDA : 0);
uint64_t Offset = 0;
uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor();
uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor();
if (CurFDE.getLinkedCIE()->getPersonalityAddress()) {
Function.setPersonalityFunction(
*CurFDE.getLinkedCIE()->getPersonalityAddress());
Function.setPersonalityEncoding(
*CurFDE.getLinkedCIE()->getPersonalityEncoding());
}
auto decodeFrameInstruction = [&Function, &Offset, Address, CodeAlignment,
DataAlignment](
const CFIProgram::Instruction &Instr) {
uint8_t Opcode = Instr.Opcode;
if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK)
Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK;
switch (Instr.Opcode) {
case DW_CFA_nop:
break;
case DW_CFA_advance_loc4:
case DW_CFA_advance_loc2:
case DW_CFA_advance_loc1:
case DW_CFA_advance_loc:
// Advance our current address
Offset += CodeAlignment * int64_t(Instr.Ops[0]);
break;
case DW_CFA_offset_extended_sf:
Function.addCFIInstruction(
Offset,
MCCFIInstruction::createOffset(
nullptr, Instr.Ops[0], DataAlignment * int64_t(Instr.Ops[1])));
break;
case DW_CFA_offset_extended:
case DW_CFA_offset:
Function.addCFIInstruction(
Offset, MCCFIInstruction::createOffset(nullptr, Instr.Ops[0],
DataAlignment * Instr.Ops[1]));
break;
case DW_CFA_restore_extended:
case DW_CFA_restore:
Function.addCFIInstruction(
Offset, MCCFIInstruction::createRestore(nullptr, Instr.Ops[0]));
break;
case DW_CFA_set_loc:
assert(Instr.Ops[0] >= Address && "set_loc out of function bounds");
assert(Instr.Ops[0] <= Address + Function.getSize() &&
"set_loc out of function bounds");
Offset = Instr.Ops[0] - Address;
break;
case DW_CFA_undefined:
Function.addCFIInstruction(
Offset, MCCFIInstruction::createUndefined(nullptr, Instr.Ops[0]));
break;
case DW_CFA_same_value:
Function.addCFIInstruction(
Offset, MCCFIInstruction::createSameValue(nullptr, Instr.Ops[0]));
break;
case DW_CFA_register:
Function.addCFIInstruction(
Offset, MCCFIInstruction::createRegister(nullptr, Instr.Ops[0],
Instr.Ops[1]));
break;
case DW_CFA_remember_state:
Function.addCFIInstruction(
Offset, MCCFIInstruction::createRememberState(nullptr));
break;
case DW_CFA_restore_state:
Function.addCFIInstruction(Offset,
MCCFIInstruction::createRestoreState(nullptr));
break;
case DW_CFA_def_cfa:
Function.addCFIInstruction(
Offset,
MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0], Instr.Ops[1]));
break;
case DW_CFA_def_cfa_sf:
Function.addCFIInstruction(
Offset,
MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0],
DataAlignment * int64_t(Instr.Ops[1])));
break;
case DW_CFA_def_cfa_register:
Function.addCFIInstruction(Offset, MCCFIInstruction::createDefCfaRegister(
nullptr, Instr.Ops[0]));
break;
case DW_CFA_def_cfa_offset:
Function.addCFIInstruction(
Offset, MCCFIInstruction::cfiDefCfaOffset(nullptr, Instr.Ops[0]));
break;
case DW_CFA_def_cfa_offset_sf:
Function.addCFIInstruction(
Offset, MCCFIInstruction::cfiDefCfaOffset(
nullptr, DataAlignment * int64_t(Instr.Ops[0])));
break;
case DW_CFA_GNU_args_size:
Function.addCFIInstruction(
Offset, MCCFIInstruction::createGnuArgsSize(nullptr, Instr.Ops[0]));
Function.setUsesGnuArgsSize();
break;
case DW_CFA_val_offset_sf:
case DW_CFA_val_offset:
if (opts::Verbosity >= 1) {
errs() << "BOLT-WARNING: DWARF val_offset() unimplemented\n";
}
return false;
case DW_CFA_def_cfa_expression:
case DW_CFA_val_expression:
case DW_CFA_expression: {
StringRef ExprBytes = Instr.Expression->getData();
std::string Str;
raw_string_ostream OS(Str);
// Manually encode this instruction using CFI escape
OS << Opcode;
if (Opcode != DW_CFA_def_cfa_expression)
encodeULEB128(Instr.Ops[0], OS);
encodeULEB128(ExprBytes.size(), OS);
OS << ExprBytes;
Function.addCFIInstruction(
Offset, MCCFIInstruction::createEscape(nullptr, OS.str()));
break;
}
case DW_CFA_MIPS_advance_loc8:
if (opts::Verbosity >= 1)
errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n";
return false;
case DW_CFA_GNU_window_save:
case DW_CFA_lo_user:
case DW_CFA_hi_user:
if (opts::Verbosity >= 1) {
errs() << "BOLT-WARNING: DW_CFA_GNU_* and DW_CFA_*_user "
"unimplemented\n";
}
return false;
default:
if (opts::Verbosity >= 1) {
errs() << "BOLT-WARNING: Unrecognized CFI instruction: " << Instr.Opcode
<< '\n';
}
return false;
}
return true;
};
for (const CFIProgram::Instruction &Instr : CurFDE.getLinkedCIE()->cfis())
if (!decodeFrameInstruction(Instr))
return false;
for (const CFIProgram::Instruction &Instr : CurFDE.cfis())
if (!decodeFrameInstruction(Instr))
return false;
return true;
}
std::vector<char> CFIReaderWriter::generateEHFrameHeader(
const DWARFDebugFrame &OldEHFrame, const DWARFDebugFrame &NewEHFrame,
uint64_t EHFrameHeaderAddress,
std::vector<uint64_t> &FailedAddresses) const {
// Common PC -> FDE map to be written into .eh_frame_hdr.
std::map<uint64_t, uint64_t> PCToFDE;
// Presort array for binary search.
std::sort(FailedAddresses.begin(), FailedAddresses.end());
// Initialize PCToFDE using NewEHFrame.
for (dwarf::FrameEntry &Entry : NewEHFrame.entries()) {
const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry);
if (FDE == nullptr)
continue;
const uint64_t FuncAddress = FDE->getInitialLocation();
const uint64_t FDEAddress =
NewEHFrame.getEHFrameAddress() + FDE->getOffset();
// Ignore unused FDEs.
if (FuncAddress == 0)
continue;
// Add the address to the map unless we failed to write it.
if (!std::binary_search(FailedAddresses.begin(), FailedAddresses.end(),
FuncAddress)) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: FDE for function at 0x"
<< Twine::utohexstr(FuncAddress) << " is at 0x"
<< Twine::utohexstr(FDEAddress) << '\n');
PCToFDE[FuncAddress] = FDEAddress;
}
};
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: new .eh_frame contains "
<< std::distance(NewEHFrame.entries().begin(),
NewEHFrame.entries().end())
<< " entries\n");
// Add entries from the original .eh_frame corresponding to the functions
// that we did not update.
for (const dwarf::FrameEntry &Entry : OldEHFrame) {
const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry);
if (FDE == nullptr)
continue;
const uint64_t FuncAddress = FDE->getInitialLocation();
const uint64_t FDEAddress =
OldEHFrame.getEHFrameAddress() + FDE->getOffset();
// Add the address if we failed to write it.
if (PCToFDE.count(FuncAddress) == 0) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old FDE for function at 0x"
<< Twine::utohexstr(FuncAddress) << " is at 0x"
<< Twine::utohexstr(FDEAddress) << '\n');
PCToFDE[FuncAddress] = FDEAddress;
}
};
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old .eh_frame contains "
<< std::distance(OldEHFrame.entries().begin(),
OldEHFrame.entries().end())
<< " entries\n");
// Generate a new .eh_frame_hdr based on the new map.
// Header plus table of entries of size 8 bytes.
std::vector<char> EHFrameHeader(12 + PCToFDE.size() * 8);
// Version is 1.
EHFrameHeader[0] = 1;
// Encoding of the eh_frame pointer.
EHFrameHeader[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4;
// Encoding of the count field to follow.
EHFrameHeader[2] = DW_EH_PE_udata4;
// Encoding of the table entries - 4-byte offset from the start of the header.
EHFrameHeader[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4;
// Address of eh_frame. Use the new one.
support::ulittle32_t::ref(EHFrameHeader.data() + 4) =
NewEHFrame.getEHFrameAddress() - (EHFrameHeaderAddress + 4);
// Number of entries in the table (FDE count).
support::ulittle32_t::ref(EHFrameHeader.data() + 8) = PCToFDE.size();
// Write the table at offset 12.
char *Ptr = EHFrameHeader.data();
uint32_t Offset = 12;
for (const auto &PCI : PCToFDE) {
int64_t InitialPCOffset = PCI.first - EHFrameHeaderAddress;
assert(isInt<32>(InitialPCOffset) && "PC offset out of bounds");
support::ulittle32_t::ref(Ptr + Offset) = InitialPCOffset;
Offset += 4;
int64_t FDEOffset = PCI.second - EHFrameHeaderAddress;
assert(isInt<32>(FDEOffset) && "FDE offset out of bounds");
support::ulittle32_t::ref(Ptr + Offset) = FDEOffset;
Offset += 4;
}
return EHFrameHeader;
}
Error EHFrameParser::parseCIE(uint64_t StartOffset) {
uint8_t Version = Data.getU8(&Offset);
const char *Augmentation = Data.getCStr(&Offset);
StringRef AugmentationString(Augmentation ? Augmentation : "");
uint8_t AddressSize =
Version < 4 ? Data.getAddressSize() : Data.getU8(&Offset);
Data.setAddressSize(AddressSize);
// Skip segment descriptor size
if (Version >= 4)
Offset += 1;
// Skip code alignment factor
Data.getULEB128(&Offset);
// Skip data alignment
Data.getSLEB128(&Offset);
// Skip return address register
if (Version == 1)
Offset += 1;
else
Data.getULEB128(&Offset);
uint32_t FDEPointerEncoding = DW_EH_PE_absptr;
uint32_t LSDAPointerEncoding = DW_EH_PE_omit;
// Walk the augmentation string to get all the augmentation data.
for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) {
switch (AugmentationString[i]) {
default:
return createStringError(
errc::invalid_argument,
"unknown augmentation character in entry at 0x%" PRIx64, StartOffset);
case 'L':
LSDAPointerEncoding = Data.getU8(&Offset);
break;
case 'P': {
uint32_t PersonalityEncoding = Data.getU8(&Offset);
Optional<uint64_t> Personality =
Data.getEncodedPointer(&Offset, PersonalityEncoding,
EHFrameAddress ? EHFrameAddress + Offset : 0);
// Patch personality address
if (Personality)
PatcherCallback(*Personality, Offset, PersonalityEncoding);
break;
}
case 'R':
FDEPointerEncoding = Data.getU8(&Offset);
break;
case 'z':
if (i)
return createStringError(
errc::invalid_argument,
"'z' must be the first character at 0x%" PRIx64, StartOffset);
// Skip augmentation length
Data.getULEB128(&Offset);
break;
case 'S':
case 'B':
break;
}
}
Entries.emplace_back(std::make_unique<CIEInfo>(
FDEPointerEncoding, LSDAPointerEncoding, AugmentationString));
CIEs[StartOffset] = &*Entries.back();
return Error::success();
}
Error EHFrameParser::parseFDE(uint64_t CIEPointer,
uint64_t StartStructureOffset) {
Optional<uint64_t> LSDAAddress;
CIEInfo *Cie = CIEs[StartStructureOffset - CIEPointer];
// The address size is encoded in the CIE we reference.
if (!Cie)
return createStringError(errc::invalid_argument,
"parsing FDE data at 0x%" PRIx64
" failed due to missing CIE",
StartStructureOffset);
// Patch initial location
if (auto Val = Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding,
EHFrameAddress + Offset)) {
PatcherCallback(*Val, Offset, Cie->FDEPtrEncoding);
}
// Skip address range
Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 0);
// Process augmentation data for this FDE.
StringRef AugmentationString = Cie->AugmentationString;
if (!AugmentationString.empty() && Cie->LSDAPtrEncoding != DW_EH_PE_omit) {
// Skip augmentation length
Data.getULEB128(&Offset);
LSDAAddress =
Data.getEncodedPointer(&Offset, Cie->LSDAPtrEncoding,
EHFrameAddress ? Offset + EHFrameAddress : 0);
// Patch LSDA address
PatcherCallback(*LSDAAddress, Offset, Cie->LSDAPtrEncoding);
}
return Error::success();
}
Error EHFrameParser::parse() {
while (Data.isValidOffset(Offset)) {
const uint64_t StartOffset = Offset;
uint64_t Length;
DwarfFormat Format;
std::tie(Length, Format) = Data.getInitialLength(&Offset);
// If the Length is 0, then this CIE is a terminator
if (Length == 0)
break;
const uint64_t StartStructureOffset = Offset;
const uint64_t EndStructureOffset = Offset + Length;
Error Err = Error::success();
const uint64_t Id = Data.getRelocatedValue(4, &Offset,
/*SectionIndex=*/nullptr, &Err);
if (Err)
return Err;
if (!Id) {
if (Error Err = parseCIE(StartOffset))
return Err;
} else {
if (Error Err = parseFDE(Id, StartStructureOffset))
return Err;
}
Offset = EndStructureOffset;
}
return Error::success();
}
Error EHFrameParser::parse(DWARFDataExtractor Data, uint64_t EHFrameAddress,
PatcherCallbackTy PatcherCallback) {
EHFrameParser Parser(Data, EHFrameAddress, PatcherCallback);
return Parser.parse();
}
} // namespace bolt
} // namespace llvm