[lld-macho] Support dynamic linking of thread-locals

References to symbols in dylibs work very similarly regardless of
whether the symbol is a TLV. The main difference is that we have a
separate `__thread_ptrs` section that acts as the GOT for these
thread-locals.

We can identify thread-locals in dylibs by a flag in their export trie
entries, and we cross-check it with the relocations that refer to them
to ensure that we are not using a GOT relocation to reference a
thread-local (or vice versa).

Reviewed By: #lld-macho, smeenai

Differential Revision: https://reviews.llvm.org/D85081
This commit is contained in:
Jez Ng 2020-08-12 19:50:09 -07:00
parent 65277126bf
commit 3c9100fb78
15 changed files with 199 additions and 40 deletions

View file

@ -224,6 +224,9 @@ void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym,
// TODO: implement mov -> lea relaxation for non-dynamic symbols
case X86_64_RELOC_GOT:
in.got->addEntry(sym);
if (sym.isTlv())
error("found GOT relocation referencing thread-local variable in " +
toString(isec));
break;
case X86_64_RELOC_BRANCH: {
// TODO: weak dysyms should go into the weak binding section instead
@ -248,10 +251,20 @@ void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym,
case X86_64_RELOC_SIGNED_4:
break;
case X86_64_RELOC_TLV:
if (isa<DylibSymbol>(&sym))
error("relocations to thread-local dylib symbols not yet implemented");
else
if (isa<DylibSymbol>(&sym)) {
in.tlvPointers->addEntry(sym);
} else {
assert(isa<Defined>(&sym));
// TLV relocations on x86_64 are always used with a movq opcode, which
// can be converted to leaq opcodes if they reference a defined symbol.
// (This is in contrast to GOT relocations, which can be used with
// non-movq opcodes.) As such, there is no need to add an entry to
// tlvPointers here.
}
if (!sym.isTlv())
error(
"found X86_64_RELOC_TLV referencing a non-thread-local variable in " +
toString(isec));
break;
case X86_64_RELOC_SUBTRACTOR:
fatal("TODO: handle relocation type " + std::to_string(r.type));
@ -279,7 +292,7 @@ uint64_t X86_64::resolveSymbolVA(uint8_t *buf, const lld::macho::Symbol &sym,
return sym.getVA();
case X86_64_RELOC_TLV: {
if (isa<DylibSymbol>(&sym))
error("relocations to thread-local dylib symbols not yet implemented");
return in.tlvPointers->addr + sym.gotIndex * WordSize;
// Convert the movq to a leaq.
assert(isa<Defined>(&sym));

View file

@ -59,11 +59,14 @@ struct Edge {
// Per-symbol payload recorded in the export trie: the symbol's virtual address
// plus a flags byte. The flags mark weak definitions
// (EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION) and thread-local variables
// (EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL).
struct ExportInfo {
uint64_t address;
uint8_t flags;
explicit ExportInfo(const Symbol &sym)
: address(sym.getVA()),
flags(sym.isWeakDef() ? EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION : 0) {}
// TODO: Add proper support for re-exports & stub-and-resolver flags.
uint8_t flags = 0;
explicit ExportInfo(const Symbol &sym) : address(sym.getVA()) {
if (sym.isWeakDef())
flags |= EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
if (sym.isTlv())
flags |= EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL;
// TODO: Add proper support for re-exports & stub-and-resolver flags.
}
};
} // namespace

View file

@ -363,8 +363,9 @@ DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella)
parseTrie(buf + c->export_off, c->export_size,
[&](const Twine &name, uint64_t flags) {
bool isWeakDef = flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
symbols.push_back(
symtab->addDylib(saver.save(name), umbrella, isWeakDef));
bool isTlv = flags & EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL;
symbols.push_back(symtab->addDylib(saver.save(name), umbrella,
isWeakDef, isTlv));
});
} else {
error("LC_DYLD_INFO_ONLY not found in " + getName());
@ -403,11 +404,12 @@ DylibFile::DylibFile(std::shared_ptr<llvm::MachO::InterfaceFile> interface,
dylibName = saver.save(interface->getInstallName());
// TODO(compnerd) filter out symbols based on the target platform
// TODO: handle weak defs
// TODO: handle weak defs, thread locals
for (const auto symbol : interface->symbols())
if (symbol->getArchitectures().has(config->arch))
symbols.push_back(symtab->addDylib(saver.save(symbol->getName()),
umbrella, /*isWeakDef=*/false));
umbrella, /*isWeakDef=*/false,
/*isTlv=*/false));
// TODO(compnerd) properly represent the hierarchy of the documents as it is
// in theory possible to have re-exported dylibs from re-exported dylibs which
// should be parent'ed to the child.

View file

@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "InputSection.h"
#include "InputFiles.h"
#include "OutputSegment.h"
#include "Symbols.h"
#include "Target.h"
@ -55,3 +56,7 @@ void InputSection::writeTo(uint8_t *buf) {
target->relocateOne(buf + r.offset, r, val);
}
}
// Renders an input section for diagnostics as "<file>:(<section name>)".
std::string lld::toString(const InputSection *isec) {
  std::string fileName = toString(isec->file);
  // The concatenation is consumed by .str() within this one expression, so no
  // temporary is referenced after it is destroyed.
  return (fileName + ":(" + isec->name + ")").str();
}

View file

@ -74,6 +74,9 @@ public:
extern std::vector<InputSection *> inputSections;
} // namespace macho
std::string toString(const macho::InputSection *);
} // namespace lld
#endif

View file

@ -69,14 +69,15 @@ Symbol *SymbolTable::addUndefined(StringRef name) {
return s;
}
// Registers a symbol exported by a dylib and returns its symbol-table entry.
// The existing entry is replaced by a DylibSymbol only when the name was newly
// inserted, when it is currently an Undefined, or when it is a weak
// DylibSymbol and the incoming definition is not weak. In every other case the
// existing symbol is returned unchanged.
Symbol *SymbolTable::addDylib(StringRef name, DylibFile *file, bool isWeakDef) {
Symbol *SymbolTable::addDylib(StringRef name, DylibFile *file, bool isWeakDef,
bool isTlv) {
Symbol *s;
bool wasInserted;
std::tie(s, wasInserted) = insert(name);
if (wasInserted || isa<Undefined>(s) ||
(isa<DylibSymbol>(s) && !isWeakDef && s->isWeakDef()))
replaceSymbol<DylibSymbol>(s, file, name, isWeakDef);
replaceSymbol<DylibSymbol>(s, file, name, isWeakDef, isTlv);
return s;
}

View file

@ -36,7 +36,7 @@ public:
Symbol *addUndefined(StringRef name);
Symbol *addDylib(StringRef name, DylibFile *file, bool isWeakDef);
Symbol *addDylib(StringRef name, DylibFile *file, bool isWeakDef, bool isTlv);
Symbol *addLazy(StringRef name, ArchiveFile *file,
const llvm::object::Archive::Symbol &sym);

View file

@ -55,6 +55,11 @@ public:
virtual bool isWeakDef() const { llvm_unreachable("cannot be weak"); }
virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); }
// The index of this symbol in the GOT or the TLVPointer section, depending
// on whether it is a thread-local. A given symbol cannot be referenced by
// both these sections at once.
uint32_t gotIndex = UINT32_MAX;
protected:
@ -72,6 +77,8 @@ public:
bool isWeakDef() const override { return weakDef; }
bool isTlv() const override { return isThreadLocalVariables(isec->flags); }
static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
uint64_t getVA() const override { return isec->getVA() + value; }
@ -96,11 +103,13 @@ public:
class DylibSymbol : public Symbol {
public:
DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef)
: Symbol(DylibKind, name), file(file), weakDef(isWeakDef) {}
DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, bool isTlv)
: Symbol(DylibKind, name), file(file), weakDef(isWeakDef), tlv(isTlv) {}
bool isWeakDef() const override { return weakDef; }
bool isTlv() const override { return tlv; }
static bool classof(const Symbol *s) { return s->kind() == DylibKind; }
DylibFile *file;
@ -109,6 +118,7 @@ public:
private:
const bool weakDef;
const bool tlv;
};
class LazySymbol : public Symbol {

View file

@ -82,22 +82,21 @@ void MachHeaderSection::writeTo(uint8_t *buf) const {
PageZeroSection::PageZeroSection()
: SyntheticSection(segment_names::pageZero, section_names::pageZero) {}
GotSection::GotSection()
: SyntheticSection(segment_names::dataConst, section_names::got) {
NonLazyPointerSectionBase::NonLazyPointerSectionBase(const char *segname,
const char *name)
: SyntheticSection(segname, name) {
align = 8;
flags = MachO::S_NON_LAZY_SYMBOL_POINTERS;
// TODO: section_64::reserved1 should be an index into the indirect symbol
// table, which we do not currently emit
}
// Adds `sym` to this section's entries if it is not already present, and
// records the position of the new entry in `sym.gotIndex`. The assert checks
// that the symbol had not been assigned an index before (gotIndex is still
// UINT32_MAX).
void GotSection::addEntry(Symbol &sym) {
void NonLazyPointerSectionBase::addEntry(Symbol &sym) {
if (entries.insert(&sym)) {
assert(sym.gotIndex == UINT32_MAX);
sym.gotIndex = entries.size() - 1;
}
}
void GotSection::writeTo(uint8_t *buf) const {
void NonLazyPointerSectionBase::writeTo(uint8_t *buf) const {
for (size_t i = 0, n = entries.size(); i < n; ++i)
if (auto *defined = dyn_cast<Defined>(entries[i]))
write64le(&buf[i * WordSize], defined->getVA());
@ -107,7 +106,8 @@ BindingSection::BindingSection()
: LinkEditSection(segment_names::linkEdit, section_names::binding) {}
bool BindingSection::isNeeded() const {
return bindings.size() != 0 || in.got->isNeeded();
return bindings.size() != 0 || in.got->isNeeded() ||
in.tlvPointers->isNeeded();
}
namespace {
@ -138,7 +138,6 @@ static void encodeBinding(const DylibSymbol &dysym, const OutputSection *osec,
lastBinding.segment = seg;
lastBinding.offset = offset;
} else if (lastBinding.offset != offset) {
assert(lastBinding.offset <= offset);
os << static_cast<uint8_t>(BIND_OPCODE_ADD_ADDR_ULEB);
encodeULEB128(offset - lastBinding.offset, os);
lastBinding.offset = offset;
@ -169,6 +168,22 @@ static void encodeBinding(const DylibSymbol &dysym, const OutputSection *osec,
lastBinding.offset += WordSize;
}
// Emits bind opcodes for every dylib symbol stored in `osec`. An entry's
// position in the section determines its offset (position * WordSize).
// Entries that are not DylibSymbols are skipped but still occupy a slot.
// Returns true if at least one binding was encoded.
static bool encodeNonLazyPointerSection(NonLazyPointerSectionBase *osec,
                                        Binding &lastBinding,
                                        raw_svector_ostream &os) {
  bool emittedAny = false;
  const auto &entries = osec->getEntries();
  for (size_t slot = 0, count = entries.size(); slot != count; ++slot) {
    const auto *dysym = dyn_cast<DylibSymbol>(entries[slot]);
    if (!dysym)
      continue;
    encodeBinding(*dysym, osec, slot * WordSize, /*addend=*/0, lastBinding,
                  os);
    emittedAny = true;
  }
  return emittedAny;
}
// Emit bind opcodes, which are a stream of byte-sized opcodes that dyld
// interprets to update a record with the following fields:
// * segment index (of the segment to write the symbol addresses to, typically
@ -185,15 +200,8 @@ static void encodeBinding(const DylibSymbol &dysym, const OutputSection *osec,
void BindingSection::finalizeContents() {
raw_svector_ostream os{contents};
Binding lastBinding;
bool didEncode = false;
size_t gotIdx = 0;
for (const Symbol *sym : in.got->getEntries()) {
if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
didEncode = true;
encodeBinding(*dysym, in.got, gotIdx * WordSize, 0, lastBinding, os);
}
++gotIdx;
}
bool didEncode = encodeNonLazyPointerSection(in.got, lastBinding, os);
didEncode |= encodeNonLazyPointerSection(in.tlvPointers, lastBinding, os);
// Sorting the relocations by segment and address allows us to encode them
// more compactly.

View file

@ -13,6 +13,7 @@
#include "ExportTrie.h"
#include "InputSection.h"
#include "OutputSection.h"
#include "OutputSegment.h"
#include "Target.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/BinaryFormat/MachO.h"
@ -31,6 +32,7 @@ constexpr const char export_[] = "__export";
constexpr const char symbolTable[] = "__symbol_table";
constexpr const char stringTable[] = "__string_table";
constexpr const char got[] = "__got";
constexpr const char threadPtrs[] = "__thread_ptrs";
} // namespace section_names
@ -95,11 +97,13 @@ public:
void writeTo(uint8_t *buf) const override {}
};
// This section will be populated by dyld with addresses to non-lazily-loaded
// dylib symbols.
class GotSection : public SyntheticSection {
// This is the base class for the GOT and TLVPointer sections, which are nearly
// functionally identical -- they will both be populated by dyld with addresses
// to non-lazily-loaded dylib symbols. The main difference is that the
// TLVPointerSection stores references to thread-local variables.
class NonLazyPointerSectionBase : public SyntheticSection {
public:
GotSection();
NonLazyPointerSectionBase(const char *segname, const char *name);
const llvm::SetVector<const Symbol *> &getEntries() const { return entries; }
@ -115,6 +119,23 @@ private:
llvm::SetVector<const Symbol *> entries;
};
// The global offset table, placed in the __got section of the data-const
// segment. Entries that refer to dylib symbols are populated by dyld.
class GotSection : public NonLazyPointerSectionBase {
public:
  // TODO: section_64::reserved1 should be an index into the indirect symbol
  // table, which we do not currently emit
  GotSection()
      : NonLazyPointerSectionBase(segment_names::dataConst,
                                  section_names::got) {}
};
class TlvPointerSection : public NonLazyPointerSectionBase {
public:
TlvPointerSection()
: NonLazyPointerSectionBase(segment_names::data,
section_names::threadPtrs) {}
};
struct BindingEntry {
const DylibSymbol *dysym;
const InputSection *isec;
@ -297,6 +318,7 @@ struct InStruct {
MachHeaderSection *header = nullptr;
BindingSection *binding = nullptr;
GotSection *got = nullptr;
TlvPointerSection *tlvPointers = nullptr;
LazyPointerSection *lazyPointers = nullptr;
StubsSection *stubs = nullptr;
StubHelperSection *stubHelper = nullptr;

View file

@ -540,6 +540,7 @@ void macho::createSyntheticSections() {
in.header = make<MachHeaderSection>();
in.binding = make<BindingSection>();
in.got = make<GotSection>();
in.tlvPointers = make<TlvPointerSection>();
in.lazyPointers = make<LazyPointerSection>();
in.stubs = make<StubsSection>();
in.stubHelper = make<StubHelperSection>();

View file

@ -0,0 +1,23 @@
# REQUIRES: x86
## Check that a GOT relocation referencing a thread-local variable exported by
## a dylib is reported as an error.
# RUN: split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/libtlv.s -o %t/libtlv.o
# RUN: lld -flavor darwinnew -dylib -install_name @executable_path/libtlv.dylib \
# RUN: -Z -L%S/../Inputs/MacOSX.sdk/usr/lib -lSystem -o %t/libtlv.dylib %t/libtlv.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
# RUN: not lld -flavor darwinnew -Z -L%S/../Inputs/MacOSX.sdk/usr/lib -lSystem -L%t -ltlv -o /dev/null %t/test.o 2>&1 | FileCheck %s -DFILE=%t/test.o
# CHECK: error: found GOT relocation referencing thread-local variable in [[FILE]]:(__text)
#--- libtlv.s
.section __DATA,__thread_vars,thread_local_variables
.globl _foo
_foo:
#--- test.s
.text
.globl _main
_main:
movq _foo@GOTPCREL(%rip), %rax
ret

View file

@ -0,0 +1,14 @@
# REQUIRES: x86
## Check that a GOT relocation referencing a thread-local variable defined in
## the same object file is reported as an error.
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
# RUN: not lld -flavor darwinnew -o /dev/null %t.o 2>&1 | FileCheck %s -DFILE=%t.o
# CHECK: error: found GOT relocation referencing thread-local variable in [[FILE]]:(__text)
.text
.globl _main
_main:
movq _foo@GOTPCREL(%rip), %rax
ret
.section __DATA,__thread_vars,thread_local_variables
_foo:

View file

@ -0,0 +1,14 @@
# REQUIRES: x86
## Check that a TLV relocation referencing a symbol that is not a thread-local
## variable is reported as an error.
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
# RUN: not lld -flavor darwinnew -o /dev/null %t.o 2>&1 | FileCheck %s -DFILE=%t.o
# CHECK: error: found X86_64_RELOC_TLV referencing a non-thread-local variable in [[FILE]]:(__text)
.text
.globl _main
_main:
leaq _foo@TLVP(%rip), %rax
ret
.data
_foo:

View file

@ -0,0 +1,40 @@
# REQUIRES: x86
## Check that thread-locals are marked per-thread in a dylib's export trie, and
## that references to them from another image are bound through __thread_ptrs
## while an ordinary GOT reference is still bound through __got.
# RUN: split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/libtlv.s -o %t/libtlv.o
# RUN: lld -flavor darwinnew -dylib -install_name @executable_path/libtlv.dylib \
# RUN: -Z -L%S/Inputs/MacOSX.sdk/usr/lib -lSystem -o %t/libtlv.dylib %t/libtlv.o
# RUN: llvm-objdump --exports-trie -d --no-show-raw-insn %t/libtlv.dylib | FileCheck %s --check-prefix=DYLIB
# DYLIB-DAG: _foo [per-thread]
# DYLIB-DAG: _bar [per-thread]
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
# RUN: lld -flavor darwinnew -Z -L%S/Inputs/MacOSX.sdk/usr/lib -lSystem -L%t -ltlv %t/test.o -o %t/test
# RUN: llvm-objdump --bind -d --no-show-raw-insn %t/test | FileCheck %s
# CHECK: movq [[#]](%rip), %rax # [[#%x, FOO:]]
# CHECK-NEXT: movq [[#]](%rip), %rax # [[#%x, BAR:]]
# CHECK-NEXT: movq [[#]](%rip), %rax # [[#%x, BAZ:]]
# CHECK-LABEL: Bind table:
# CHECK-DAG: __DATA __thread_ptrs 0x{{0*}}[[#%x, FOO]] pointer 0 libtlv _foo
# CHECK-DAG: __DATA __thread_ptrs 0x{{0*}}[[#%x, BAR]] pointer 0 libtlv _bar
# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, BAZ]] pointer 0 libtlv _baz
#--- libtlv.s
.section __DATA,__thread_vars,thread_local_variables
.globl _foo, _bar, _baz
_foo:
_bar:
.text
_baz:
#--- test.s
.globl _main
_main:
mov _foo@TLVP(%rip), %rax
mov _bar@TLVP(%rip), %rax
## Add a GOT entry to make sure we don't mix it up with TLVs
mov _baz@GOTPCREL(%rip), %rax
ret