llvm/lld/MachO/Symbols.cpp
Greg McGary f27e4548fc [lld-macho] Implement ICF
ICF = Identical C(ode|OMDAT) Folding

This is the LLD ELF/COFF algorithm, adapted for MachO. So far, only `-icf all` is supported. In order to support `-icf safe`, we will need to port address-significance tables (`.addrsig` directives) to MachO, which will come in later diffs.

`check-{llvm,clang,lld}` have 0 regressions for `lld -icf all` vs. baseline ld64.

We only run ICF on `__TEXT,__text` for reasons explained in the block comment in `ConcatOutputSection.cpp`.

Here is the perf impact for linking `chromium_framework` on a Mac Pro (16-core Xeon W) for the non-ICF case vs. pre-ICF:
```
    N           Min           Max        Median           Avg        Stddev
x  20          4.27          4.44          4.34         4.349   0.043029977
+  20          4.37          4.46         4.405        4.4115   0.025188761
Difference at 95.0% confidence
        0.0625 +/- 0.0225658
        1.43711% +/- 0.518873%
        (Student's t, pooled s = 0.0352566)
```

Reviewed By: #lld-macho, int3

Differential Revision: https://reviews.llvm.org/D103292
2021-06-17 10:07:44 -07:00

81 lines
2.7 KiB
C++

//===- Symbols.cpp --------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "Symbols.h"
#include "InputFiles.h"
#include "SyntheticSections.h"
using namespace llvm;
using namespace lld;
using namespace lld::macho;
// Produces the spelling of a symbol name for use in an error message.
// When config->demangle is off the name is copied through unchanged;
// otherwise it is passed through demangleItanium().
static std::string demangle(StringRef symName) {
  if (!config->demangle)
    return std::string(symName);
  return demangleItanium(symName);
}
// Renders a MachO symbol as a string by handing its name to demangle(),
// which applies demangling only when config->demangle is set.
std::string lld::toString(const Symbol &sym) {
  const auto symbolName = sym.getName();
  return demangle(symbolName);
}
// Renders an archive symbol-table entry as a string by handing its name to
// demangle(), which applies demangling only when config->demangle is set.
std::string lld::toMachOString(const object::Archive::Symbol &b) {
  const auto memberSymbolName = b.getName();
  return demangle(memberSymbolName);
}
// Returns the address that the stubs section (in.stubs) reports for this
// symbol's stubsIndex.
uint64_t Symbol::getStubVA() const {
  auto &stubsSection = *in.stubs;
  return stubsSection.getVA(stubsIndex);
}
// Returns the address that the GOT section (in.got) reports for this
// symbol's gotIndex.
uint64_t Symbol::getGotVA() const {
  auto &gotSection = *in.got;
  return gotSection.getVA(gotIndex);
}
// Returns the address that the TLV pointers section (in.tlvPointers) reports
// for this symbol.
// NOTE(review): the lookup is keyed by gotIndex, not a TLV-specific index --
// presumably the same slot index serves both sections; confirm in Symbols.h.
uint64_t Symbol::getTlvVA() const {
  auto &tlvPointerSection = *in.tlvPointers;
  return tlvPointerSection.getVA(gotIndex);
}
bool Symbol::isLive() const {
if (isa<DylibSymbol>(this) || isa<Undefined>(this))
return used;
if (auto *d = dyn_cast<Defined>(this)) {
// Non-absolute symbols might be alive because their section is
// no_dead_strip or live_support. In that case, the section will know
// that it's live but `used` might be false. Non-absolute symbols always
// have to use the section's `live` bit as source of truth.
if (d->isAbsolute())
return used;
return d->isec->canonical()->isLive(d->value);
}
assert(!isa<CommonSymbol>(this) &&
"replaceCommonSymbols() runs before dead code stripping, and isLive() "
"should only be called after dead code stripping");
// Assume any other kind of symbol is live.
return true;
}
// Returns the virtual address of this defined symbol.
//   - Absolute symbols: `value` is returned as-is.
//   - Symbols in a finalized canonical section: the section maps `value` to
//     an address.
//   - Symbols in a not-yet-finalized section: TargetInfo::outOfRangeVA.
// Asserts that the symbol is live.
uint64_t Defined::getVA() const {
  assert(isLive() && "this should only be called for live symbols");

  if (isAbsolute())
    return value;

  // Common case: the canonical section already has its final address.
  if (isec->canonical()->isFinal)
    return isec->canonical()->getVA(value);

  // Only a target that uses thunks should ever ask for the address of a
  // function whose section has not been finalized yet.
  assert(target->usesThunks());
  // ConcatOutputSection::finalize() may ask for a function's address before
  // one has been assigned. The thunking algorithm already treats unfinalized
  // functions as out of range, so a contrived out-of-range address is the
  // expedient answer here.
  return TargetInfo::outOfRangeVA;
}
// Returns the stub address when this dylib symbol is in the stubs section;
// otherwise defers to the base Symbol::getVA().
uint64_t DylibSymbol::getVA() const {
  if (isInStubs())
    return getStubVA();
  return Symbol::getVA();
}
// Asks the file returned by getFile() to fetch the member associated with
// this lazy symbol's archive entry (`sym`).
void LazySymbol::fetchArchiveMember() {
  auto &owningFile = *getFile();
  owningFile.fetch(sym);
}