llvm/llvm-libgcc/generate_version_script.py
Christopher Di Bella c5a20b5182 [llvm-libgcc] initial commit
Note: the term "libgcc" refers to all of `libgcc.a`, `libgcc_eh.a`,
and `libgcc_s.so`.

Enabling libunwind as a replacement for libgcc on Linux has proven to be
challenging since libgcc_s.so is a required dependency in the [Linux
standard base][5]. Some software is transitively dependent on libgcc
because glibc makes hardcoded calls to functions in libgcc_s. For example,
the function `__GI___backtrace` eventually makes its way to a [hardcoded
dlopen to libgcc_s' _Unwind_Backtrace][1]. Since libgcc_{eh.a,s.so} and
libunwind have the same ABI, but different implementations, the two
libraries end up [cross-talking, which ultimately results in a
segfault][2].

To solve this problem, libunwind needs to build a “libgcc”. That is, link
the necessary functions from compiler-rt and libunwind into an archive
and shared object that advertise themselves as `libgcc.a`, `libgcc_eh.a`,
and `libgcc_s.so`, so that glibc’s baked-in calls are diverted to the
correct objects in memory. Fortunately for us, compiler-rt and libunwind
use the same ABI as the libgcc family, so the problem is solvable at the
llvm-project configuration level: no program source needs to be edited.
Thus, the end result is for a user to configure their LLVM build with a
flag that indicates they want to archive compiler-rt/unwind as libgcc.
We achieve this by compiling libunwind with all the symbols necessary
for compiler-rt to emulate the libgcc family, and then generate symlinks
named for our "libgcc" that point to their corresponding libunwind
counterparts.

We alternatively considered patching glibc so that the source doesn't
directly refer to libgcc, but rather _defaults_ to libgcc, so that a
system preferring compiler-rt/libunwind can point to these libraries
at the config stage instead. Even if we modified the Linux standard
base, this alternative won't work because binaries that are built using
libgcc will still end up having crosstalk between the differing
implementations.

This problem has been solved in this manner for [FreeBSD][3], and this
CL has been tested against [Chrome OS][4].

[1]: https://github.com/bminor/glibc/blob/master/sysdeps/arm/backtrace.c#L68
[2]: https://bugs.chromium.org/p/chromium/issues/detail?id=1162190#c16
[3]: https://github.com/freebsd/freebsd-src/tree/main/lib/libgcc_s
[4]: https://chromium-review.googlesource.com/c/chromiumos/overlays/chromiumos-overlay/+/2945947
[5]: https://refspecs.linuxbase.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/libgcc-s.html

Differential Revision: https://reviews.llvm.org/D108416
2022-02-16 17:06:45 +00:00

132 lines
4.4 KiB
Python
Executable file

#!/usr/bin/env python3
# Generates a version script for an architecture so that it can be incorporated
# into gcc_s.ver.
from collections import defaultdict
from itertools import chain
import argparse, subprocess, sys, os
def split_suffix(symbol):
    """
    Splits a versioned symbol such as `__gttf2@GCC_3.0` into a triple of its
    function name (`__gttf2`), version name (`GCC_3.0`), and priority (300).

    The priority is derived from the version number. Since earlier versions are
    more accessible and are likely to be used more, the lower the number is, the
    higher its priority. A symbol that has a '@@' instead of '@' has been
    designated by the linker as the default symbol, and is awarded a priority
    of -1.

    Each dotted component of the version is parsed as an integer, so a
    multi-digit component keeps its magnitude: `GCC_11.0` maps to 1100 and
    ranks after `GCC_4.8.0` (480). Versions with fewer than three components
    are padded with zeros (`3.0` is treated as `3.0.0`).

    Returns:
        None if `symbol` has no '@' version suffix, otherwise the tuple
        `(function_name, version_name, priority)`.

    Raises:
        ValueError: if the symbol has no name or no version around the '@', or
            if a non-default symbol's version number is not numeric.
    """
    if '@' not in symbol:
        return None

    # Dropping empty pieces makes 'name@VER' and 'name@@VER' split identically.
    data = [part for part in symbol.split('@') if part]
    if len(data) < 2:
        raise ValueError(f'malformed versioned symbol: {symbol!r}')

    if '@@' in symbol:
        return data[0], data[1], -1

    # Only the text after the last underscore is the version number, which
    # also copes with version names that contain more than one underscore.
    version = data[-1].rpartition('_')[2]
    components = [int(piece) for piece in version.split('.') if piece]
    components += [0] * (3 - len(components))

    # Positional fold: for single-digit components this is the same number as
    # concatenating the digits, but a component >= 10 carries into the higher
    # positions instead of being mistaken for an older version.
    priority = 0
    for component in components:
        priority = priority * 10 + component
    return data[0], data[1], priority
def invert_mapping(symbol_map):
    """
    Regroups a `symbol -> (version, priority)` map by version.

    Returns a list of `(version, symbols)` pairs ordered by version name, where
    each `symbols` list is sorted alphabetically. The priority stored alongside
    each version is ignored.
    """
    symbols_by_version = defaultdict(list)
    for name, entry in symbol_map.items():
        version_name, _priority = entry
        symbols_by_version[version_name].append(name)

    grouped = [(version_name, sorted(names))
               for version_name, names in symbols_by_version.items()]
    return sorted(grouped, key=lambda pair: pair[0])
def intersection(llvm, gcc):
    """
    Finds the intersection between the symbols extracted from
    compiler-rt.a/libunwind.a and libgcc_s.so.1.

    Args:
        llvm: iterable of plain symbol names found in the LLVM archives.
        gcc: iterable of symbol names found in libgcc_s; entries without an
            '@' version suffix are skipped.

    Returns:
        The result of `invert_mapping` over the common symbols: a sorted list
        of `(version_name, [symbols...])` pairs. When libgcc_s lists the same
        symbol under several versions, the one whose priority number (as
        reported by `split_suffix`) is lowest wins.
    """
    # Build the lookup set once: `llvm` arrives as a list, and testing
    # membership against a list for every libgcc symbol is a linear scan each.
    llvm_symbols = set(llvm)

    common_symbols = {}
    for versioned_symbol in gcc:
        suffix_triple = split_suffix(versioned_symbol)
        if not suffix_triple:
            continue
        symbol, version_name, version_number = suffix_triple
        if symbol not in llvm_symbols:
            continue
        current = common_symbols.get(symbol)
        # First sighting, or a version that outranks the one recorded so far.
        if current is None or version_number < current[1]:
            common_symbols[symbol] = (version_name, version_number)
    return invert_mapping(common_symbols)
def find_function_names(path):
    """
    Runs `llvm-readelf -su` on `path` and returns the eighth space-separated
    field of every output line that mentions 'FUNC' but not 'UND'. This is
    roughly `grep 'FUNC' | grep -v 'UND' | awk '{print $8}'` applied to the
    readelf output, i.e. the names of the defined functions.

    If llvm-readelf exits with a non-zero status, its stderr is forwarded to
    this process's stderr and the script exits with status 1.
    """
    completed = subprocess.run(args=['llvm-readelf', '-su', path],
                               capture_output=True)
    if completed.returncode != 0:
        print(completed.stderr.decode('utf-8'), file=sys.stderr)
        sys.exit(1)

    names = []
    for line in completed.stdout.decode('utf-8').split('\n'):
        # NOTE(review): both checks are substring matches over the whole line,
        # not comparisons against the type/section columns -- confirm that is
        # intended.
        if 'FUNC' not in line or 'UND' in line:
            continue
        # Columns are padded with runs of spaces; discard the empty pieces.
        fields = [field for field in line.split(' ') if field]
        names.append(fields[7])
    return names
def to_file(versioned_symbols):
    """
    Writes the generated version script to a file named `new-gcc_s-symbols`
    located in the same directory as this script.

    The file starts with a notice telling the reader not to check it in,
    followed by one `VERSION { symbol; ... };` stanza per `(version, symbols)`
    pair in `versioned_symbols`, in the order given.
    """
    notice = ('Do not check this version script in: you should instead work '
              'out which symbols are missing in `lib/gcc_s.ver` and then '
              'integrate them into `lib/gcc_s.ver`. For more information, '
              'please see `doc/LLVMLibgcc.rst`.\n')
    script_dir = os.path.dirname(os.path.realpath(__file__))
    path = f'{script_dir}/new-gcc_s-symbols'
    with open(path, 'w') as out:
        out.write(notice)
        for version, symbols in versioned_symbols:
            out.write(f'{version} {{\n')
            out.writelines(f' {i};\n' for i in symbols)
            out.write('};\n\n')
def read_args():
    """
    Parses the command line and returns the resulting namespace.

    Three string options are accepted, all of them required: `--compiler_rt`,
    `--libunwind`, and `--libgcc_s`.
    """
    required_paths = (
        ('--compiler_rt', 'Path to `libclang_rt.builtins-${ARCH}.a`.'),
        ('--libunwind', 'Path to `libunwind.a`.'),
        ('--libgcc_s',
         'Path to `libgcc_s.so.1`. Note that unlike the other two arguments, this is a dynamic library.'
         ),
    )
    parser = argparse.ArgumentParser()
    for flag, description in required_paths:
        parser.add_argument(flag, type=str, help=description, required=True)
    return parser.parse_args()
def main():
    """
    Script entry point: collects the function names from the compiler-rt and
    libunwind archives and from libgcc_s, intersects them, and writes the
    resulting version script to disk.
    """
    options = read_args()
    llvm_symbols = [
        *find_function_names(options.compiler_rt),
        *find_function_names(options.libunwind),
    ]
    gcc_symbols = find_function_names(options.libgcc_s)
    # TODO(cjdb): work out a way to integrate new symbols in with the existing
    # ones
    to_file(intersection(llvm_symbols, gcc_symbols))


if __name__ == '__main__':
    main()