[BOLT][TEST] Import jump-table-icp.test, update link_fdata script

Summary:
Import the test. The assembly input has three functions with associated fdata.
The old link_fdata.sh script only replaces the symbol names with symbol values,
whereas fdata format expects to have symbol offsets against the anchor symbol.
Introduce the link_fdata.py script which is able to parse the input and produce
either an offset or an absolute symbol value.

(cherry picked from FBD32256351)
This commit is contained in:
Amir Ayupov 2021-11-08 10:56:21 -08:00 committed by Maksim Panchenko
parent 8331f75e28
commit 3a16f2169d
5 changed files with 534 additions and 19 deletions

View file

@ -0,0 +1,307 @@
# main: driver for the jump-table ICP test input (x86-64, AT&T syntax).
# Visible behavior: zero an accumulator at -0x14(%rbp) and a counter at
# -0x18(%rbp), then loop while the counter is <= 0x98967f. Each iteration
# calls rand twice, reduces each result to a remainder modulo 7, passes the
# first to _Z3inci and the second to _Z7inc_dupi, and adds
# (_Z3inci result) - 2 * (_Z7inc_dupi result) to the accumulator.
# Returns 1 in %eax when the accumulator ends at zero, otherwise 0.
#
# The FDATA comment lines are not ordinary comments: they are profile records
# extracted by the link_fdata tool, with "#label#" placeholders resolved
# against the assembled object. Field order: <is symbol> <function> <from>
# <is symbol> <function> <to> <mispredictions> <branch count>.
# Keep those lines byte-exact and keep the *_inc / *_dup / *_br labels
# directly on the instruction they describe.
.text
.globl main
.type main, %function
main:
.cfi_startproc
# Prologue: frame pointer, callee-saved %rbx, and 0x18 bytes of locals.
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
pushq %rbx
subq $0x18, %rsp
.cfi_offset %rbx, -24
# accumulator = 0, counter = 0, then enter the loop at its condition check.
movl $0x0, -0x14(%rbp)
movl $0x0, -0x18(%rbp)
jmp Ltmp16
Ltmp17:
# Loop body, first operand. %ecx keeps the raw rand() result. The multiply by
# 0x92492493 followed by add / shift-right-2 / sign correction is the usual
# reciprocal sequence for signed division by 7; the quotient is stored in
# -0x1c(%rbp).
callq rand@PLT
movl %eax, %ecx
movl $0x92492493, %edx
movl %ecx, %eax
imull %edx
leal (%rdx,%rcx), %eax
sarl $0x2, %eax
movl %eax, %edx
movl %ecx, %eax
sarl $0x1f, %eax
subl %eax, %edx
movl %edx, %eax
movl %eax, -0x1c(%rbp)
# (q << 3) - q = 7 * q; subtracting that from the raw value leaves the
# remainder, which overwrites -0x1c(%rbp).
movl -0x1c(%rbp), %edx
movl %edx, %eax
shll $0x3, %eax
subl %edx, %eax
subl %eax, %ecx
movl %ecx, %eax
movl %eax, -0x1c(%rbp)
# Second operand: the same reduction on a fresh rand() result, kept in
# -0x20(%rbp).
callq rand@PLT
movl %eax, %ecx
movl $0x92492493, %edx
movl %ecx, %eax
imull %edx
leal (%rdx,%rcx), %eax
sarl $0x2, %eax
movl %eax, %edx
movl %ecx, %eax
sarl $0x1f, %eax
subl %eax, %edx
movl %edx, %eax
movl %eax, -0x20(%rbp)
movl -0x20(%rbp), %edx
movl %edx, %eax
shll $0x3, %eax
subl %edx, %eax
subl %eax, %ecx
movl %ecx, %eax
movl %eax, -0x20(%rbp)
# Call _Z3inci with the first operand. Ltmp17_inc marks the call instruction
# so the profile record below can refer to its address.
movl -0x1c(%rbp), %eax
movl %eax, %edi
Ltmp17_inc:
callq _Z3inci
# FDATA: 1 main #Ltmp17_inc# 1 _Z3inci 0 0 1073
# Park the first result in callee-saved %rbx across the second call.
movl %eax, %ebx
movl -0x20(%rbp), %eax
movl %eax, %edi
Ltmp17_dup:
callq _Z7inc_dupi
# FDATA: 1 main #Ltmp17_dup# 1 _Z7inc_dupi 0 0 1064
# %eax = 0 - dup, doubled, plus the saved first result:
# accumulator += first - 2 * dup; then counter += 1.
movl %eax, %edx
movl $0x0, %eax
subl %edx, %eax
addl %eax, %eax
addl %ebx, %eax
addl %eax, -0x14(%rbp)
addl $0x1, -0x18(%rbp)
Ltmp16:
# Loop condition: signed counter <= 0x98967f (9999999), so the body runs
# 10000000 times starting from counter 0.
cmpl $0x98967f, -0x18(%rbp)
Ltmp16_br:
jle Ltmp17
# FDATA: 1 main #Ltmp16_br# 1 main #Ltmp17# 0 651
# Return value: %eax = (accumulator == 0) ? 1 : 0.
cmpl $0x0, -0x14(%rbp)
sete %al
movzbl %al, %eax
# Epilogue mirrors the prologue.
addq $0x18, %rsp
popq %rbx
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.cfi_endproc
.size main, .-main
# _Z3inci: switch-style function dispatched through a jump table.
# (The name is an Itanium-mangled C++ symbol, presumably inc(int) - the
# C++ source is not part of this file.)
# In:  %edi = selector.
# Out: %eax = selector + 1. For selectors 0..5 the same amount is also added
#      to the global `total`; any other selector takes the default path at
#      Ltmp12 and leaves `total` untouched.
# The argument is spilled to -0x4(%rbp) with no %rsp adjustment, and the
# function makes no calls.
# The FDATA comment lines are profile records consumed by link_fdata; keep
# them byte-exact and keep each *_br label directly on its branch.
.globl _Z3inci
.type _Z3inci, %function
_Z3inci:
.cfi_startproc
LBB00:
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
movl %edi, -0x4(%rbp)
# Range check. `ja` is an unsigned comparison, so negative selectors also go
# to the default path.
cmpl $0x5, -0x4(%rbp)
LBB00_br:
ja Ltmp12
# FDATA: 1 _Z3inci #LBB00_br# 1 _Z3inci #Ltmp12# 189 189
# FDATA: 1 _Z3inci #LBB00_br# 1 _Z3inci #LFT0# 0 881
LFT0:
# Load the target from the table with an absolute, index-scaled address
# (8 bytes per entry) and jump to it. This indirect jump is the one the
# profile records below describe, one record per table target.
movl -0x4(%rbp), %eax
movq "JUMP_TABLE/_Z3inci.0"(,%rax,8), %rax
LFT0_br:
jmpq *%rax
# FDATA: 1 _Z3inci #LFT0_br# 1 _Z3inci #Ltmp0# 146 163
# FDATA: 1 _Z3inci #LFT0_br# 1 _Z3inci #Ltmp1# 140 156
# FDATA: 1 _Z3inci #LFT0_br# 1 _Z3inci #Ltmp2# 126 157
# FDATA: 1 _Z3inci #LFT0_br# 1 _Z3inci #Ltmp3# 129 148
# FDATA: 1 _Z3inci #LFT0_br# 1 _Z3inci #Ltmp4# 137 150
# FDATA: 1 _Z3inci #LFT0_br# 1 _Z3inci #Ltmp5# 134 152
# Case 0: total += 1, return 1.
Ltmp0:
movl total(%rip), %eax
addl $0x1, %eax
movl %eax, total(%rip)
movl $0x1, %eax
Ltmp0_br:
jmp Ltmp13
# FDATA: 1 _Z3inci #Ltmp0_br# 1 _Z3inci #Ltmp13# 0 167
# Case 1: total += 2, return 2.
Ltmp1:
movl total(%rip), %eax
addl $0x2, %eax
movl %eax, total(%rip)
movl $0x2, %eax
Ltmp1_br:
jmp Ltmp13
# FDATA: 1 _Z3inci #Ltmp1_br# 1 _Z3inci #Ltmp13# 0 151
# Case 2: total += 3, return 3.
Ltmp2:
movl total(%rip), %eax
addl $0x3, %eax
movl %eax, total(%rip)
movl $0x3, %eax
Ltmp2_br:
jmp Ltmp13
# FDATA: 1 _Z3inci #Ltmp2_br# 1 _Z3inci #Ltmp13# 0 152
# Case 3: total += 4, return 4.
Ltmp3:
movl total(%rip), %eax
addl $0x4, %eax
movl %eax, total(%rip)
movl $0x4, %eax
Ltmp3_br:
jmp Ltmp13
# FDATA: 1 _Z3inci #Ltmp3_br# 1 _Z3inci #Ltmp13# 0 146
# Case 4: total += 5, return 5.
Ltmp4:
movl total(%rip), %eax
addl $0x5, %eax
movl %eax, total(%rip)
movl $0x5, %eax
Ltmp4_br:
jmp Ltmp13
# FDATA: 1 _Z3inci #Ltmp4_br# 1 _Z3inci #Ltmp13# 0 149
# Case 5: total += 6, return 6.
Ltmp5:
movl total(%rip), %eax
addl $0x6, %eax
movl %eax, total(%rip)
movl $0x6, %eax
Ltmp5_br:
jmp Ltmp13
# FDATA: 1 _Z3inci #Ltmp5_br# 1 _Z3inci #Ltmp13# 0 150
# Default: return selector + 1 and fall through to the shared exit.
Ltmp12:
movl -0x4(%rbp), %eax
addl $0x1, %eax
# Shared exit for every path.
Ltmp13:
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.cfi_endproc
.size _Z3inci, .-_Z3inci
# Jump tables
# Six 8-byte entries, indexed by the selector (0..5) in LFT0 above.
.section .rodata
"JUMP_TABLE/_Z3inci.0":
.quad Ltmp0
.quad Ltmp1
.quad Ltmp2
.quad Ltmp3
.quad Ltmp4
.quad Ltmp5
# BinaryData
# `total` is the global counter updated by the case blocks of both functions.
# NOTE(review): no storage directive follows this label in the visible
# source - confirm the 4-byte accesses to `total` are meant to land here.
.section .bss
"total":
# _Z7inc_dupi: near-duplicate of _Z3inci with its own jump table.
# (The name is an Itanium-mangled C++ symbol, presumably inc_dup(int) - the
# C++ source is not part of this file.)
# In:  %edi = selector.
# Out: %eax = selector + 1. For selectors 0..5 the global `total` is also
#      increased by selector + 2, one more than the value returned; any other
#      selector takes the default path at Ltmp14 and leaves `total` untouched.
# The argument is spilled to -0x4(%rbp) with no %rsp adjustment, and the
# function makes no calls.
# The FDATA comment lines are profile records consumed by link_fdata; keep
# them byte-exact and keep each *_br label directly on its branch.
.text
.globl _Z7inc_dupi
.type _Z7inc_dupi, %function
_Z7inc_dupi:
.cfi_startproc
LBB01:
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
movl %edi, -0x4(%rbp)
# Range check. `ja` is an unsigned comparison, so negative selectors also go
# to the default path.
cmpl $0x5, -0x4(%rbp)
LBB01_br:
ja Ltmp14
# FDATA: 1 _Z7inc_dupi #LBB01_br# 1 _Z7inc_dupi #Ltmp14# 143 144
# FDATA: 1 _Z7inc_dupi #LBB01_br# 1 _Z7inc_dupi #LFT1# 0 777
LFT1:
# Load the target from the table with an absolute, index-scaled address
# (8 bytes per entry) and jump to it. The profile records below describe this
# indirect jump, one record per table target.
movl -0x4(%rbp), %eax
movq "JUMP_TABLE/_Z7inc_dupi.0"(,%rax,8), %rax
LFT1_br:
jmpq *%rax
# FDATA: 1 _Z7inc_dupi #LFT1_br# 1 _Z7inc_dupi #Ltmp6# 130 137
# FDATA: 1 _Z7inc_dupi #LFT1_br# 1 _Z7inc_dupi #Ltmp7# 126 136
# FDATA: 1 _Z7inc_dupi #LFT1_br# 1 _Z7inc_dupi #Ltmp8# 122 130
# FDATA: 1 _Z7inc_dupi #LFT1_br# 1 _Z7inc_dupi #Ltmp9# 111 130
# FDATA: 1 _Z7inc_dupi #LFT1_br# 1 _Z7inc_dupi #Ltmp10# 122 140
# FDATA: 1 _Z7inc_dupi #LFT1_br# 1 _Z7inc_dupi #Ltmp11# 104 114
# Case 0: total += 2, return 1.
Ltmp6:
movl total(%rip), %eax
addl $0x2, %eax
movl %eax, total(%rip)
movl $0x1, %eax
Ltmp6_br:
jmp Ltmp15
# FDATA: 1 _Z7inc_dupi #Ltmp6_br# 1 _Z7inc_dupi #Ltmp15# 0 106
# Case 1: total += 3, return 2.
Ltmp7:
movl total(%rip), %eax
addl $0x3, %eax
movl %eax, total(%rip)
movl $0x2, %eax
Ltmp7_br:
jmp Ltmp15
# FDATA: 1 _Z7inc_dupi #Ltmp7_br# 1 _Z7inc_dupi #Ltmp15# 0 113
# Case 2: total += 4, return 3.
Ltmp8:
movl total(%rip), %eax
addl $0x4, %eax
movl %eax, total(%rip)
movl $0x3, %eax
Ltmp8_br:
jmp Ltmp15
# FDATA: 1 _Z7inc_dupi #Ltmp8_br# 1 _Z7inc_dupi #Ltmp15# 0 97
# Case 3: total += 5, return 4.
Ltmp9:
movl total(%rip), %eax
addl $0x5, %eax
movl %eax, total(%rip)
movl $0x4, %eax
Ltmp9_br:
jmp Ltmp15
# FDATA: 1 _Z7inc_dupi #Ltmp9_br# 1 _Z7inc_dupi #Ltmp15# 0 105
# Case 4: total += 6, return 5.
Ltmp10:
movl total(%rip), %eax
addl $0x6, %eax
movl %eax, total(%rip)
movl $0x5, %eax
Ltmp10_br:
jmp Ltmp15
# FDATA: 1 _Z7inc_dupi #Ltmp10_br# 1 _Z7inc_dupi #Ltmp15# 0 98
# Case 5: total += 7, return 6.
Ltmp11:
movl total(%rip), %eax
addl $0x7, %eax
movl %eax, total(%rip)
movl $0x6, %eax
Ltmp11_br:
jmp Ltmp15
# FDATA: 1 _Z7inc_dupi #Ltmp11_br# 1 _Z7inc_dupi #Ltmp15# 0 92
# Default: return selector + 1 and fall through to the shared exit.
Ltmp14:
movl -0x4(%rbp), %eax
addl $0x1, %eax
# Shared exit for every path.
Ltmp15:
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.cfi_endproc
.size _Z7inc_dupi, .-_Z7inc_dupi
# Jump tables
# Six 8-byte entries, indexed by the selector (0..5) in LFT1 above.
.section .rodata
"JUMP_TABLE/_Z7inc_dupi.0":
.quad Ltmp6
.quad Ltmp7
.quad Ltmp8
.quad Ltmp9
.quad Ltmp10
.quad Ltmp11

View file

@ -0,0 +1,113 @@
# Jump-table indirect-call-promotion test.
# Pipeline, as spelled out by the run lines below: assemble the input with
# llvm-mc, build the profile with link_fdata from the FDATA comments in the
# same assembly file, strip unneeded symbols, link with relocations kept
# (-Wl,-q), run llvm-bolt with jump-table ICP enabled, then disassemble the
# result. Both the llvm-bolt output and the disassembly are piped into
# FileCheck.
RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
RUN: %p/Inputs/jump_table_icp.s -o %t.o
RUN: link_fdata %p/Inputs/jump_table_icp.s %t.o %t.fdata
RUN: llvm-strip --strip-unneeded %t.o
RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
RUN: (llvm-bolt %t.exe -data %t.fdata -o %t -relocs \
RUN: -reorder-blocks=cache -split-functions=3 -split-all-cold \
RUN: -use-gnu-stack -dyno-stats -indirect-call-promotion=jump-tables \
RUN: -print-icp -v=0 \
RUN: -icp-jt-remaining-percent-threshold=10 \
RUN: -icp-jt-total-percent-threshold=2 \
RUN: -indirect-call-promotion-topn=1 \
RUN: -icp-jump-tables-targets -align-functions-max-bytes=7 2>&1 && \
RUN: llvm-objdump -d %t --print-imm-hex) | FileCheck %s
# NOTE(review): the eight BOLT-INFO lines below carry no FileCheck prefix, so
# they are reference text only and are not verified - confirm that this is
# intentional.
BOLT-INFO: ICP total indirect callsites = 0
BOLT-INFO: ICP total jump table callsites = 2
BOLT-INFO: ICP total number of calls = 2137
BOLT-INFO: ICP percentage of calls that are indirect = 0.0%
BOLT-INFO: ICP percentage of indirect calls that can be optimized = 0.0%
BOLT-INFO: ICP percentage of indirect calls that are optimized = 0.0%
BOLT-INFO: ICP percentage of jump table calls that can be optimized = 17.7%
BOLT-INFO: ICP percentage of jump table calls that are optimized = 100.0%
# Expected post-ICP block dump for the first function: block sizes, execution
# counts and edge counts.
CHECK: Binary Function "_Z3inci" after indirect-call-promotion
CHECK: .LBB{{.*}} (8 instructions, align : 1)
CHECK-NEXT: Entry Point
CHECK-NEXT: Exec Count : 1073
CHECK: Successors: .Ltmp{{.*}} (mispreds: 189, count: 189), .LFT{{.*}} (mispreds: 0, count: 881)
CHECK: .LFT{{.*}} (4 instructions, align : 1)
CHECK-NEXT: Exec Count : 881
CHECK: Predecessors: .LBB{{.*}}
CHECK: Successors: .Ltmp{{.*}} (mispreds: 138, count: 155), .Ltmp{{.*}} (mispreds: 0, count: 726)
CHECK: .Ltmp{{.*}} (1 instructions, align : 1)
CHECK-NEXT: Exec Count : 726
CHECK: Predecessors: .LFT{{.*}}
CHECK: Successors: .L{{.*}} (mispreds: 126, count: 157), .L{{.*}} (mispreds: 140, count: 156), .L{{.*}} (mispreds: 134, count: 152), .L{{.*}} (mispreds: 137, count: 150), .L{{.*}} (mispreds: 129, count: 148), .L{{.*}} (mispreds: 0, count: 0)
CHECK: .Ltmp{{.*}} (5 instructions, align : 1)
CHECK-NEXT: Exec Count : 167
CHECK: Predecessors: .Ltmp{{.*}}, .LFT{{.*}}
CHECK: .Ltmp{{.*}} (5 instructions, align : 1)
CHECK-NEXT: Exec Count : 156
CHECK: Predecessors: .Ltmp{{.*}}
CHECK: .Ltmp{{.*}} (5 instructions, align : 1)
CHECK-NEXT: Exec Count : 157
CHECK: Predecessors: .Ltmp{{.*}}
CHECK: .Ltmp{{.*}} (5 instructions, align : 1)
CHECK-NEXT: Exec Count : 148
CHECK: Predecessors: .Ltmp{{.*}}
CHECK: .Ltmp{{.*}} (5 instructions, align : 1)
CHECK-NEXT: Exec Count : 150
CHECK: Predecessors: .Ltmp{{.*}}
# Same expectations for the second function, including the successor edge of
# each case block.
CHECK: Binary Function "_Z7inc_dupi" after indirect-call-promotion
CHECK: .LBB{{.*}} (8 instructions, align : 1)
CHECK-NEXT: Entry Point
CHECK-NEXT: Exec Count : 1064
CHECK: Successors: .Ltmp{{.*}} (mispreds: 143, count: 144), .LFT{{.*}} (mispreds: 0, count: 777)
CHECK: .LFT{{.*}} (4 instructions, align : 1)
CHECK-NEXT: Exec Count : 777
CHECK: Predecessors: .LBB{{.*}}
CHECK: Successors: .Ltmp{{.*}} (mispreds: 120, count: 138), .Ltmp{{.*}} (mispreds: 0, count: 639)
CHECK: .Ltmp{{.*}} (1 instructions, align : 1)
CHECK-NEXT: Exec Count : 639
CHECK: Predecessors: .LFT{{.*}}
CHECK: Successors: .L{{.*}} (mispreds: 130, count: 137), .L{{.*}} (mispreds: 126, count: 136), .L{{.*}} (mispreds: 122, count: 130), .L{{.*}} (mispreds: 111, count: 130), .L{{.*}} (mispreds: 104, count: 114), .L{{.*}} (mispreds: 0, count: 0)
CHECK: .Ltmp{{.*}} (5 instructions, align : 1)
CHECK-NEXT: Exec Count : 137
CHECK: Predecessors: .Ltmp{{.*}}
CHECK: Successors: .Ltmp{{.*}} (mispreds: 0, count: 106)
CHECK: .Ltmp{{.*}} (5 instructions, align : 1)
CHECK-NEXT: Exec Count : 136
CHECK: Predecessors: .Ltmp{{.*}}
CHECK: Successors: .Ltmp{{.*}} (mispreds: 0, count: 113)
CHECK: .Ltmp{{.*}} (5 instructions, align : 1)
CHECK-NEXT: Exec Count : 130
CHECK: Predecessors: .Ltmp{{.*}}
CHECK: Successors: .Ltmp{{.*}} (mispreds: 0, count: 97)
CHECK: .Ltmp{{.*}} (5 instructions, align : 1)
CHECK-NEXT: Exec Count : 130
CHECK: Predecessors: .Ltmp{{.*}}
CHECK: Successors: .Ltmp{{.*}} (mispreds: 0, count: 105)
CHECK: .Ltmp{{.*}} (5 instructions, align : 1)
CHECK-NEXT: Exec Count : 140
CHECK: Predecessors: .Ltmp{{.*}}, .LFT{{.*}}
CHECK: Successors: .Ltmp{{.*}} (mispreds: 0, count: 98)
# Disassembly expectations: in both functions the jump-table load must be
# followed by a compare against an immediate, a direct conditional jump into
# the same function, and only then the remaining indirect jump.
CHECK: <_Z3inci>:
CHECK: movq 0x{{.*}}(,%rax,8), %rax
CHECK-NEXT: cmpq $0x{{.*}}, %rax
CHECK-NEXT: je {{.*}} <_Z3inci+0x{{.*}}>
CHECK-NEXT: jmpq *%rax
CHECK: <_Z7inc_dupi>:
CHECK: movq 0x{{.*}}(,%rax,8), %rax
CHECK-NEXT: cmpq $0x{{.*}}, %rax
CHECK-NEXT: je {{.*}} <_Z7inc_dupi+0x{{.*}}>
CHECK-NEXT: jmpq *%rax

113
bolt/test/link_fdata.py Executable file
View file

@ -0,0 +1,113 @@
#!/usr/bin/env python3
"""
This script reads the input file given on the command line, extracts all lines
starting with "# FDATA: " (or a given prefix instead of "FDATA"), parses the
directives, replaces symbol names ("#name#") with either symbol values or with
offsets from respective anchor symbols, and writes the result to the output
file.
"""
import argparse
import subprocess
import sys
import re
# Command-line interface: three required positionals (input, objfile, output),
# an optional positional prefix that replaces "FDATA" in the marker comment,
# and an optional path to the nm tool used to read symbol values.
parser = argparse.ArgumentParser()
parser.add_argument("input", help="Source file containing the profile marker comments")
parser.add_argument("objfile", help="Object file to extract symbol values from")
parser.add_argument("output", help="File to write the resolved profile to")
parser.add_argument("prefix", nargs="?", default="FDATA", help="Custom FDATA prefix")
parser.add_argument("--nmtool", default="nm", help="Path to nm tool")
args = parser.parse_args()

# Regexes to extract FDATA lines from input and parse FDATA and pre-aggregated
# profile data
prefix_pat = re.compile(f"^# {args.prefix}: (.*)")

# FDATA records:
# <is symbol?> <closest elf symbol or DSO name> <relative FROM address>
# <is symbol?> <closest elf symbol or DSO name> <relative TO address>
# <number of mispredictions> <number of branches>
# The first group greedily captures the six source/destination fields; the
# two trailing numeric groups are named in the order the format defines them
# (mispredictions first, then the branch/execution count).
fdata_pat = re.compile(r"([01].*) (?P<mispred>\d+) (?P<exec>\d+)")

# Pre-aggregated profile:
# {B|F|f} [<start_id>:]<start_offset> [<end_id>:]<end_offset> <count>
# [<mispred_count>]
preagg_pat = re.compile(r"(?P<type>[BFf]) (?P<offsets_count>.*)")

# Replacement symbol: #symname#
replace_pat = re.compile(r"#(?P<symname>[^#]+)#")
# Read the input and collect every profile line as a tagged tuple:
#   ('FDATA',  (is_sym1, anchor1, offset1, is_sym2, anchor2, offset2,
#               mispred_count, exec_count))
#   ('PREAGG', (type, offsets_and_counts))
# Lines that do not carry the "# <prefix>: " marker are ignored.
exprs = []
with open(args.input, 'r') as f:
    for line in f:
        prefix_match = prefix_pat.match(line)
        if not prefix_match:
            continue
        profile_line = prefix_match.group(1)
        fdata_match = fdata_pat.match(profile_line)
        preagg_match = preagg_pat.match(profile_line)
        if fdata_match:
            # Groups come back in record order: the six source/destination
            # fields as one string, then mispredictions, then branch count.
            src_dst, mispred, execnt = fdata_match.groups()
            # Split by whitespaces not preceded by a backslash (negative lookbehind)
            chunks = re.split(r'(?<!\\) +', src_dst)
            # Check if the number of records separated by non-escaped whitespace
            # exactly matches the format.
            assert len(chunks) == 6, f"ERROR: wrong format/whitespaces must be escaped:\n{line}"
            exprs.append(('FDATA', (*chunks, mispred, execnt)))
        elif preagg_match:
            exprs.append(('PREAGG', preagg_match.groups()))
        else:
            # sys.exit rather than the bare exit(): the latter is an
            # interactive helper injected by the site module and is not
            # guaranteed to exist (e.g. under `python -S`).
            sys.exit("ERROR: unexpected input:\n%s" % line)
# Read nm output: <symbol value> <symbol type> <symbol name>
nm_result = subprocess.run([args.nmtool, '--defined-only', args.objfile],
                           text=True, capture_output=True)
# A failed nm run would otherwise yield an empty symbol map and surface later
# as a misleading "symbol ... is not defined in binary" assertion.
if nm_result.returncode != 0:
    sys.exit(f"ERROR: {args.nmtool} failed on {args.objfile}:\n{nm_result.stderr}")
nm_output = nm_result.stdout

# Populate symbol map (symbol name -> hexadecimal value string as printed by nm)
symbols = {}
for symline in nm_output.splitlines():
    fields = symline.split(maxsplit=2)
    # Skip anything that is not a full "<value> <type> <name>" entry (blank
    # lines, file headers) instead of failing on tuple unpacking.
    if len(fields) != 3:
        continue
    symval, _, symname = fields
    symbols[symname] = symval
def evaluate_symbol(issym, anchor, offsym):
    """
    Render one "<is symbol?> <anchor> <offset>" triple of an FDATA record.

    When offsym does not begin with a "#name#" placeholder the triple is
    returned unchanged. Otherwise the placeholder is looked up in the symbol
    map: for issym == '0' it becomes the symbol value exactly as stored in
    the map, for any other issym it becomes the hexadecimal distance between
    the symbol and its anchor symbol. Asserts that every symbol it needs is
    present in the map.
    """
    sym_match = replace_pat.match(offsym)
    if sym_match is None:
        # Nothing to resolve, hand the fields back as they came in.
        return f'{issym} {anchor} {offsym}'

    symname = sym_match['symname']
    assert symname in symbols, f"ERROR: symbol {symname} is not defined in binary"

    if issym != '0':
        # Symbol-relative record: express the target as an offset from anchor.
        assert anchor in symbols, f"ERROR: symbol {anchor} is not defined in binary"
        anchor_value = int(symbols[anchor], 16)
        sym_offset = int(symbols[symname], 16) - anchor_value
        return f'{issym} {anchor} {format(sym_offset, "x")}'

    # Not symbol-relative: substitute the value string from the symbol map.
    return f'{issym} {anchor} {symbols[symname]}'
def replace_symbol(matchobj):
    """
    Substitution callback: map a "#name#" match to the value stored for that
    symbol in the symbol map.

    matchobj must expose a 'symname' group; asserts that the symbol is known.
    """
    symname = matchobj['symname']
    assert symname in symbols, f"ERROR: symbol {symname} is not defined in binary"
    resolved = symbols[symname]
    return resolved
# Write the resolved profile: one output line per collected expression, in
# input order.
with open(args.output, 'w') as f:
    for etype, expr in exprs:
        if etype == 'FDATA':
            # The two trailing fields are kept in record order
            # (mispredictions, then branch count).
            issym1, anchor1, offsym1, issym2, anchor2, offsym2, mispred, execnt = expr
            print(evaluate_symbol(issym1, anchor1, offsym1),
                  evaluate_symbol(issym2, anchor2, offsym2),
                  mispred, execnt, file=f)
        elif etype == 'PREAGG':
            # Replace all symbols enclosed in ##
            print(expr[0], replace_pat.sub(replace_symbol, expr[1]), file=f)
        else:
            # sys.exit rather than the bare exit(): the latter is an
            # interactive helper injected by the site module.
            sys.exit("ERROR: unhandled expression type:\n%s" % etype)

View file

@ -1,18 +0,0 @@
#!/bin/bash -e
# Legacy profile linker (removed by this change).
# Usage: <input> <objfile> <output> [prefix]
#   $1  source file containing "# <prefix>: ..." marker comments
#   $2  object file whose defined symbols are listed with nm
#   $3  output file
#   $4  optional marker prefix, defaults to FDATA
# Every "#symname#" placeholder in the output is replaced with the symbol's
# value as printed by nm; no offset relative to an anchor symbol is computed.
prefix=${4:-"FDATA"}
# Keep only the marker lines and strip the marker itself.
grep -e "^# ${prefix}:" < "$1" | sed -E "s/# ${prefix}: //g" > "$3"
# One array element per nm output line.
mapfile -t symbols < <(nm --defined-only "$2")
for line in "${symbols[@]}"; do
# First space-separated field is the value; $line is deliberately left
# unquoted, so runs of whitespace collapse before cut sees them.
val=$(echo $line | cut -d' ' -f1)
# Blank the first two fields and trim leading whitespace: what remains is the
# symbol name.
symname=$(echo $line | awk '{ $1=$2=""; print $0 }' | sed 's|^[ \t]*||')
if [ -z "$symname" ]; then
continue
fi
# The expansion is empty when val is empty or contains any character outside
# 0-9a-fA-F, i.e. skip entries whose value is not a plain hex number.
if [ -z "${val##*[!0-9a-fA-F]*}" ]; then
continue
fi
# In-place substitution, one sed pass over the output file per symbol.
sed -i -e "s|\#${symname}\#|$val|g" $3
done

View file

@ -77,7 +77,7 @@ tools = [
ToolSubst('llvm-objcopy', unresolved='fatal'),
ToolSubst('llvm-strip', unresolved='fatal'),
ToolSubst('llvm-readelf', unresolved='fatal'),
ToolSubst('link_fdata', command=FindTool('link_fdata.sh'), unresolved='fatal'),
ToolSubst('link_fdata', command=FindTool('link_fdata.py'), unresolved='fatal'),
ToolSubst('merge-fdata', unresolved='fatal'),
]
llvm_config.add_tool_substitutions(tools, tool_dirs)