diff --git a/bolt/test/X86/Inputs/jump_table_icp.s b/bolt/test/X86/Inputs/jump_table_icp.s new file mode 100644 index 000000000000..de2be9b6d112 --- /dev/null +++ b/bolt/test/X86/Inputs/jump_table_icp.s @@ -0,0 +1,307 @@ + .text + .globl main + .type main, %function +main: + .cfi_startproc + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + pushq %rbx + subq $0x18, %rsp + .cfi_offset %rbx, -24 + movl $0x0, -0x14(%rbp) + movl $0x0, -0x18(%rbp) + jmp Ltmp16 + +Ltmp17: + callq rand@PLT + movl %eax, %ecx + movl $0x92492493, %edx + movl %ecx, %eax + imull %edx + leal (%rdx,%rcx), %eax + sarl $0x2, %eax + movl %eax, %edx + movl %ecx, %eax + sarl $0x1f, %eax + subl %eax, %edx + movl %edx, %eax + movl %eax, -0x1c(%rbp) + movl -0x1c(%rbp), %edx + movl %edx, %eax + shll $0x3, %eax + subl %edx, %eax + subl %eax, %ecx + movl %ecx, %eax + movl %eax, -0x1c(%rbp) + callq rand@PLT + movl %eax, %ecx + movl $0x92492493, %edx + movl %ecx, %eax + imull %edx + leal (%rdx,%rcx), %eax + sarl $0x2, %eax + movl %eax, %edx + movl %ecx, %eax + sarl $0x1f, %eax + subl %eax, %edx + movl %edx, %eax + movl %eax, -0x20(%rbp) + movl -0x20(%rbp), %edx + movl %edx, %eax + shll $0x3, %eax + subl %edx, %eax + subl %eax, %ecx + movl %ecx, %eax + movl %eax, -0x20(%rbp) + movl -0x1c(%rbp), %eax + movl %eax, %edi +Ltmp17_inc: + callq _Z3inci +# FDATA: 1 main #Ltmp17_inc# 1 _Z3inci 0 0 1073 + movl %eax, %ebx + movl -0x20(%rbp), %eax + movl %eax, %edi +Ltmp17_dup: + callq _Z7inc_dupi +# FDATA: 1 main #Ltmp17_dup# 1 _Z7inc_dupi 0 0 1064 + movl %eax, %edx + movl $0x0, %eax + subl %edx, %eax + addl %eax, %eax + addl %ebx, %eax + addl %eax, -0x14(%rbp) + addl $0x1, -0x18(%rbp) + +Ltmp16: + cmpl $0x98967f, -0x18(%rbp) +Ltmp16_br: + jle Ltmp17 +# FDATA: 1 main #Ltmp16_br# 1 main #Ltmp17# 0 651 + + cmpl $0x0, -0x14(%rbp) + sete %al + movzbl %al, %eax + addq $0x18, %rsp + popq %rbx + popq %rbp + .cfi_def_cfa %rsp, 8 + retq + + .cfi_endproc +.size main, 
.-main + + .globl _Z3inci + .type _Z3inci, %function +_Z3inci: + .cfi_startproc +LBB00: + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movl %edi, -0x4(%rbp) + cmpl $0x5, -0x4(%rbp) +LBB00_br: + ja Ltmp12 +# FDATA: 1 _Z3inci #LBB00_br# 1 _Z3inci #Ltmp12# 189 189 +# FDATA: 1 _Z3inci #LBB00_br# 1 _Z3inci #LFT0# 0 881 + +LFT0: + movl -0x4(%rbp), %eax + movq "JUMP_TABLE/_Z3inci.0"(,%rax,8), %rax +LFT0_br: + jmpq *%rax +# FDATA: 1 _Z3inci #LFT0_br# 1 _Z3inci #Ltmp0# 146 163 +# FDATA: 1 _Z3inci #LFT0_br# 1 _Z3inci #Ltmp1# 140 156 +# FDATA: 1 _Z3inci #LFT0_br# 1 _Z3inci #Ltmp2# 126 157 +# FDATA: 1 _Z3inci #LFT0_br# 1 _Z3inci #Ltmp3# 129 148 +# FDATA: 1 _Z3inci #LFT0_br# 1 _Z3inci #Ltmp4# 137 150 +# FDATA: 1 _Z3inci #LFT0_br# 1 _Z3inci #Ltmp5# 134 152 + +Ltmp0: + movl total(%rip), %eax + addl $0x1, %eax + movl %eax, total(%rip) + movl $0x1, %eax +Ltmp0_br: + jmp Ltmp13 +# FDATA: 1 _Z3inci #Ltmp0_br# 1 _Z3inci #Ltmp13# 0 167 + +Ltmp1: + movl total(%rip), %eax + addl $0x2, %eax + movl %eax, total(%rip) + movl $0x2, %eax +Ltmp1_br: + jmp Ltmp13 +# FDATA: 1 _Z3inci #Ltmp1_br# 1 _Z3inci #Ltmp13# 0 151 + +Ltmp2: + movl total(%rip), %eax + addl $0x3, %eax + movl %eax, total(%rip) + movl $0x3, %eax +Ltmp2_br: + jmp Ltmp13 +# FDATA: 1 _Z3inci #Ltmp2_br# 1 _Z3inci #Ltmp13# 0 152 + +Ltmp3: + movl total(%rip), %eax + addl $0x4, %eax + movl %eax, total(%rip) + movl $0x4, %eax +Ltmp3_br: + jmp Ltmp13 +# FDATA: 1 _Z3inci #Ltmp3_br# 1 _Z3inci #Ltmp13# 0 146 + +Ltmp4: + movl total(%rip), %eax + addl $0x5, %eax + movl %eax, total(%rip) + movl $0x5, %eax +Ltmp4_br: + jmp Ltmp13 +# FDATA: 1 _Z3inci #Ltmp4_br# 1 _Z3inci #Ltmp13# 0 149 + +Ltmp5: + movl total(%rip), %eax + addl $0x6, %eax + movl %eax, total(%rip) + movl $0x6, %eax +Ltmp5_br: + jmp Ltmp13 +# FDATA: 1 _Z3inci #Ltmp5_br# 1 _Z3inci #Ltmp13# 0 150 + +Ltmp12: + movl -0x4(%rbp), %eax + addl $0x1, %eax + +Ltmp13: + popq %rbp + .cfi_def_cfa %rsp, 8 + retq + + .cfi_endproc 
+.size _Z3inci, .-_Z3inci +# Jump tables +.section .rodata +"JUMP_TABLE/_Z3inci.0": + .quad Ltmp0 + .quad Ltmp1 + .quad Ltmp2 + .quad Ltmp3 + .quad Ltmp4 + .quad Ltmp5 + +# BinaryData +.section .bss +"total": + + .text + .globl _Z7inc_dupi + .type _Z7inc_dupi, %function +_Z7inc_dupi: + .cfi_startproc +LBB01: + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movl %edi, -0x4(%rbp) + cmpl $0x5, -0x4(%rbp) +LBB01_br: + ja Ltmp14 +# FDATA: 1 _Z7inc_dupi #LBB01_br# 1 _Z7inc_dupi #Ltmp14# 143 144 +# FDATA: 1 _Z7inc_dupi #LBB01_br# 1 _Z7inc_dupi #LFT1# 0 777 + +LFT1: + movl -0x4(%rbp), %eax + movq "JUMP_TABLE/_Z7inc_dupi.0"(,%rax,8), %rax +LFT1_br: + jmpq *%rax +# FDATA: 1 _Z7inc_dupi #LFT1_br# 1 _Z7inc_dupi #Ltmp6# 130 137 +# FDATA: 1 _Z7inc_dupi #LFT1_br# 1 _Z7inc_dupi #Ltmp7# 126 136 +# FDATA: 1 _Z7inc_dupi #LFT1_br# 1 _Z7inc_dupi #Ltmp8# 122 130 +# FDATA: 1 _Z7inc_dupi #LFT1_br# 1 _Z7inc_dupi #Ltmp9# 111 130 +# FDATA: 1 _Z7inc_dupi #LFT1_br# 1 _Z7inc_dupi #Ltmp10# 122 140 +# FDATA: 1 _Z7inc_dupi #LFT1_br# 1 _Z7inc_dupi #Ltmp11# 104 114 + +Ltmp6: + movl total(%rip), %eax + addl $0x2, %eax + movl %eax, total(%rip) + movl $0x1, %eax +Ltmp6_br: + jmp Ltmp15 +# FDATA: 1 _Z7inc_dupi #Ltmp6_br# 1 _Z7inc_dupi #Ltmp15# 0 106 + +Ltmp7: + movl total(%rip), %eax + addl $0x3, %eax + movl %eax, total(%rip) + movl $0x2, %eax +Ltmp7_br: + jmp Ltmp15 +# FDATA: 1 _Z7inc_dupi #Ltmp7_br# 1 _Z7inc_dupi #Ltmp15# 0 113 + +Ltmp8: + movl total(%rip), %eax + addl $0x4, %eax + movl %eax, total(%rip) + movl $0x3, %eax +Ltmp8_br: + jmp Ltmp15 +# FDATA: 1 _Z7inc_dupi #Ltmp8_br# 1 _Z7inc_dupi #Ltmp15# 0 97 + +Ltmp9: + movl total(%rip), %eax + addl $0x5, %eax + movl %eax, total(%rip) + movl $0x4, %eax +Ltmp9_br: + jmp Ltmp15 +# FDATA: 1 _Z7inc_dupi #Ltmp9_br# 1 _Z7inc_dupi #Ltmp15# 0 105 + +Ltmp10: + movl total(%rip), %eax + addl $0x6, %eax + movl %eax, total(%rip) + movl $0x5, %eax +Ltmp10_br: + jmp Ltmp15 +# FDATA: 1 _Z7inc_dupi 
#Ltmp10_br# 1 _Z7inc_dupi #Ltmp15# 0 98 + +Ltmp11: + movl total(%rip), %eax + addl $0x7, %eax + movl %eax, total(%rip) + movl $0x6, %eax +Ltmp11_br: + jmp Ltmp15 +# FDATA: 1 _Z7inc_dupi #Ltmp11_br# 1 _Z7inc_dupi #Ltmp15# 0 92 + +Ltmp14: + movl -0x4(%rbp), %eax + addl $0x1, %eax + +Ltmp15: + popq %rbp + .cfi_def_cfa %rsp, 8 + retq + + .cfi_endproc +.size _Z7inc_dupi, .-_Z7inc_dupi +# Jump tables +.section .rodata +"JUMP_TABLE/_Z7inc_dupi.0": + .quad Ltmp6 + .quad Ltmp7 + .quad Ltmp8 + .quad Ltmp9 + .quad Ltmp10 + .quad Ltmp11 diff --git a/bolt/test/X86/jump-table-icp.test b/bolt/test/X86/jump-table-icp.test new file mode 100644 index 000000000000..77f28577a9cc --- /dev/null +++ b/bolt/test/X86/jump-table-icp.test @@ -0,0 +1,113 @@ +RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \ +RUN: %p/Inputs/jump_table_icp.s -o %t.o +RUN: link_fdata %p/Inputs/jump_table_icp.s %t.o %t.fdata +RUN: llvm-strip --strip-unneeded %t.o +RUN: %clang %cflags %t.o -o %t.exe -Wl,-q + +RUN: (llvm-bolt %t.exe -data %t.fdata -o %t -relocs \ +RUN: -reorder-blocks=cache -split-functions=3 -split-all-cold \ +RUN: -use-gnu-stack -dyno-stats -indirect-call-promotion=jump-tables \ +RUN: -print-icp -v=0 \ +RUN: -icp-jt-remaining-percent-threshold=10 \ +RUN: -icp-jt-total-percent-threshold=2 \ +RUN: -indirect-call-promotion-topn=1 \ +RUN: -icp-jump-tables-targets -align-functions-max-bytes=7 2>&1 && \ +RUN: llvm-objdump -d %t --print-imm-hex) | FileCheck %s + +BOLT-INFO: ICP total indirect callsites = 0 +BOLT-INFO: ICP total jump table callsites = 2 +BOLT-INFO: ICP total number of calls = 2137 +BOLT-INFO: ICP percentage of calls that are indirect = 0.0% +BOLT-INFO: ICP percentage of indirect calls that can be optimized = 0.0% +BOLT-INFO: ICP percentage of indirect calls that are optimized = 0.0% +BOLT-INFO: ICP percentage of jump table calls that can be optimized = 17.7% +BOLT-INFO: ICP percentage of jump table calls that are optimized = 100.0% + +CHECK: Binary Function "_Z3inci" after 
indirect-call-promotion +CHECK: .LBB{{.*}} (8 instructions, align : 1) +CHECK-NEXT: Entry Point +CHECK-NEXT: Exec Count : 1073 +CHECK: Successors: .Ltmp{{.*}} (mispreds: 189, count: 189), .LFT{{.*}} (mispreds: 0, count: 881) + +CHECK: .LFT{{.*}} (4 instructions, align : 1) +CHECK-NEXT: Exec Count : 881 +CHECK: Predecessors: .LBB{{.*}} +CHECK: Successors: .Ltmp{{.*}} (mispreds: 138, count: 155), .Ltmp{{.*}} (mispreds: 0, count: 726) + +CHECK: .Ltmp{{.*}} (1 instructions, align : 1) +CHECK-NEXT: Exec Count : 726 +CHECK: Predecessors: .LFT{{.*}} +CHECK: Successors: .L{{.*}} (mispreds: 126, count: 157), .L{{.*}} (mispreds: 140, count: 156), .L{{.*}} (mispreds: 134, count: 152), .L{{.*}} (mispreds: 137, count: 150), .L{{.*}} (mispreds: 129, count: 148), .L{{.*}} (mispreds: 0, count: 0) + +CHECK: .Ltmp{{.*}} (5 instructions, align : 1) +CHECK-NEXT: Exec Count : 167 +CHECK: Predecessors: .Ltmp{{.*}}, .LFT{{.*}} + +CHECK: .Ltmp{{.*}} (5 instructions, align : 1) +CHECK-NEXT: Exec Count : 156 +CHECK: Predecessors: .Ltmp{{.*}} + +CHECK: .Ltmp{{.*}} (5 instructions, align : 1) +CHECK-NEXT: Exec Count : 157 +CHECK: Predecessors: .Ltmp{{.*}} + +CHECK: .Ltmp{{.*}} (5 instructions, align : 1) +CHECK-NEXT: Exec Count : 148 +CHECK: Predecessors: .Ltmp{{.*}} + +CHECK: .Ltmp{{.*}} (5 instructions, align : 1) +CHECK-NEXT: Exec Count : 150 +CHECK: Predecessors: .Ltmp{{.*}} + +CHECK: Binary Function "_Z7inc_dupi" after indirect-call-promotion +CHECK: .LBB{{.*}} (8 instructions, align : 1) +CHECK-NEXT: Entry Point +CHECK-NEXT: Exec Count : 1064 +CHECK: Successors: .Ltmp{{.*}} (mispreds: 143, count: 144), .LFT{{.*}} (mispreds: 0, count: 777) + +CHECK: .LFT{{.*}} (4 instructions, align : 1) +CHECK-NEXT: Exec Count : 777 +CHECK: Predecessors: .LBB{{.*}} +CHECK: Successors: .Ltmp{{.*}} (mispreds: 120, count: 138), .Ltmp{{.*}} (mispreds: 0, count: 639) + +CHECK: .Ltmp{{.*}} (1 instructions, align : 1) +CHECK-NEXT: Exec Count : 639 +CHECK: Predecessors: .LFT{{.*}} +CHECK: Successors: .L{{.*}} 
(mispreds: 130, count: 137), .L{{.*}} (mispreds: 126, count: 136), .L{{.*}} (mispreds: 122, count: 130), .L{{.*}} (mispreds: 111, count: 130), .L{{.*}} (mispreds: 104, count: 114), .L{{.*}} (mispreds: 0, count: 0) + +CHECK: .Ltmp{{.*}} (5 instructions, align : 1) +CHECK-NEXT: Exec Count : 137 +CHECK: Predecessors: .Ltmp{{.*}} +CHECK: Successors: .Ltmp{{.*}} (mispreds: 0, count: 106) + +CHECK: .Ltmp{{.*}} (5 instructions, align : 1) +CHECK-NEXT: Exec Count : 136 +CHECK: Predecessors: .Ltmp{{.*}} +CHECK: Successors: .Ltmp{{.*}} (mispreds: 0, count: 113) + +CHECK: .Ltmp{{.*}} (5 instructions, align : 1) +CHECK-NEXT: Exec Count : 130 +CHECK: Predecessors: .Ltmp{{.*}} +CHECK: Successors: .Ltmp{{.*}} (mispreds: 0, count: 97) + +CHECK: .Ltmp{{.*}} (5 instructions, align : 1) +CHECK-NEXT: Exec Count : 130 +CHECK: Predecessors: .Ltmp{{.*}} +CHECK: Successors: .Ltmp{{.*}} (mispreds: 0, count: 105) + +CHECK: .Ltmp{{.*}} (5 instructions, align : 1) +CHECK-NEXT: Exec Count : 140 +CHECK: Predecessors: .Ltmp{{.*}}, .LFT{{.*}} +CHECK: Successors: .Ltmp{{.*}} (mispreds: 0, count: 98) + +CHECK: <_Z3inci>: +CHECK: movq 0x{{.*}}(,%rax,8), %rax +CHECK-NEXT: cmpq $0x{{.*}}, %rax +CHECK-NEXT: je {{.*}} <_Z3inci+0x{{.*}}> +CHECK-NEXT: jmpq *%rax + +CHECK: <_Z7inc_dupi>: +CHECK: movq 0x{{.*}}(,%rax,8), %rax +CHECK-NEXT: cmpq $0x{{.*}}, %rax +CHECK-NEXT: je {{.*}} <_Z7inc_dupi+0x{{.*}}> +CHECK-NEXT: jmpq *%rax diff --git a/bolt/test/link_fdata.py b/bolt/test/link_fdata.py new file mode 100755 index 000000000000..35fa5ea0c503 --- /dev/null +++ b/bolt/test/link_fdata.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 + +""" +This script reads the given input file, extracts all lines starting with +"# FDATA: " (or a given prefix instead of "FDATA"), parses the directives, +replaces symbol names ("#name#") with either symbol values or with offsets from +respective anchor symbols, and writes the result to the given output file. 
+""" + +import argparse +import subprocess +import sys +import re + +parser = argparse.ArgumentParser() +parser.add_argument("input") +parser.add_argument("objfile", help="Object file to extract symbol values from") +parser.add_argument("output") +parser.add_argument("prefix", nargs="?", default="FDATA", help="Custom FDATA prefix") +parser.add_argument("--nmtool", default="nm", help="Path to nm tool") + +args = parser.parse_args() + +# Regexes to extract FDATA lines from input and parse FDATA and pre-aggregated +# profile data +prefix_pat = re.compile(f"^# {args.prefix}: (.*)") + +# FDATA records: +# +# +# +fdata_pat = re.compile(r"([01].*) (?P\d+) (?P\d+)") + +# Pre-aggregated profile: +# {B|F|f} [:] [:] +# [] +preagg_pat = re.compile(r"(?P[BFf]) (?P.*)") + +# Replacement symbol: #symname# +replace_pat = re.compile(r"#(?P[^#]+)#") + +# Read input and construct the representation of fdata expressions +# as (src_tuple, dst_tuple, mispred_count, exec_count) tuples, where src and dst +# are represented as (is_sym, anchor, offset) tuples +exprs = [] +with open(args.input, 'r') as f: + for line in f.readlines(): + prefix_match = prefix_pat.match(line) + if not prefix_match: + continue + profile_line = prefix_match.group(1) + fdata_match = fdata_pat.match(profile_line) + preagg_match = preagg_pat.match(profile_line) + if fdata_match: + src_dst, execnt, mispred = fdata_match.groups() + # Split by whitespaces not preceded by a backslash (negative lookbehind) + chunks = re.split(r'(? 
+nm_output = subprocess.run([args.nmtool, '--defined-only', args.objfile], + text = True, capture_output = True).stdout +# Populate symbol map +symbols = {} +for symline in nm_output.splitlines(): + symval, _, symname = symline.split(maxsplit=2) + symbols[symname] = symval + +def evaluate_symbol(issym, anchor, offsym): + sym_match = replace_pat.match(offsym) + if not sym_match: + # No need to evaluate symbol value, return as is + return f'{issym} {anchor} {offsym}' + symname = sym_match.group('symname') + assert symname in symbols, f"ERROR: symbol {symname} is not defined in binary" + # Evaluate to an absolute offset if issym is false + if issym == '0': + return f'{issym} {anchor} {symbols[symname]}' + # Evaluate symbol against its anchor if issym is true + assert anchor in symbols, f"ERROR: symbol {anchor} is not defined in binary" + anchor_value = int(symbols[anchor], 16) + symbol_value = int(symbols[symname], 16) + sym_offset = symbol_value - anchor_value + return f'{issym} {anchor} {format(sym_offset, "x")}' + +def replace_symbol(matchobj): + ''' + Expects matchobj to only capture one group which contains the symbol name. 
+ ''' + symname = matchobj.group('symname') + assert symname in symbols, f"ERROR: symbol {symname} is not defined in binary" + return symbols[symname] + +with open(args.output, 'w') as f: + for etype, expr in exprs: + if etype == 'FDATA': + issym1, anchor1, offsym1, issym2, anchor2, offsym2, execnt, mispred = expr + print(evaluate_symbol(issym1, anchor1, offsym1), + evaluate_symbol(issym2, anchor2, offsym2), + execnt, mispred, file = f) + elif etype == 'PREAGG': + # Replace all symbols enclosed in ## + print(expr[0], re.sub(replace_pat, replace_symbol, expr[1]), + file = f) + else: + exit("ERROR: unhandled expression type:\n%s" % etype) diff --git a/bolt/test/link_fdata.sh b/bolt/test/link_fdata.sh deleted file mode 100755 index c7f41b876b05..000000000000 --- a/bolt/test/link_fdata.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -e - -prefix=${4:-"FDATA"} - -grep -e "^# ${prefix}:" < "$1" | sed -E "s/# ${prefix}: //g" > "$3" -mapfile -t symbols < <(nm --defined-only "$2") - -for line in "${symbols[@]}"; do - val=$(echo $line | cut -d' ' -f1) - symname=$(echo $line | awk '{ $1=$2=""; print $0 }' | sed 's|^[ \t]*||') - if [ -z "$symname" ]; then - continue - fi - if [ -z "${val##*[!0-9a-fA-F]*}" ]; then - continue - fi - sed -i -e "s|\#${symname}\#|$val|g" $3 -done diff --git a/bolt/test/lit.cfg.py b/bolt/test/lit.cfg.py index 8a8c2979df75..3d66b7bf7919 100644 --- a/bolt/test/lit.cfg.py +++ b/bolt/test/lit.cfg.py @@ -77,7 +77,7 @@ tools = [ ToolSubst('llvm-objcopy', unresolved='fatal'), ToolSubst('llvm-strip', unresolved='fatal'), ToolSubst('llvm-readelf', unresolved='fatal'), - ToolSubst('link_fdata', command=FindTool('link_fdata.sh'), unresolved='fatal'), + ToolSubst('link_fdata', command=FindTool('link_fdata.py'), unresolved='fatal'), ToolSubst('merge-fdata', unresolved='fatal'), ] llvm_config.add_tool_substitutions(tools, tool_dirs)