From 628fbbef81c5ac806e6dbf2bce18dd44980051b1 Mon Sep 17 00:00:00 2001 From: Tue Ly Date: Mon, 25 Jul 2022 13:44:46 -0400 Subject: [PATCH] [libc] Use nearest_integer instructions to improve expm1f performance. Use nearest_integer instructions to improve expm1f performance. Performance tests with CORE-MATH's perf tool: Before the patch: ``` $ ./perf.sh expm1f LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a GNU libc version: 2.31 GNU libc release: stable CORE-MATH reciprocal throughput : 10.096 System LIBC reciprocal throughput : 44.036 LIBC reciprocal throughput : 11.575 $ ./perf.sh expm1f --latency LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a GNU libc version: 2.31 GNU libc release: stable CORE-MATH latency : 42.239 System LIBC latency : 122.815 LIBC latency : 50.122 ``` After the patch: ``` $ ./perf.sh expm1f LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a GNU libc version: 2.31 GNU libc release: stable CORE-MATH reciprocal throughput : 10.046 System LIBC reciprocal throughput : 43.899 LIBC reciprocal throughput : 9.179 $ ./perf.sh expm1f --latency LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a GNU libc version: 2.31 GNU libc release: stable CORE-MATH latency : 42.078 System LIBC latency : 120.488 LIBC latency : 41.528 ``` Reviewed By: zimmermann6 Differential Revision: https://reviews.llvm.org/D130502 --- libc/docs/math.rst | 2 +- libc/src/math/generic/CMakeLists.txt | 1 + libc/src/math/generic/expm1f.cpp | 8 +++++--- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/libc/docs/math.rst b/libc/docs/math.rst index 42d09617d6bb..f7d85195460c 100644 --- a/libc/docs/math.rst +++ b/libc/docs/math.rst @@ -203,7 +203,7 @@ Performance 
+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ | exp2f | 25 | 8 | 81 | 37 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | +--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| expm1f | 14 | 53 | 59 | 146 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | +| expm1f | 9 | 44 | 42 | 121 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | +--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ | fmodf | 73 | 263 | - | - | [MIN_NORMAL, MAX_NORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | | | +-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 2658b9691d22..13849fa04289 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -515,6 +515,7 @@ add_entrypoint_object( .common_constants libc.src.__support.FPUtil.fputil libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.nearest_integer libc.src.__support.FPUtil.polyeval libc.include.math COMPILE_OPTIONS diff --git a/libc/src/math/generic/expm1f.cpp b/libc/src/math/generic/expm1f.cpp index 3019d71369c0..6c187b838acd 100644 --- a/libc/src/math/generic/expm1f.cpp +++ b/libc/src/math/generic/expm1f.cpp @@ -13,6 +13,8 @@ #include "src/__support/FPUtil/FMA.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/PolyEval.h" +#include 
"src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/nearest_integer.h" #include "src/__support/common.h" #include @@ -133,10 +135,10 @@ LLVM_LIBC_FUNCTION(float, expm1f, (float x)) { // generated by Sollya. // x_hi = hi + mid. - int x_hi = static_cast(x * 0x1.0p7f + (xbits.get_sign() ? -0.5f : 0.5f)); + float kf = fputil::nearest_integer(x * 0x1.0p7f); + int x_hi = static_cast(kf); // Subtract (hi + mid) from x to get lo. - x -= static_cast(x_hi) * 0x1.0p-7f; - double xd = static_cast(x); + double xd = static_cast(fputil::multiply_add(kf, -0x1.0p-7f, x)); x_hi += 104 << 7; // hi = x_hi >> 7 double exp_hi = EXP_M1[x_hi >> 7];