From 628fbbef81c5ac806e6dbf2bce18dd44980051b1 Mon Sep 17 00:00:00 2001
From: Tue Ly <lntue@google.com>
Date: Mon, 25 Jul 2022 13:44:46 -0400
Subject: [PATCH] [libc] Use nearest_integer instructions to improve expm1f
 performance.

Use nearest_integer instructions to improve expf performance.

Performance tests with CORE-MATH's perf tool:

Before the patch:
```
$ ./perf.sh expm1f
LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a
GNU libc version: 2.31
GNU libc release: stable
CORE-MATH reciprocal throughput   : 10.096
System LIBC reciprocal throughput : 44.036
LIBC reciprocal throughput        : 11.575

$ ./perf.sh expm1f --latency
LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a
GNU libc version: 2.31
GNU libc release: stable
CORE-MATH latency   : 42.239
System LIBC latency : 122.815
LIBC latency        : 50.122
```
After the patch:
```
$ ./perf.sh expm1f
LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a
GNU libc version: 2.31
GNU libc release: stable
CORE-MATH reciprocal throughput   : 10.046
System LIBC reciprocal throughput : 43.899
LIBC reciprocal throughput        : 9.179

$ ./perf.sh expm1f --latency
LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a
GNU libc version: 2.31
GNU libc release: stable
CORE-MATH latency   : 42.078
System LIBC latency : 120.488
LIBC latency        : 41.528
```

Reviewed By: zimmermann6

Differential Revision: https://reviews.llvm.org/D130502
---
 libc/docs/math.rst                   | 2 +-
 libc/src/math/generic/CMakeLists.txt | 1 +
 libc/src/math/generic/expm1f.cpp     | 8 +++++---
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/libc/docs/math.rst b/libc/docs/math.rst
index 42d09617d6bb..f7d85195460c 100644
--- a/libc/docs/math.rst
+++ b/libc/docs/math.rst
@@ -203,7 +203,7 @@ Performance
 +--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
 | exp2f        |        25 |                 8 |        81 |                37 | :math:`[-10, 10]`                   | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA           |
 +--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
-| expm1f       |        14 |                53 |        59 |               146 | :math:`[-10, 10]`                   | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA           |
+| expm1f       |         9 |                44 |        42 |               121 | :math:`[-10, 10]`                   | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA           |
 +--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
 | fmodf        |        73 |               263 |        -  |                 - | [MIN_NORMAL, MAX_NORMAL]            | i5 mobile  | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 |               |
 |              +-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 2658b9691d22..13849fa04289 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -515,6 +515,7 @@ add_entrypoint_object(
     .common_constants
     libc.src.__support.FPUtil.fputil
     libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.nearest_integer
     libc.src.__support.FPUtil.polyeval
     libc.include.math
   COMPILE_OPTIONS
diff --git a/libc/src/math/generic/expm1f.cpp b/libc/src/math/generic/expm1f.cpp
index 3019d71369c0..6c187b838acd 100644
--- a/libc/src/math/generic/expm1f.cpp
+++ b/libc/src/math/generic/expm1f.cpp
@@ -13,6 +13,8 @@
 #include "src/__support/FPUtil/FMA.h"
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
 #include "src/__support/common.h"
 
 #include <errno.h>
@@ -133,10 +135,10 @@ LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
   // generated by Sollya.
 
   // x_hi = hi + mid.
-  int x_hi = static_cast<int>(x * 0x1.0p7f + (xbits.get_sign() ? -0.5f : 0.5f));
+  float kf = fputil::nearest_integer(x * 0x1.0p7f);
+  int x_hi = static_cast<int>(kf);
   // Subtract (hi + mid) from x to get lo.
-  x -= static_cast<float>(x_hi) * 0x1.0p-7f;
-  double xd = static_cast<double>(x);
+  double xd = static_cast<double>(fputil::multiply_add(kf, -0x1.0p-7f, x));
   x_hi += 104 << 7;
   // hi = x_hi >> 7
   double exp_hi = EXP_M1[x_hi >> 7];