Merge pull request #138 from rust-lang/feature/various-fns

Add various fns - Sum/Product traits - recip/to_degrees/to_radians/min/max/clamp/signum/copysign; rust-lang/stdsimd#14 - mul_add: rust-lang/stdsimd#14, fixes rust-lang/stdsimd#102
2021-06-23 14:19:08 -07:00 · 2021-06-23 14:19:08 -07:00 · 3872723ead
commit 3872723ead
parent 15b4e28004 b0a9fe5d07
7 changed files with 353 additions and 1 deletions
--- a/crates/core_simd/src/intrinsics.rs
+++ b/crates/core_simd/src/intrinsics.rs
@ -49,6 +49,9 @@ extern "platform-intrinsic" {
    /// fsqrt
    pub(crate) fn simd_fsqrt<T>(x: T) -> T;

+    /// fma
+    pub(crate) fn simd_fma<T>(x: T, y: T, z: T) -> T;
+
    pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
    pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
    pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;
--- a/crates/core_simd/src/iter.rs
+++ b/crates/core_simd/src/iter.rs
@ -0,0 +1,52 @@
+macro_rules! impl_traits {
+    { $type:ident } => {
+        impl<const LANES: usize> core::iter::Sum<Self> for crate::$type<LANES>
+        where
+            Self: crate::LanesAtMost32,
+        {
+            fn sum<I: core::iter::Iterator<Item = Self>>(iter: I) -> Self {
+                iter.fold(Default::default(), core::ops::Add::add)
+            }
+        }
+
+        impl<const LANES: usize> core::iter::Product<Self> for crate::$type<LANES>
+        where
+            Self: crate::LanesAtMost32,
+        {
+            fn product<I: core::iter::Iterator<Item = Self>>(iter: I) -> Self {
+                iter.fold(Self::splat(1 as _), core::ops::Mul::mul) // multiplicative identity
+            }
+        }
+
+        impl<'a, const LANES: usize> core::iter::Sum<&'a Self> for crate::$type<LANES>
+        where
+            Self: crate::LanesAtMost32,
+        {
+            fn sum<I: core::iter::Iterator<Item = &'a Self>>(iter: I) -> Self {
+                iter.fold(Default::default(), core::ops::Add::add)
+            }
+        }
+
+        impl<'a, const LANES: usize> core::iter::Product<&'a Self> for crate::$type<LANES>
+        where
+            Self: crate::LanesAtMost32,
+        {
+            fn product<I: core::iter::Iterator<Item = &'a Self>>(iter: I) -> Self {
+                iter.fold(Self::splat(1 as _), core::ops::Mul::mul) // multiplicative identity
+            }
+        }
+    }
+}
+
+impl_traits! { SimdF32 }
+impl_traits! { SimdF64 }
+impl_traits! { SimdU8 }
+impl_traits! { SimdU16 }
+impl_traits! { SimdU32 }
+impl_traits! { SimdU64 }
+impl_traits! { SimdUsize }
+impl_traits! { SimdI8 }
+impl_traits! { SimdI16 }
+impl_traits! { SimdI32 }
+impl_traits! { SimdI64 }
+impl_traits! { SimdIsize }
--- a/crates/core_simd/src/lib.rs
+++ b/crates/core_simd/src/lib.rs
@ -22,6 +22,7 @@ pub use to_bytes::ToBytes;
 mod comparisons;
 mod fmt;
 mod intrinsics;
+mod iter;
 mod ops;
 mod round;

--- a/crates/core_simd/src/vector/float.rs
+++ b/crates/core_simd/src/vector/float.rs
@ -4,7 +4,7 @@
 /// `$lanes` of float `$type`, which uses `$bits_ty` as its binary
 /// representation. Called from `define_float_vector!`.
 macro_rules! impl_float_vector {
-    { $name:ident, $type:ty, $bits_ty:ident, $mask_ty:ident, $mask_impl_ty:ident } => {
+    { $name:ident, $type:ident, $bits_ty:ident, $mask_ty:ident, $mask_impl_ty:ident } => {
        impl_vector! { $name, $type }
        impl_float_reductions! { $name, $type }

@ -36,6 +36,18 @@ macro_rules! impl_float_vector {
                unsafe { crate::intrinsics::simd_fabs(self) }
            }

+            /// Fused multiply-add.  Computes `(self * a) + b` with only one rounding error,
+            /// yielding a more accurate result than an unfused multiply-add.
+            ///
+            /// Using `mul_add` *may* be more performant than an unfused multiply-add if the target
+            /// architecture has a dedicated `fma` CPU instruction.  However, this is not always
+            /// true, and will be heavily dependent on designing algorithms with specific target
+            /// hardware in mind.
+            #[inline]
+            pub fn mul_add(self, a: Self, b: Self) -> Self {
+                unsafe { crate::intrinsics::simd_fma(self, a, b) }
+            }
+
            /// Produces a vector where every lane has the square root value
            /// of the equivalently-indexed lane in `self`
            #[inline]
@ -43,6 +55,25 @@ macro_rules! impl_float_vector {
            pub fn sqrt(self) -> Self {
                unsafe { crate::intrinsics::simd_fsqrt(self) }
            }
+
+            /// Takes the reciprocal (inverse) of each lane, `1/x`.
+            #[inline]
+            pub fn recip(self) -> Self {
+                Self::splat(1.0) / self
+            }
+
+            /// Converts each lane from radians to degrees.
+            #[inline]
+            pub fn to_degrees(self) -> Self {
+                // to_degrees uses a special constant for better precision, so extract that constant
+                self * Self::splat($type::to_degrees(1.))
+            }
+
+            /// Converts each lane from degrees to radians.
+            #[inline]
+            pub fn to_radians(self) -> Self {
+                self * Self::splat($type::to_radians(1.))
+            }
        }

        impl<const LANES: usize> $name<LANES>
@ -97,6 +128,67 @@ macro_rules! impl_float_vector {
            pub fn is_normal(self) -> crate::$mask_ty<LANES> {
                !(self.abs().lanes_eq(Self::splat(0.0)) | self.is_nan() | self.is_subnormal() | self.is_infinite())
            }
+
+            /// Replaces each lane with a number that represents its sign.
+            ///
+            /// * `1.0` if the number is positive, `+0.0`, or `INFINITY`
+            /// * `-1.0` if the number is negative, `-0.0`, or `NEG_INFINITY`
+            /// * `NAN` if the number is `NAN`
+            #[inline]
+            pub fn signum(self) -> Self {
+                self.is_nan().select(Self::splat($type::NAN), Self::splat(1.0).copysign(self))
+            }
+
+            /// Returns each lane with the magnitude of `self` and the sign of `sign`.
+            ///
+            /// A lane of `self` that is `NAN` stays `NAN`, taking the sign of that lane in `sign`.
+            #[inline]
+            pub fn copysign(self, sign: Self) -> Self {
+                let sign_bit = sign.to_bits() & Self::splat(-0.).to_bits();
+                let magnitude = self.to_bits() & !Self::splat(-0.).to_bits();
+                Self::from_bits(sign_bit | magnitude)
+            }
+
+            /// Returns the minimum of each lane.
+            ///
+            /// If one of the values is `NAN`, then the other value is returned.
+            #[inline]
+            pub fn min(self, other: Self) -> Self {
+                // TODO consider using an intrinsic
+                self.is_nan().select(
+                    other,
+                    self.lanes_ge(other).select(other, self)
+                )
+            }
+
+            /// Returns the maximum of each lane.
+            ///
+            /// If one of the values is `NAN`, then the other value is returned.
+            #[inline]
+            pub fn max(self, other: Self) -> Self {
+                // TODO consider using an intrinsic
+                self.is_nan().select(
+                    other,
+                    self.lanes_le(other).select(other, self)
+                )
+            }
+
+            /// Restrict each lane to a certain interval unless it is NaN.
+            ///
+            /// A lane below its `min` lane becomes that `min` lane, a lane above its `max` lane
+            /// becomes that `max` lane, and any other lane of `self` is returned unchanged.
+            /// Panics unless every lane of `min` is less than or equal to its lane in `max`.
+            #[inline]
+            pub fn clamp(self, min: Self, max: Self) -> Self {
+                assert!(
+                    min.lanes_le(max).all(),
+                    "each lane in `min` must be less than or equal to the corresponding lane in `max`",
+                );
+                let mut x = self;
+                x = x.lanes_lt(min).select(min, x);
+                x = x.lanes_gt(max).select(max, x);
+                x
+            }
        }
    };
 }
--- a/crates/core_simd/src/vector/int.rs
+++ b/crates/core_simd/src/vector/int.rs
@ -33,14 +33,28 @@ macro_rules! impl_integer_vector {
            crate::$mask_ty<LANES>: crate::Mask,
        {
            /// Returns true for each positive lane and false if it is zero or negative.
+            #[inline]
            pub fn is_positive(self) -> crate::$mask_ty<LANES> {
                self.lanes_gt(Self::splat(0))
            }

            /// Returns true for each negative lane and false if it is zero or positive.
+            #[inline]
            pub fn is_negative(self) -> crate::$mask_ty<LANES> {
                self.lanes_lt(Self::splat(0))
            }
+
+            /// Returns numbers representing the sign of each lane.
+            /// * `0` if the number is zero
+            /// * `1` if the number is positive
+            /// * `-1` if the number is negative
+            #[inline]
+            pub fn signum(self) -> Self {
+                self.is_positive().select(
+                    Self::splat(1),
+                    self.is_negative().select(Self::splat(-1), Self::splat(0))
+                )
+            }
        }
    }
 }
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@ -247,6 +247,15 @@ macro_rules! impl_signed_tests {
                        &|_| true,
                    );
                }
+
+                fn signum<const LANES: usize>() {
+                    test_helpers::test_unary_elementwise(
+                        &Vector::<LANES>::signum,
+                        &Scalar::signum,
+                        &|_| true,
+                    )
+                }
+
            }

            test_helpers::test_lanes_panic! {
@ -426,6 +435,14 @@ macro_rules! impl_float_tests {
                    )
                }

+                fn mul_add<const LANES: usize>() {
+                    test_helpers::test_ternary_elementwise(
+                        &Vector::<LANES>::mul_add,
+                        &Scalar::mul_add,
+                        &|_, _, _| true,
+                    )
+                }
+
                fn sqrt<const LANES: usize>() {
                    test_helpers::test_unary_elementwise(
                        &Vector::<LANES>::sqrt,
@ -433,6 +450,117 @@ macro_rules! impl_float_tests {
                        &|_| true,
                    )
                }
+
+                fn recip<const LANES: usize>() {
+                    test_helpers::test_unary_elementwise(
+                        &Vector::<LANES>::recip,
+                        &Scalar::recip,
+                        &|_| true,
+                    )
+                }
+
+                fn to_degrees<const LANES: usize>() {
+                    test_helpers::test_unary_elementwise(
+                        &Vector::<LANES>::to_degrees,
+                        &Scalar::to_degrees,
+                        &|_| true,
+                    )
+                }
+
+                fn to_radians<const LANES: usize>() {
+                    test_helpers::test_unary_elementwise(
+                        &Vector::<LANES>::to_radians,
+                        &Scalar::to_radians,
+                        &|_| true,
+                    )
+                }
+
+                fn signum<const LANES: usize>() {
+                    test_helpers::test_unary_elementwise(
+                        &Vector::<LANES>::signum,
+                        &Scalar::signum,
+                        &|_| true,
+                    )
+                }
+
+                fn copysign<const LANES: usize>() {
+                    test_helpers::test_binary_elementwise(
+                        &Vector::<LANES>::copysign,
+                        &Scalar::copysign,
+                        &|_, _| true,
+                    )
+                }
+
+                fn min<const LANES: usize>() {
+                    // Regular conditions (no lane pairs two zeros of opposite sign)
+                    test_helpers::test_binary_elementwise(
+                        &Vector::<LANES>::min,
+                        &Scalar::min,
+                        // Reject the case where both values are zero with different signs
+                        &|a, b| {
+                            for (a, b) in a.iter().zip(b.iter()) {
+                                if *a == 0. && *b == 0. && a.signum() != b.signum() {
+                                    return false;
+                                }
+                            }
+                            true
+                        }
+                    );
+
+                    // Special case: zeros of opposite sign, where a zero of either sign is accepted
+                    let p_zero = Vector::<LANES>::splat(0.);
+                    let n_zero = Vector::<LANES>::splat(-0.);
+                    assert!(p_zero.min(n_zero).to_array().iter().all(|x| *x == 0.));
+                    assert!(n_zero.min(p_zero).to_array().iter().all(|x| *x == 0.));
+                }
+
+                fn max<const LANES: usize>() {
+                    // Regular conditions (no lane pairs two zeros of opposite sign)
+                    test_helpers::test_binary_elementwise(
+                        &Vector::<LANES>::max,
+                        &Scalar::max,
+                        // Reject the case where both values are zero with different signs
+                        &|a, b| {
+                            for (a, b) in a.iter().zip(b.iter()) {
+                                if *a == 0. && *b == 0. && a.signum() != b.signum() {
+                                    return false;
+                                }
+                            }
+                            true
+                        }
+                    );
+
+                    // Special case: zeros of opposite sign, where a zero of either sign is accepted
+                    let p_zero = Vector::<LANES>::splat(0.);
+                    let n_zero = Vector::<LANES>::splat(-0.);
+                    assert!(p_zero.max(n_zero).to_array().iter().all(|x| *x == 0.));
+                    assert!(n_zero.max(p_zero).to_array().iter().all(|x| *x == 0.));
+                }
+
+                fn clamp<const LANES: usize>() {
+                    test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| {
+                        for (min, max) in min.iter_mut().zip(max.iter_mut()) {
+                            if max < min {
+                                core::mem::swap(min, max);
+                            }
+                            if min.is_nan() {
+                                *min = Scalar::NEG_INFINITY;
+                            }
+                            if max.is_nan() {
+                                *max = Scalar::INFINITY;
+                            }
+                        }
+
+                        let mut result_scalar = [Scalar::default(); LANES];
+                        for i in 0..LANES {
+                            result_scalar[i] = value[i].clamp(min[i], max[i]);
+                        }
+                        let result_vector = Vector::from_array(value).clamp(min.into(), max.into()).to_array();
+                        test_helpers::prop_assert_biteq!(result_scalar, result_vector);
+                        Ok(())
+                    })
+                }
+
                fn horizontal_sum<const LANES: usize>() {
                    test_helpers::test_1(&|x| {
                        test_helpers::prop_assert_biteq! (
--- a/crates/test_helpers/src/lib.rs
+++ b/crates/test_helpers/src/lib.rs
@ -97,6 +97,27 @@ pub fn test_2<A: core::fmt::Debug + DefaultStrategy, B: core::fmt::Debug + Defau
        .unwrap();
 }

+/// Test a function that takes three values.
+pub fn test_3<
+    A: core::fmt::Debug + DefaultStrategy,
+    B: core::fmt::Debug + DefaultStrategy,
+    C: core::fmt::Debug + DefaultStrategy,
+>(
+    f: &dyn Fn(A, B, C) -> proptest::test_runner::TestCaseResult,
+) {
+    let mut runner = proptest::test_runner::TestRunner::default();
+    runner
+        .run(
+            &(
+                A::default_strategy(),
+                B::default_strategy(),
+                C::default_strategy(),
+            ),
+            |(a, b, c)| f(a, b, c),
+        )
+        .unwrap();
+}
+
 /// Test a unary vector function against a unary scalar function, applied elementwise.
 #[inline(never)]
 pub fn test_unary_elementwise<Scalar, ScalarResult, Vector, VectorResult, const LANES: usize>(
@ -257,6 +278,47 @@ pub fn test_binary_scalar_lhs_elementwise<
    });
 }

+/// Test a ternary vector function against a ternary scalar function, applied elementwise.
+#[inline(never)]
+pub fn test_ternary_elementwise<
+    Scalar1,
+    Scalar2,
+    Scalar3,
+    ScalarResult,
+    Vector1,
+    Vector2,
+    Vector3,
+    VectorResult,
+    const LANES: usize,
+>(
+    fv: &dyn Fn(Vector1, Vector2, Vector3) -> VectorResult,
+    fs: &dyn Fn(Scalar1, Scalar2, Scalar3) -> ScalarResult,
+    check: &dyn Fn([Scalar1; LANES], [Scalar2; LANES], [Scalar3; LANES]) -> bool,
+) where
+    Scalar1: Copy + Default + core::fmt::Debug + DefaultStrategy,
+    Scalar2: Copy + Default + core::fmt::Debug + DefaultStrategy,
+    Scalar3: Copy + Default + core::fmt::Debug + DefaultStrategy,
+    ScalarResult: Copy + Default + biteq::BitEq + core::fmt::Debug + DefaultStrategy,
+    Vector1: Into<[Scalar1; LANES]> + From<[Scalar1; LANES]> + Copy,
+    Vector2: Into<[Scalar2; LANES]> + From<[Scalar2; LANES]> + Copy,
+    Vector3: Into<[Scalar3; LANES]> + From<[Scalar3; LANES]> + Copy,
+    VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy,
+{
+    test_3(&|x: [Scalar1; LANES], y: [Scalar2; LANES], z: [Scalar3; LANES]| {
+        proptest::prop_assume!(check(x, y, z));
+        let result_1: [ScalarResult; LANES] = fv(x.into(), y.into(), z.into()).into();
+        let result_2: [ScalarResult; LANES] = {
+            let mut result = [ScalarResult::default(); LANES];
+            for ((i1, (i2, i3)), o) in x.iter().zip(y.iter().zip(z.iter())).zip(result.iter_mut()) {
+                *o = fs(*i1, *i2, *i3);
+            }
+            result
+        };
+        crate::prop_assert_biteq!(result_1, result_2);
+        Ok(())
+    });
+}
+
 /// Expand a const-generic test into separate tests for each possible lane count.
 #[macro_export]
 macro_rules! test_lanes {