Merge pull request #138 from rust-lang/feature/various-fns

Add various fns
- Sum/Product traits
- recip/to_degrees/to_radians/min/max/clamp/signum/copysign; rust-lang/stdsimd#14
- mul_add: rust-lang/stdsimd#14, fixes rust-lang/stdsimd#102
This commit is contained in:
Jubilee 2021-06-23 14:19:08 -07:00 committed by GitHub
commit 3872723ead
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 353 additions and 1 deletions

View file

@ -49,6 +49,9 @@ extern "platform-intrinsic" {
/// fsqrt
pub(crate) fn simd_fsqrt<T>(x: T) -> T;
/// fma
pub(crate) fn simd_fma<T>(x: T, y: T, z: T) -> T;
pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;

View file

@ -0,0 +1,52 @@
/// Implements `core::iter::Sum` and `core::iter::Product` for the vector type `$type`,
/// for iterators over both owned (`Self`) and borrowed (`&Self`) vectors.
///
/// Every impl is a lanewise fold over the iterator. A sum starts from the default vector
/// (presumably all-zero lanes -- confirm against the type's `Default` impl), while a product
/// starts from a vector with `1` in every lane, so that an empty iterator yields the
/// multiplicative identity instead of forcing every product to the default value.
macro_rules! impl_traits {
    { $type:ident } => {
        impl<const LANES: usize> core::iter::Sum<Self> for crate::$type<LANES>
        where
            Self: crate::LanesAtMost32,
        {
            fn sum<I: core::iter::Iterator<Item = Self>>(iter: I) -> Self {
                iter.fold(Default::default(), core::ops::Add::add)
            }
        }

        impl<const LANES: usize> core::iter::Product<Self> for crate::$type<LANES>
        where
            Self: crate::LanesAtMost32,
        {
            fn product<I: core::iter::Iterator<Item = Self>>(iter: I) -> Self {
                // `1 as _` resolves to the scalar type expected by `splat`, so the same
                // expression serves the float and the integer vectors.
                iter.fold(Self::splat(1 as _), core::ops::Mul::mul)
            }
        }

        impl<'a, const LANES: usize> core::iter::Sum<&'a Self> for crate::$type<LANES>
        where
            Self: crate::LanesAtMost32,
        {
            fn sum<I: core::iter::Iterator<Item = &'a Self>>(iter: I) -> Self {
                iter.fold(Default::default(), core::ops::Add::add)
            }
        }

        impl<'a, const LANES: usize> core::iter::Product<&'a Self> for crate::$type<LANES>
        where
            Self: crate::LanesAtMost32,
        {
            fn product<I: core::iter::Iterator<Item = &'a Self>>(iter: I) -> Self {
                // Same multiplicative identity as the by-value impl.
                iter.fold(Self::splat(1 as _), core::ops::Mul::mul)
            }
        }
    }
}
// Instantiate the `Sum`/`Product` impls for every float, unsigned and signed vector type.
impl_traits! { SimdF32 }
impl_traits! { SimdF64 }
impl_traits! { SimdU8 }
impl_traits! { SimdU16 }
impl_traits! { SimdU32 }
impl_traits! { SimdU64 }
impl_traits! { SimdUsize }
impl_traits! { SimdI8 }
impl_traits! { SimdI16 }
impl_traits! { SimdI32 }
impl_traits! { SimdI64 }
impl_traits! { SimdIsize }

View file

@ -22,6 +22,7 @@ pub use to_bytes::ToBytes;
mod comparisons;
mod fmt;
mod intrinsics;
mod iter;
mod ops;
mod round;

View file

@ -4,7 +4,7 @@
/// `$lanes` of float `$type`, which uses `$bits_ty` as its binary
/// representation. Called from `define_float_vector!`.
macro_rules! impl_float_vector {
{ $name:ident, $type:ty, $bits_ty:ident, $mask_ty:ident, $mask_impl_ty:ident } => {
{ $name:ident, $type:ident, $bits_ty:ident, $mask_ty:ident, $mask_impl_ty:ident } => {
impl_vector! { $name, $type }
impl_float_reductions! { $name, $type }
@ -36,6 +36,18 @@ macro_rules! impl_float_vector {
unsafe { crate::intrinsics::simd_fabs(self) }
}
/// Fused multiply-add. Computes `(self * a) + b` with only one rounding error,
/// yielding a more accurate result than an unfused multiply-add.
///
/// `self` is the multiplicand, `a` the multiplier and `b` the addend.
///
/// Using `mul_add` *may* be more performant than an unfused multiply-add if the target
/// architecture has a dedicated `fma` CPU instruction. However, this is not always
/// true, and will be heavily dependent on designing algorithms with specific target
/// hardware in mind.
#[inline]
pub fn mul_add(self, a: Self, b: Self) -> Self {
// `self`, `a` and `b` are all `Self`, so the three operands share the single vector type
// that the intrinsic's signature asks for.
// NOTE(review): soundness presumably also relies on `Self` being a float vector -- confirm.
unsafe { crate::intrinsics::simd_fma(self, a, b) }
}
/// Produces a vector where every lane has the square root value
/// of the equivalently-indexed lane in `self`
#[inline]
@ -43,6 +55,25 @@ macro_rules! impl_float_vector {
pub fn sqrt(self) -> Self {
// The intrinsic takes and returns the same vector type, here `Self`.
// NOTE(review): soundness presumably relies on `Self` being a float vector -- confirm.
unsafe { crate::intrinsics::simd_fsqrt(self) }
}
/// Computes the reciprocal (multiplicative inverse) of every lane, `1/x`.
#[inline]
pub fn recip(self) -> Self {
    let ones = Self::splat(1.0);
    ones / self
}
/// Converts every lane from an angle in radians to the same angle in degrees.
#[inline]
pub fn to_degrees(self) -> Self {
    // The scalar `to_degrees` uses a special constant for better precision; converting
    // `1.` extracts that constant so it can be applied to every lane at once.
    let degrees_per_radian = $type::to_degrees(1.);
    self * Self::splat(degrees_per_radian)
}
/// Converts every lane from an angle in degrees to the same angle in radians.
#[inline]
pub fn to_radians(self) -> Self {
    // Converting `1.` with the scalar `to_radians` yields the per-degree scale factor.
    let radians_per_degree = $type::to_radians(1.);
    self * Self::splat(radians_per_degree)
}
}
impl<const LANES: usize> $name<LANES>
@ -97,6 +128,67 @@ macro_rules! impl_float_vector {
pub fn is_normal(self) -> crate::$mask_ty<LANES> {
    // A lane is normal exactly when it is none of: zero, NaN, subnormal, infinite.
    let is_zero = self.abs().lanes_eq(Self::splat(0.0));
    let not_normal = is_zero | self.is_nan() | self.is_subnormal() | self.is_infinite();
    !not_normal
}
/// Maps every lane to a number describing its sign.
///
/// * `1.0` for a positive number, `+0.0`, or `INFINITY`
/// * `-1.0` for a negative number, `-0.0`, or `NEG_INFINITY`
/// * `NAN` for `NAN`
#[inline]
pub fn signum(self) -> Self {
    let nan = Self::splat($type::NAN);
    // A one carrying the sign of each lane of `self`.
    let signed_one = Self::splat(1.0).copysign(self);
    self.is_nan().select(nan, signed_one)
}
/// Combines, lane by lane, the magnitude of `self` with the sign of `sign`.
///
/// A `NAN` lane in `self` stays `NAN` and takes the sign of the matching lane in `sign`.
#[inline]
pub fn copysign(self, sign: Self) -> Self {
    // The bit pattern of `-0.` is used as the mask that isolates the sign.
    let sign_mask = Self::splat(-0.).to_bits();
    let sign_bit = sign.to_bits() & sign_mask;
    let magnitude = self.to_bits() & !sign_mask;
    Self::from_bits(sign_bit | magnitude)
}
/// Computes the lanewise minimum of `self` and `other`.
///
/// When one of the two lanes is `NAN`, the other lane is returned.
#[inline]
pub fn min(self, other: Self) -> Self {
    // TODO consider using an intrinsic
    // Comparisons with a NaN `other` are false, which keeps the `self` lane.
    let smaller = self.lanes_ge(other).select(other, self);
    // A NaN `self` lane is replaced by `other` outright.
    self.is_nan().select(other, smaller)
}
/// Computes the lanewise maximum of `self` and `other`.
///
/// When one of the two lanes is `NAN`, the other lane is returned.
#[inline]
pub fn max(self, other: Self) -> Self {
    // TODO consider using an intrinsic
    // Comparisons with a NaN `other` are false, which keeps the `self` lane.
    let larger = self.lanes_le(other).select(other, self);
    // A NaN `self` lane is replaced by `other` outright.
    self.is_nan().select(other, larger)
}
/// Limits each lane to the interval given by `min` and `max`, unless the lane is NaN.
///
/// A lane of `self` that is less than the matching lane of `min` becomes that `min` lane,
/// and a lane that is greater than the matching lane of `max` becomes that `max` lane.
/// Every other lane of `self` is returned unchanged.
///
/// # Panics
///
/// Panics unless every lane of `min` compares less than or equal to the corresponding
/// lane of `max`.
#[inline]
pub fn clamp(self, min: Self, max: Self) -> Self {
    assert!(
        min.lanes_le(max).all(),
        "each lane in `min` must be less than or equal to the corresponding lane in `max`",
    );
    // Raise the lanes that are too small first, then lower the ones that are too large.
    let raised = self.lanes_lt(min).select(min, self);
    raised.lanes_gt(max).select(max, raised)
}
}
};
}

View file

@ -33,14 +33,28 @@ macro_rules! impl_integer_vector {
crate::$mask_ty<LANES>: crate::Mask,
{
/// Produces a mask that is true for each strictly positive lane, and false for each lane
/// that is zero or negative.
#[inline]
pub fn is_positive(self) -> crate::$mask_ty<LANES> {
    let zero = Self::splat(0);
    self.lanes_gt(zero)
}
/// Produces a mask that is true for each strictly negative lane, and false for each lane
/// that is zero or positive.
#[inline]
pub fn is_negative(self) -> crate::$mask_ty<LANES> {
    let zero = Self::splat(0);
    self.lanes_lt(zero)
}
/// Maps every lane to a number describing its sign.
/// * `0` for a lane that is zero
/// * `1` for a lane that is positive
/// * `-1` for a lane that is negative
#[inline]
pub fn signum(self) -> Self {
    // Result for the lanes that are not positive: `-1` when negative, `0` otherwise.
    let non_positive = self.is_negative().select(Self::splat(-1), Self::splat(0));
    self.is_positive().select(Self::splat(1), non_positive)
}
}
}
}

View file

@ -247,6 +247,15 @@ macro_rules! impl_signed_tests {
&|_| true,
);
}
// Checks the vector `signum` against the scalar `signum`, applied elementwise.
// The always-true predicate rejects no input.
fn signum<const LANES: usize>() {
test_helpers::test_unary_elementwise(
&Vector::<LANES>::signum,
&Scalar::signum,
&|_| true,
)
}
}
test_helpers::test_lanes_panic! {
@ -426,6 +435,14 @@ macro_rules! impl_float_tests {
)
}
// Checks the vector `mul_add` against the scalar `mul_add`, applied elementwise to all
// three operands. The always-true predicate rejects no input.
fn mul_add<const LANES: usize>() {
test_helpers::test_ternary_elementwise(
&Vector::<LANES>::mul_add,
&Scalar::mul_add,
&|_, _, _| true,
)
}
fn sqrt<const LANES: usize>() {
test_helpers::test_unary_elementwise(
&Vector::<LANES>::sqrt,
@ -433,6 +450,117 @@ macro_rules! impl_float_tests {
&|_| true,
)
}
// Checks the vector `recip` against the scalar `recip`, applied elementwise.
// The always-true predicate rejects no input.
fn recip<const LANES: usize>() {
test_helpers::test_unary_elementwise(
&Vector::<LANES>::recip,
&Scalar::recip,
&|_| true,
)
}
// Checks the vector `to_degrees` against the scalar `to_degrees`, applied elementwise.
// The always-true predicate rejects no input.
fn to_degrees<const LANES: usize>() {
test_helpers::test_unary_elementwise(
&Vector::<LANES>::to_degrees,
&Scalar::to_degrees,
&|_| true,
)
}
// Checks the vector `to_radians` against the scalar `to_radians`, applied elementwise.
// The always-true predicate rejects no input.
fn to_radians<const LANES: usize>() {
test_helpers::test_unary_elementwise(
&Vector::<LANES>::to_radians,
&Scalar::to_radians,
&|_| true,
)
}
// Checks the vector `signum` against the scalar `signum`, applied elementwise.
// The always-true predicate rejects no input.
fn signum<const LANES: usize>() {
test_helpers::test_unary_elementwise(
&Vector::<LANES>::signum,
&Scalar::signum,
&|_| true,
)
}
// Checks the vector `copysign` against the scalar `copysign` via
// `test_binary_elementwise`. The always-true predicate rejects no input.
fn copysign<const LANES: usize>() {
test_helpers::test_binary_elementwise(
&Vector::<LANES>::copysign,
&Scalar::copysign,
&|_, _| true,
)
}
fn min<const LANES: usize>() {
    // General case: compare against the scalar `min`, skipping inputs that pair two
    // zeros of opposite sign in the same lane.
    test_helpers::test_binary_elementwise(
        &Vector::<LANES>::min,
        &Scalar::min,
        // Accept the input only if no lane holds two zeros with different signs
        &|a, b| {
            !a.iter()
                .zip(b.iter())
                .any(|(a, b)| *a == 0. && *b == 0. && a.signum() != b.signum())
        }
    );

    // Zeros of opposite sign: only require that every resulting lane equals zero,
    // without pinning which sign is produced.
    let p_zero = Vector::<LANES>::splat(0.);
    let n_zero = Vector::<LANES>::splat(-0.);
    assert!(p_zero.min(n_zero).to_array().iter().all(|x| *x == 0.));
    assert!(n_zero.min(p_zero).to_array().iter().all(|x| *x == 0.));
}
fn max<const LANES: usize>() {
    // General case: compare against the scalar `max`, skipping inputs that pair two
    // zeros of opposite sign in the same lane.
    test_helpers::test_binary_elementwise(
        &Vector::<LANES>::max,
        &Scalar::max,
        // Accept the input only if no lane holds two zeros with different signs
        &|a, b| {
            !a.iter()
                .zip(b.iter())
                .any(|(a, b)| *a == 0. && *b == 0. && a.signum() != b.signum())
        }
    );

    // Zeros of opposite sign: only require that every resulting lane equals zero,
    // without pinning which sign is produced.
    let p_zero = Vector::<LANES>::splat(0.);
    let n_zero = Vector::<LANES>::splat(-0.);
    assert!(p_zero.max(n_zero).to_array().iter().all(|x| *x == 0.));
    assert!(n_zero.max(p_zero).to_array().iter().all(|x| *x == 0.));
}
fn clamp<const LANES: usize>() {
    test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| {
        // Turn the generated bounds into valid ones: order each pair, then replace a NaN
        // bound by the matching infinity so that it no longer restricts the lane.
        for (lo, hi) in min.iter_mut().zip(max.iter_mut()) {
            if hi < lo {
                core::mem::swap(lo, hi);
            }
            if lo.is_nan() {
                *lo = Scalar::NEG_INFINITY;
            }
            if hi.is_nan() {
                *hi = Scalar::INFINITY;
            }
        }

        // Reference result: the scalar `clamp` applied to each lane.
        let mut result_scalar = [Scalar::default(); LANES];
        let bounds = min.iter().zip(max.iter());
        for ((out, v), (lo, hi)) in result_scalar.iter_mut().zip(value.iter()).zip(bounds) {
            *out = v.clamp(*lo, *hi);
        }

        let result_vector = Vector::from_array(value).clamp(min.into(), max.into()).to_array();
        test_helpers::prop_assert_biteq!(result_scalar, result_vector);
        Ok(())
    })
}
fn horizontal_sum<const LANES: usize>() {
test_helpers::test_1(&|x| {
test_helpers::prop_assert_biteq! (

View file

@ -97,6 +97,27 @@ pub fn test_2<A: core::fmt::Debug + DefaultStrategy, B: core::fmt::Debug + Defau
.unwrap();
}
/// Test a function that takes three values.
///
/// Runs `f` under a default proptest `TestRunner`, drawing each of the three arguments from
/// its type's default strategy, and panics if the run reports a failure.
pub fn test_3<A, B, C>(f: &dyn Fn(A, B, C) -> proptest::test_runner::TestCaseResult)
where
    A: core::fmt::Debug + DefaultStrategy,
    B: core::fmt::Debug + DefaultStrategy,
    C: core::fmt::Debug + DefaultStrategy,
{
    let mut runner = proptest::test_runner::TestRunner::default();
    let strategy = (
        A::default_strategy(),
        B::default_strategy(),
        C::default_strategy(),
    );
    let outcome = runner.run(&strategy, |(a, b, c)| f(a, b, c));
    outcome.unwrap();
}
/// Test a unary vector function against a unary scalar function, applied elementwise.
#[inline(never)]
pub fn test_unary_elementwise<Scalar, ScalarResult, Vector, VectorResult, const LANES: usize>(
@ -257,6 +278,47 @@ pub fn test_binary_scalar_lhs_elementwise<
});
}
/// Test a ternary vector function against a ternary scalar function, applied elementwise.
///
/// * `fv` is the vector function under test.
/// * `fs` is the scalar reference, called once per lane.
/// * `check` filters the generated inputs: a case for which it returns `false` is rejected
///   through `prop_assume!` rather than tested.
///
/// The two results are compared with `prop_assert_biteq!`.
#[inline(never)]
pub fn test_ternary_elementwise<
    Scalar1,
    Scalar2,
    Scalar3,
    ScalarResult,
    Vector1,
    Vector2,
    Vector3,
    VectorResult,
    const LANES: usize,
>(
    fv: &dyn Fn(Vector1, Vector2, Vector3) -> VectorResult,
    fs: &dyn Fn(Scalar1, Scalar2, Scalar3) -> ScalarResult,
    check: &dyn Fn([Scalar1; LANES], [Scalar2; LANES], [Scalar3; LANES]) -> bool,
) where
    Scalar1: Copy + Default + core::fmt::Debug + DefaultStrategy,
    Scalar2: Copy + Default + core::fmt::Debug + DefaultStrategy,
    Scalar3: Copy + Default + core::fmt::Debug + DefaultStrategy,
    ScalarResult: Copy + Default + biteq::BitEq + core::fmt::Debug + DefaultStrategy,
    Vector1: Into<[Scalar1; LANES]> + From<[Scalar1; LANES]> + Copy,
    Vector2: Into<[Scalar2; LANES]> + From<[Scalar2; LANES]> + Copy,
    Vector3: Into<[Scalar3; LANES]> + From<[Scalar3; LANES]> + Copy,
    VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy,
{
    test_3(&|x: [Scalar1; LANES], y: [Scalar2; LANES], z: [Scalar3; LANES]| {
        proptest::prop_assume!(check(x, y, z));

        // Vector path: convert the arrays, apply `fv` once, convert the result back.
        let result_1: [ScalarResult; LANES] = fv(x.into(), y.into(), z.into()).into();

        // Scalar path: apply `fs` to each lane in turn.
        let mut result_2 = [ScalarResult::default(); LANES];
        let lanes = x.iter().zip(y.iter()).zip(z.iter());
        for (((a, b), c), out) in lanes.zip(result_2.iter_mut()) {
            *out = fs(*a, *b, *c);
        }

        crate::prop_assert_biteq!(result_1, result_2);
        Ok(())
    });
}
/// Expand a const-generic test into separate tests for each possible lane count.
#[macro_export]
macro_rules! test_lanes {