Merge pull request #138 from rust-lang/feature/various-fns
Add various fns - Sum/Product traits - recip/to_degrees/to_radians/min/max/clamp/signum/copysign; rust-lang/stdsimd#14 - mul_add: rust-lang/stdsimd#14, fixes rust-lang/stdsimd#102
This commit is contained in:
commit
3872723ead
7 changed files with 353 additions and 1 deletions
|
@ -49,6 +49,9 @@ extern "platform-intrinsic" {
|
|||
/// fsqrt
|
||||
pub(crate) fn simd_fsqrt<T>(x: T) -> T;
|
||||
|
||||
/// fma
|
||||
pub(crate) fn simd_fma<T>(x: T, y: T, z: T) -> T;
|
||||
|
||||
pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
|
||||
pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
|
||||
pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;
|
||||
|
|
52
crates/core_simd/src/iter.rs
Normal file
52
crates/core_simd/src/iter.rs
Normal file
|
@ -0,0 +1,52 @@
|
|||
macro_rules! impl_traits {
|
||||
{ $type:ident } => {
|
||||
impl<const LANES: usize> core::iter::Sum<Self> for crate::$type<LANES>
|
||||
where
|
||||
Self: crate::LanesAtMost32,
|
||||
{
|
||||
fn sum<I: core::iter::Iterator<Item = Self>>(iter: I) -> Self {
|
||||
iter.fold(Default::default(), core::ops::Add::add)
|
||||
}
|
||||
}
|
||||
|
||||
impl<const LANES: usize> core::iter::Product<Self> for crate::$type<LANES>
|
||||
where
|
||||
Self: crate::LanesAtMost32,
|
||||
{
|
||||
fn product<I: core::iter::Iterator<Item = Self>>(iter: I) -> Self {
|
||||
iter.fold(Default::default(), core::ops::Mul::mul)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, const LANES: usize> core::iter::Sum<&'a Self> for crate::$type<LANES>
|
||||
where
|
||||
Self: crate::LanesAtMost32,
|
||||
{
|
||||
fn sum<I: core::iter::Iterator<Item = &'a Self>>(iter: I) -> Self {
|
||||
iter.fold(Default::default(), core::ops::Add::add)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, const LANES: usize> core::iter::Product<&'a Self> for crate::$type<LANES>
|
||||
where
|
||||
Self: crate::LanesAtMost32,
|
||||
{
|
||||
fn product<I: core::iter::Iterator<Item = &'a Self>>(iter: I) -> Self {
|
||||
iter.fold(Default::default(), core::ops::Mul::mul)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl_traits! { SimdF32 }
|
||||
impl_traits! { SimdF64 }
|
||||
impl_traits! { SimdU8 }
|
||||
impl_traits! { SimdU16 }
|
||||
impl_traits! { SimdU32 }
|
||||
impl_traits! { SimdU64 }
|
||||
impl_traits! { SimdUsize }
|
||||
impl_traits! { SimdI8 }
|
||||
impl_traits! { SimdI16 }
|
||||
impl_traits! { SimdI32 }
|
||||
impl_traits! { SimdI64 }
|
||||
impl_traits! { SimdIsize }
|
|
@ -22,6 +22,7 @@ pub use to_bytes::ToBytes;
|
|||
mod comparisons;
|
||||
mod fmt;
|
||||
mod intrinsics;
|
||||
mod iter;
|
||||
mod ops;
|
||||
mod round;
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
/// `$lanes` of float `$type`, which uses `$bits_ty` as its binary
|
||||
/// representation. Called from `define_float_vector!`.
|
||||
macro_rules! impl_float_vector {
|
||||
{ $name:ident, $type:ty, $bits_ty:ident, $mask_ty:ident, $mask_impl_ty:ident } => {
|
||||
{ $name:ident, $type:ident, $bits_ty:ident, $mask_ty:ident, $mask_impl_ty:ident } => {
|
||||
impl_vector! { $name, $type }
|
||||
impl_float_reductions! { $name, $type }
|
||||
|
||||
|
@ -36,6 +36,18 @@ macro_rules! impl_float_vector {
|
|||
unsafe { crate::intrinsics::simd_fabs(self) }
|
||||
}
|
||||
|
||||
/// Fused multiply-add. Computes `(self * a) + b` with only one rounding error,
|
||||
/// yielding a more accurate result than an unfused multiply-add.
|
||||
///
|
||||
/// Using `mul_add` *may* be more performant than an unfused multiply-add if the target
|
||||
/// architecture has a dedicated `fma` CPU instruction. However, this is not always
|
||||
/// true, and will be heavily dependent on designing algorithms with specific target
|
||||
/// hardware in mind.
|
||||
#[inline]
|
||||
pub fn mul_add(self, a: Self, b: Self) -> Self {
|
||||
unsafe { crate::intrinsics::simd_fma(self, a, b) }
|
||||
}
|
||||
|
||||
/// Produces a vector where every lane has the square root value
|
||||
/// of the equivalently-indexed lane in `self`
|
||||
#[inline]
|
||||
|
@ -43,6 +55,25 @@ macro_rules! impl_float_vector {
|
|||
pub fn sqrt(self) -> Self {
|
||||
unsafe { crate::intrinsics::simd_fsqrt(self) }
|
||||
}
|
||||
|
||||
/// Takes the reciprocal (inverse) of each lane, `1/x`.
|
||||
#[inline]
|
||||
pub fn recip(self) -> Self {
|
||||
Self::splat(1.0) / self
|
||||
}
|
||||
|
||||
/// Converts each lane from radians to degrees.
|
||||
#[inline]
|
||||
pub fn to_degrees(self) -> Self {
|
||||
// to_degrees uses a special constant for better precision, so extract that constant
|
||||
self * Self::splat($type::to_degrees(1.))
|
||||
}
|
||||
|
||||
/// Converts each lane from degrees to radians.
|
||||
#[inline]
|
||||
pub fn to_radians(self) -> Self {
|
||||
self * Self::splat($type::to_radians(1.))
|
||||
}
|
||||
}
|
||||
|
||||
impl<const LANES: usize> $name<LANES>
|
||||
|
@ -97,6 +128,67 @@ macro_rules! impl_float_vector {
|
|||
pub fn is_normal(self) -> crate::$mask_ty<LANES> {
|
||||
!(self.abs().lanes_eq(Self::splat(0.0)) | self.is_nan() | self.is_subnormal() | self.is_infinite())
|
||||
}
|
||||
|
||||
/// Replaces each lane with a number that represents its sign.
|
||||
///
|
||||
/// * `1.0` if the number is positive, `+0.0`, or `INFINITY`
|
||||
/// * `-1.0` if the number is negative, `-0.0`, or `NEG_INFINITY`
|
||||
/// * `NAN` if the number is `NAN`
|
||||
#[inline]
|
||||
pub fn signum(self) -> Self {
|
||||
self.is_nan().select(Self::splat($type::NAN), Self::splat(1.0).copysign(self))
|
||||
}
|
||||
|
||||
/// Returns each lane with the magnitude of `self` and the sign of `sign`.
|
||||
///
|
||||
/// If any lane is a `NAN`, then a `NAN` with the sign of `sign` is returned.
|
||||
#[inline]
|
||||
pub fn copysign(self, sign: Self) -> Self {
|
||||
let sign_bit = sign.to_bits() & Self::splat(-0.).to_bits();
|
||||
let magnitude = self.to_bits() & !Self::splat(-0.).to_bits();
|
||||
Self::from_bits(sign_bit | magnitude)
|
||||
}
|
||||
|
||||
/// Returns the minimum of each lane.
|
||||
///
|
||||
/// If one of the values is `NAN`, then the other value is returned.
|
||||
#[inline]
|
||||
pub fn min(self, other: Self) -> Self {
|
||||
// TODO consider using an intrinsic
|
||||
self.is_nan().select(
|
||||
other,
|
||||
self.lanes_ge(other).select(other, self)
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns the maximum of each lane.
|
||||
///
|
||||
/// If one of the values is `NAN`, then the other value is returned.
|
||||
#[inline]
|
||||
pub fn max(self, other: Self) -> Self {
|
||||
// TODO consider using an intrinsic
|
||||
self.is_nan().select(
|
||||
other,
|
||||
self.lanes_le(other).select(other, self)
|
||||
)
|
||||
}
|
||||
|
||||
/// Restrict each lane to a certain interval unless it is NaN.
|
||||
///
|
||||
/// For each lane in `self`, returns the corresponding lane in `max` if the lane is
|
||||
/// greater than `max`, and the corresponding lane in `min` if the lane is less
|
||||
/// than `min`. Otherwise returns the lane in `self`.
|
||||
#[inline]
|
||||
pub fn clamp(self, min: Self, max: Self) -> Self {
|
||||
assert!(
|
||||
min.lanes_le(max).all(),
|
||||
"each lane in `min` must be less than or equal to the corresponding lane in `max`",
|
||||
);
|
||||
let mut x = self;
|
||||
x = x.lanes_lt(min).select(min, x);
|
||||
x = x.lanes_gt(max).select(max, x);
|
||||
x
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -33,14 +33,28 @@ macro_rules! impl_integer_vector {
|
|||
crate::$mask_ty<LANES>: crate::Mask,
|
||||
{
|
||||
/// Returns true for each positive lane and false if it is zero or negative.
|
||||
#[inline]
|
||||
pub fn is_positive(self) -> crate::$mask_ty<LANES> {
|
||||
self.lanes_gt(Self::splat(0))
|
||||
}
|
||||
|
||||
/// Returns true for each negative lane and false if it is zero or positive.
|
||||
#[inline]
|
||||
pub fn is_negative(self) -> crate::$mask_ty<LANES> {
|
||||
self.lanes_lt(Self::splat(0))
|
||||
}
|
||||
|
||||
/// Returns numbers representing the sign of each lane.
|
||||
/// * `0` if the number is zero
|
||||
/// * `1` if the number is positive
|
||||
/// * `-1` if the number is negative
|
||||
#[inline]
|
||||
pub fn signum(self) -> Self {
|
||||
self.is_positive().select(
|
||||
Self::splat(1),
|
||||
self.is_negative().select(Self::splat(-1), Self::splat(0))
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -247,6 +247,15 @@ macro_rules! impl_signed_tests {
|
|||
&|_| true,
|
||||
);
|
||||
}
|
||||
|
||||
fn signum<const LANES: usize>() {
|
||||
test_helpers::test_unary_elementwise(
|
||||
&Vector::<LANES>::signum,
|
||||
&Scalar::signum,
|
||||
&|_| true,
|
||||
)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
test_helpers::test_lanes_panic! {
|
||||
|
@ -426,6 +435,14 @@ macro_rules! impl_float_tests {
|
|||
)
|
||||
}
|
||||
|
||||
fn mul_add<const LANES: usize>() {
|
||||
test_helpers::test_ternary_elementwise(
|
||||
&Vector::<LANES>::mul_add,
|
||||
&Scalar::mul_add,
|
||||
&|_, _, _| true,
|
||||
)
|
||||
}
|
||||
|
||||
fn sqrt<const LANES: usize>() {
|
||||
test_helpers::test_unary_elementwise(
|
||||
&Vector::<LANES>::sqrt,
|
||||
|
@ -433,6 +450,117 @@ macro_rules! impl_float_tests {
|
|||
&|_| true,
|
||||
)
|
||||
}
|
||||
|
||||
fn recip<const LANES: usize>() {
|
||||
test_helpers::test_unary_elementwise(
|
||||
&Vector::<LANES>::recip,
|
||||
&Scalar::recip,
|
||||
&|_| true,
|
||||
)
|
||||
}
|
||||
|
||||
fn to_degrees<const LANES: usize>() {
|
||||
test_helpers::test_unary_elementwise(
|
||||
&Vector::<LANES>::to_degrees,
|
||||
&Scalar::to_degrees,
|
||||
&|_| true,
|
||||
)
|
||||
}
|
||||
|
||||
fn to_radians<const LANES: usize>() {
|
||||
test_helpers::test_unary_elementwise(
|
||||
&Vector::<LANES>::to_radians,
|
||||
&Scalar::to_radians,
|
||||
&|_| true,
|
||||
)
|
||||
}
|
||||
|
||||
fn signum<const LANES: usize>() {
|
||||
test_helpers::test_unary_elementwise(
|
||||
&Vector::<LANES>::signum,
|
||||
&Scalar::signum,
|
||||
&|_| true,
|
||||
)
|
||||
}
|
||||
|
||||
fn copysign<const LANES: usize>() {
|
||||
test_helpers::test_binary_elementwise(
|
||||
&Vector::<LANES>::copysign,
|
||||
&Scalar::copysign,
|
||||
&|_, _| true,
|
||||
)
|
||||
}
|
||||
|
||||
fn min<const LANES: usize>() {
|
||||
// Regular conditions (both values aren't zero)
|
||||
test_helpers::test_binary_elementwise(
|
||||
&Vector::<LANES>::min,
|
||||
&Scalar::min,
|
||||
// Reject the case where both values are zero with different signs
|
||||
&|a, b| {
|
||||
for (a, b) in a.iter().zip(b.iter()) {
|
||||
if *a == 0. && *b == 0. && a.signum() != b.signum() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
);
|
||||
|
||||
// Special case where both values are zero
|
||||
let p_zero = Vector::<LANES>::splat(0.);
|
||||
let n_zero = Vector::<LANES>::splat(-0.);
|
||||
assert!(p_zero.min(n_zero).to_array().iter().all(|x| *x == 0.));
|
||||
assert!(n_zero.min(p_zero).to_array().iter().all(|x| *x == 0.));
|
||||
}
|
||||
|
||||
fn max<const LANES: usize>() {
|
||||
// Regular conditions (both values aren't zero)
|
||||
test_helpers::test_binary_elementwise(
|
||||
&Vector::<LANES>::max,
|
||||
&Scalar::max,
|
||||
// Reject the case where both values are zero with different signs
|
||||
&|a, b| {
|
||||
for (a, b) in a.iter().zip(b.iter()) {
|
||||
if *a == 0. && *b == 0. && a.signum() != b.signum() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
);
|
||||
|
||||
// Special case where both values are zero
|
||||
let p_zero = Vector::<LANES>::splat(0.);
|
||||
let n_zero = Vector::<LANES>::splat(-0.);
|
||||
assert!(p_zero.max(n_zero).to_array().iter().all(|x| *x == 0.));
|
||||
assert!(n_zero.max(p_zero).to_array().iter().all(|x| *x == 0.));
|
||||
}
|
||||
|
||||
fn clamp<const LANES: usize>() {
|
||||
test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| {
|
||||
for (min, max) in min.iter_mut().zip(max.iter_mut()) {
|
||||
if max < min {
|
||||
core::mem::swap(min, max);
|
||||
}
|
||||
if min.is_nan() {
|
||||
*min = Scalar::NEG_INFINITY;
|
||||
}
|
||||
if max.is_nan() {
|
||||
*max = Scalar::INFINITY;
|
||||
}
|
||||
}
|
||||
|
||||
let mut result_scalar = [Scalar::default(); LANES];
|
||||
for i in 0..LANES {
|
||||
result_scalar[i] = value[i].clamp(min[i], max[i]);
|
||||
}
|
||||
let result_vector = Vector::from_array(value).clamp(min.into(), max.into()).to_array();
|
||||
test_helpers::prop_assert_biteq!(result_scalar, result_vector);
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
fn horizontal_sum<const LANES: usize>() {
|
||||
test_helpers::test_1(&|x| {
|
||||
test_helpers::prop_assert_biteq! (
|
||||
|
|
|
@ -97,6 +97,27 @@ pub fn test_2<A: core::fmt::Debug + DefaultStrategy, B: core::fmt::Debug + Defau
|
|||
.unwrap();
|
||||
}
|
||||
|
||||
/// Test a function that takes two values.
|
||||
pub fn test_3<
|
||||
A: core::fmt::Debug + DefaultStrategy,
|
||||
B: core::fmt::Debug + DefaultStrategy,
|
||||
C: core::fmt::Debug + DefaultStrategy,
|
||||
>(
|
||||
f: &dyn Fn(A, B, C) -> proptest::test_runner::TestCaseResult,
|
||||
) {
|
||||
let mut runner = proptest::test_runner::TestRunner::default();
|
||||
runner
|
||||
.run(
|
||||
&(
|
||||
A::default_strategy(),
|
||||
B::default_strategy(),
|
||||
C::default_strategy(),
|
||||
),
|
||||
|(a, b, c)| f(a, b, c),
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
/// Test a unary vector function against a unary scalar function, applied elementwise.
|
||||
#[inline(never)]
|
||||
pub fn test_unary_elementwise<Scalar, ScalarResult, Vector, VectorResult, const LANES: usize>(
|
||||
|
@ -257,6 +278,47 @@ pub fn test_binary_scalar_lhs_elementwise<
|
|||
});
|
||||
}
|
||||
|
||||
/// Test a ternary vector function against a ternary scalar function, applied elementwise.
|
||||
#[inline(never)]
|
||||
pub fn test_ternary_elementwise<
|
||||
Scalar1,
|
||||
Scalar2,
|
||||
Scalar3,
|
||||
ScalarResult,
|
||||
Vector1,
|
||||
Vector2,
|
||||
Vector3,
|
||||
VectorResult,
|
||||
const LANES: usize,
|
||||
>(
|
||||
fv: &dyn Fn(Vector1, Vector2, Vector3) -> VectorResult,
|
||||
fs: &dyn Fn(Scalar1, Scalar2, Scalar3) -> ScalarResult,
|
||||
check: &dyn Fn([Scalar1; LANES], [Scalar2; LANES], [Scalar3; LANES]) -> bool,
|
||||
) where
|
||||
Scalar1: Copy + Default + core::fmt::Debug + DefaultStrategy,
|
||||
Scalar2: Copy + Default + core::fmt::Debug + DefaultStrategy,
|
||||
Scalar3: Copy + Default + core::fmt::Debug + DefaultStrategy,
|
||||
ScalarResult: Copy + Default + biteq::BitEq + core::fmt::Debug + DefaultStrategy,
|
||||
Vector1: Into<[Scalar1; LANES]> + From<[Scalar1; LANES]> + Copy,
|
||||
Vector2: Into<[Scalar2; LANES]> + From<[Scalar2; LANES]> + Copy,
|
||||
Vector3: Into<[Scalar3; LANES]> + From<[Scalar3; LANES]> + Copy,
|
||||
VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy,
|
||||
{
|
||||
test_3(&|x: [Scalar1; LANES], y: [Scalar2; LANES], z: [Scalar3; LANES]| {
|
||||
proptest::prop_assume!(check(x, y, z));
|
||||
let result_1: [ScalarResult; LANES] = fv(x.into(), y.into(), z.into()).into();
|
||||
let result_2: [ScalarResult; LANES] = {
|
||||
let mut result = [ScalarResult::default(); LANES];
|
||||
for ((i1, (i2, i3)), o) in x.iter().zip(y.iter().zip(z.iter())).zip(result.iter_mut()) {
|
||||
*o = fs(*i1, *i2, *i3);
|
||||
}
|
||||
result
|
||||
};
|
||||
crate::prop_assert_biteq!(result_1, result_2);
|
||||
Ok(())
|
||||
});
|
||||
}
|
||||
|
||||
/// Expand a const-generic test into separate tests for each possible lane count.
|
||||
#[macro_export]
|
||||
macro_rules! test_lanes {
|
||||
|
|
Loading…
Reference in a new issue