diff --git a/src/libcore/lib.rs b/src/libcore/lib.rs
index d21cfbcdfce..e226e9fa154 100644
--- a/src/libcore/lib.rs
+++ b/src/libcore/lib.rs
@@ -79,7 +79,7 @@
 #![feature(reflect)]
 #![feature(rustc_attrs)]
 #![cfg_attr(stage0, feature(simd))]
-#![cfg_attr(not(stage0), feature(repr_simd))]
+#![cfg_attr(not(stage0), feature(repr_simd, platform_intrinsics))]
 #![feature(staged_api)]
 #![feature(unboxed_closures)]
 
diff --git a/src/libcore/simd.rs b/src/libcore/simd.rs
index d58d0c50a89..fb39b3accc3 100644
--- a/src/libcore/simd.rs
+++ b/src/libcore/simd.rs
@@ -10,25 +10,12 @@
 
 //! SIMD vectors.
 //!
-//! These types can be used for accessing basic SIMD operations. Each of them
-//! implements the standard arithmetic operator traits (Add, Sub, Mul, Div,
-//! Rem, Shl, Shr) through compiler magic, rather than explicitly. Currently
+//! These types can be used for accessing basic SIMD operations. Currently
 //! comparison operators are not implemented. To use SSE3+, you must enable
 //! the features, like `-C target-feature=sse3,sse4.1,sse4.2`, or a more
 //! specific `target-cpu`. No other SIMD intrinsics or high-level wrappers are
 //! provided beyond this module.
 //!
-//! ```rust
-//! #![feature(core_simd)]
-//!
-//! fn main() {
-//!     use std::simd::f32x4;
-//!     let a = f32x4(40.0, 41.0, 42.0, 43.0);
-//!     let b = f32x4(1.0, 1.1, 3.4, 9.8);
-//!     println!("{:?}", a + b);
-//! }
-//! ```
-//!
 //! # Stability Note
 //!
 //! These are all experimental. The interface may change entirely, without
@@ -44,6 +31,33 @@
 #![allow(missing_docs)]
 #![allow(deprecated)]
 
+use ops::{Add, Sub, Mul, Div, Shl, Shr, BitAnd, BitOr, BitXor};
+
+// FIXME(stage0): the contents of this macro can be inlined.
+// ABIs are verified as valid as soon as they are parsed, i.e. before
+// `cfg` stripping. The `platform-intrinsic` ABI is new, so stage0
+// doesn't know about it, but it still errors out when it hits it
+// (despite this being in a `cfg(not(stage0))` module).
+//
+// Each intrinsic is declared generic over `T` and takes and returns values
+// of that one type. They are invoked by the impls `impl_traits!` generates.
+macro_rules! argh {
+    () => {
+        extern "platform-intrinsic" {
+            fn simd_add<T>(x: T, y: T) -> T;
+            fn simd_sub<T>(x: T, y: T) -> T;
+            fn simd_mul<T>(x: T, y: T) -> T;
+            fn simd_div<T>(x: T, y: T) -> T;
+            fn simd_shl<T>(x: T, y: T) -> T;
+            fn simd_shr<T>(x: T, y: T) -> T;
+            fn simd_and<T>(x: T, y: T) -> T;
+            fn simd_or<T>(x: T, y: T) -> T;
+            fn simd_xor<T>(x: T, y: T) -> T;
+        }
+    }
+}
+argh!();
+
 #[repr(simd)]
 #[derive(Copy, Clone, Debug)]
 #[repr(C)]
@@ -101,3 +115,42 @@ pub struct f32x4(pub f32, pub f32, pub f32, pub f32);
 #[derive(Copy, Clone, Debug)]
 #[repr(C)]
 pub struct f64x2(pub f64, pub f64);
+
+// Implements binary operator traits for the SIMD vector types.
+//
+// Each input row has the form `Trait, method, intrinsic: Type, Type, ...;`.
+// For every listed type this expands to `impl Trait<Type> for Type` with
+// `Output = Self`, whose method forwards both operands to the intrinsic.
+macro_rules! impl_traits {
+    ($($trayt: ident, $method: ident, $func: ident: $($ty: ty),*;)*) => {
+        $($(
+            impl $trayt<$ty> for $ty {
+                type Output = Self;
+                fn $method(self, other: Self) -> Self {
+                    // `$func` is a foreign item, so calling it needs `unsafe`.
+                    // NOTE(review): soundness presumably relies on `$ty` being
+                    // a `#[repr(simd)]` vector type -- confirm.
+                    unsafe {
+                        $func(self, other)
+                    }
+                }
+            }
+        )*)*
+    }
+}
+
+// `Div` is only generated for `f32x4` and `f64x2`; the shift and bitwise
+// traits are only generated for the `u*`/`i*` vector types.
+impl_traits! {
+    Add, add, simd_add: u8x16, u16x8, u32x4, u64x2, i8x16, i16x8, i32x4, i64x2, f32x4, f64x2;
+    Sub, sub, simd_sub: u8x16, u16x8, u32x4, u64x2, i8x16, i16x8, i32x4, i64x2, f32x4, f64x2;
+    Mul, mul, simd_mul: u8x16, u16x8, u32x4, u64x2, i8x16, i16x8, i32x4, i64x2, f32x4, f64x2;
+
+    Div, div, simd_div: f32x4, f64x2;
+
+    Shl, shl, simd_shl: u8x16, u16x8, u32x4, u64x2, i8x16, i16x8, i32x4, i64x2;
+    Shr, shr, simd_shr: u8x16, u16x8, u32x4, u64x2, i8x16, i16x8, i32x4, i64x2;
+    BitAnd, bitand, simd_and: u8x16, u16x8, u32x4, u64x2, i8x16, i16x8, i32x4, i64x2;
+    BitOr, bitor, simd_or: u8x16, u16x8, u32x4, u64x2, i8x16, i16x8, i32x4, i64x2;
+    BitXor, bitxor, simd_xor: u8x16, u16x8, u32x4, u64x2, i8x16, i16x8, i32x4, i64x2;
+}
diff --git a/src/test/bench/shootout-spectralnorm.rs b/src/test/bench/shootout-spectralnorm.rs
index b3591477022..a6c77eaf7c6 100644
--- a/src/test/bench/shootout-spectralnorm.rs
+++ b/src/test/bench/shootout-spectralnorm.rs
@@ -91,7 +91,7 @@ fn mult<F>(v: &[f64], out: &mut [f64], start: usize, a: F)
         for (j, chunk) in v.chunks(2).enumerate().map(|(j, s)| (2 * j, s)) {
             let top = f64x2(chunk[0], chunk[1]);
             let bot = f64x2(a(i, j), a(i, j + 1));
-            sum += top / bot;
+            sum = sum + top / bot;
         }
         let f64x2(a, b) = sum;
         *slot = a + b;