From 7fdd058c609e9cc727b44a63fd4d9d1ad3cef206 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 27 Jul 2019 17:48:24 +0200 Subject: [PATCH 01/17] Emulate some simd intrinsics --- example/std_example.rs | 16 ++++++ src/intrinsics.rs | 127 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 137 insertions(+), 6 deletions(-) diff --git a/example/std_example.rs b/example/std_example.rs index 2a9df999559..7deaddd7df7 100644 --- a/example/std_example.rs +++ b/example/std_example.rs @@ -3,6 +3,7 @@ use std::io::Write; use std::intrinsics; + fn main() { let _ = ::std::iter::repeat('a' as u8).take(10).collect::>(); let stderr = ::std::io::stderr(); @@ -43,6 +44,21 @@ fn main() { assert_eq!(0xFEDCBA987654321123456789ABCDEFu128 >> 64, 0xFEDCBA98765432u128); assert_eq!(0xFEDCBA987654321123456789ABCDEFu128 as i128 >> 64, 0xFEDCBA98765432i128); assert_eq!(353985398u128 * 932490u128, 330087843781020u128); + + unsafe { + test_simd(); + } +} + +#[target_feature(enable = "sse2")] +unsafe fn test_simd() { + use std::arch::x86_64::*; + + let x = _mm_setzero_si128(); + let y = _mm_set1_epi16(7); + let or = _mm_or_si128(x, y); + + assert_eq!(std::mem::transmute::<_, [u16; 8]>(or), [7, 7, 7, 7, 7, 7, 7, 7]); } #[derive(PartialEq)] diff --git a/src/intrinsics.rs b/src/intrinsics.rs index 1f86b096e63..8efeb0b4acc 100644 --- a/src/intrinsics.rs +++ b/src/intrinsics.rs @@ -116,6 +116,64 @@ macro_rules! atomic_minmax { }; } +fn lane_type_and_count<'tcx>( + fx: &FunctionCx<'_, 'tcx, impl Backend>, + layout: TyLayout<'tcx>, + intrinsic: &str, +) -> (TyLayout<'tcx>, usize) { + let lane_count = match layout.fields { + layout::FieldPlacement::Array { stride: _, count } => usize::try_from(count).unwrap(), + _ => panic!("Non vector type {:?} passed to or returned from simd_* intrinsic {}", layout.ty, intrinsic), + }; + let lane_layout = layout.field(fx, 0); + (lane_layout, lane_count) +} + +fn simd_for_each_lane<'tcx, B: Backend>( + fx: &mut FunctionCx<'_, 'tcx, B>, + intrinsic: &str, + x: CValue<'tcx>, + y: CValue<'tcx>, + ret: CPlace<'tcx>, + f: impl Fn(&mut FunctionCx<'_, 'tcx, B>, TyLayout<'tcx>, TyLayout<'tcx>, Value, Value) -> CValue<'tcx>, +) { + assert_eq!(x.layout(), y.layout()); + let layout = x.layout(); + + let (lane_layout, lane_count) = lane_type_and_count(fx, layout, intrinsic); + let (ret_lane_layout, ret_lane_count) = lane_type_and_count(fx, ret.layout(), intrinsic); + assert_eq!(lane_count, ret_lane_count); + + for lane in 0..lane_count { + let lane = mir::Field::new(lane); + let x_lane = x.value_field(fx, lane).load_scalar(fx); + let y_lane = y.value_field(fx, lane).load_scalar(fx); + + let res_lane = f(fx, lane_layout, ret_lane_layout, x_lane, y_lane); + + ret.place_field(fx, lane).write_cvalue(fx, res_lane); + } +} + +macro_rules! simd_binop { + ($fx:expr, $intrinsic:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) => { + simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, _lane_layout, ret_lane_layout, x_lane, y_lane| { + let res_lane = fx.bcx.ins().$op(x_lane, y_lane); + CValue::by_val(res_lane, ret_lane_layout) + }); + }; + ($fx:expr, $intrinsic:expr, $op_u:ident|$op_s:ident($x:ident, $y:ident) -> $ret:ident) => { + simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, lane_layout, ret_lane_layout, x_lane, y_lane| { + let res_lane = match lane_layout.ty.sty { + ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane), + ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane), + _ => unreachable!("{:?}", lane_layout.ty), + }; + CValue::by_val(res_lane, ret_lane_layout) + }); + }; +} + pub fn codegen_intrinsic_call<'a, 'tcx: 'a>( fx: &mut FunctionCx<'a, 'tcx, impl Backend>, def_id: DefId, @@ -180,12 +238,6 @@ pub fn codegen_intrinsic_call<'a, 'tcx: 'a>( cosf64(flt) -> f64 => cos, tanf32(flt) -> f32 => tanf, tanf64(flt) -> f64 => tan, - - // minmax - minnumf32(a, b) -> f32 => fminf, - minnumf64(a, b) -> f64 => fmin, - maxnumf32(a, b) -> f32 => fmaxf, - maxnumf64(a, b) -> f64 => fmax, } intrinsic_match! { @@ -675,6 +727,69 @@ pub fn codegen_intrinsic_call<'a, 'tcx: 'a>( _ if intrinsic.starts_with("atomic_umin"), (v ptr, v src) { atomic_minmax!(fx, IntCC::UnsignedLessThan, (ptr, src) -> ret); }; + + minnumf32, (v a, v b) { + let val = fx.bcx.ins().fmin(a, b); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f32)); + ret.write_cvalue(fx, val); + }; + minnumf64, (v a, v b) { + let val = fx.bcx.ins().fmin(a, b); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64)); + ret.write_cvalue(fx, val); + }; + maxnumf32, (v a, v b) { + let val = fx.bcx.ins().fmax(a, b); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f32)); + ret.write_cvalue(fx, val); + }; + maxnumf64, (v a, v b) { + let val = fx.bcx.ins().fmax(a, b); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64)); + ret.write_cvalue(fx, val); + }; + + simd_cast, (c x) { + ret.write_cvalue(fx, x.unchecked_cast_to(ret.layout())); + }; + + simd_add, (c x, c y) { + simd_binop!(fx, intrinsic, iadd(x, y) -> ret); + }; + simd_sub, (c x, c y) { + simd_binop!(fx, intrinsic, isub(x, y) -> ret); + }; + simd_mul, (c x, c y) { + simd_binop!(fx, intrinsic, imul(x, y) -> ret); + }; + simd_div, (c x, c y) { + simd_binop!(fx, intrinsic, udiv|sdiv(x, y) -> ret); + }; + simd_rem, (c x, c y) { + simd_binop!(fx, intrinsic, urem|srem(x, y) -> ret); + }; + simd_shl, (c x, c y) { + simd_binop!(fx, intrinsic, ishl(x, y) -> ret); + }; + simd_shr, (c x, c y) { + simd_binop!(fx, intrinsic, ushr|sshr(x, y) -> ret); + }; + simd_and, (c x, c y) { + simd_binop!(fx, intrinsic, band(x, y) -> ret); + }; + simd_or, (c x, c y) { + simd_binop!(fx, intrinsic, bor(x, y) -> ret); + }; + simd_bxor, (c x, c y) { + simd_binop!(fx, intrinsic, bxor(x, y) -> ret); + }; + + simd_fmin, (c x, c y) { + simd_binop!(fx, intrinsic, fmin(x, y) -> ret); + }; + simd_fmax, (c x, c y) { + simd_binop!(fx, intrinsic, fmax(x, y) -> ret); + }; } if let Some((_, dest)) = destination { From 9e3f2391b8f9da831c8aa25ce8cdc4eb4dc300ef Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 27 Jul 2019 17:52:57 +0200 Subject: [PATCH 02/17] Emulate compare simd intrinsics --- src/intrinsics.rs | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/src/intrinsics.rs b/src/intrinsics.rs index 8efeb0b4acc..3fd1d5fd6d4 100644 --- a/src/intrinsics.rs +++ b/src/intrinsics.rs @@ -156,6 +156,25 @@ fn simd_for_each_lane<'tcx, B: Backend>( } macro_rules! simd_binop { + ($fx:expr, $intrinsic:expr, icmp($cc:ident, $x:ident, $y:ident) -> $ret:ident) => { + simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, _lane_layout, ret_lane_layout, x_lane, y_lane| { + let res_lane = fx.bcx.ins().icmp(IntCC::$cc, x_lane, y_lane); + let res_lane = fx.bcx.ins().bint(types::I8, res_lane); + CValue::by_val(res_lane, ret_lane_layout) + }); + }; + ($fx:expr, $intrinsic:expr, icmp($cc_u:ident|$cc_s:ident, $x:ident, $y:ident) -> $ret:ident) => { + simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, lane_layout, ret_lane_layout, x_lane, y_lane| { + let res_lane = match lane_layout.ty.sty { + ty::Uint(_) => fx.bcx.ins().icmp(IntCC::$cc_u, x_lane, y_lane), + ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc_s, x_lane, y_lane), + _ => unreachable!("{:?}", lane_layout.ty), + }; + let res_lane = fx.bcx.ins().bint(types::I8, res_lane); + CValue::by_val(res_lane, ret_lane_layout) + }); + }; + ($fx:expr, $intrinsic:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) => { simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, _lane_layout, ret_lane_layout, x_lane, y_lane| { let res_lane = fx.bcx.ins().$op(x_lane, y_lane); @@ -753,6 +772,25 @@ pub fn codegen_intrinsic_call<'a, 'tcx: 'a>( ret.write_cvalue(fx, x.unchecked_cast_to(ret.layout())); }; + simd_eq, (c x, c y) { + simd_binop!(fx, intrinsic, icmp(Equal, x, y) -> ret); + }; + simd_ne, (c x, c y) { + simd_binop!(fx, intrinsic, icmp(NotEqual, x, y) -> ret); + }; + simd_lt, (c x, c y) { + simd_binop!(fx, intrinsic, icmp(UnsignedLessThan|SignedLessThan, x, y) -> ret); + }; + simd_le, (c x, c y) { + simd_binop!(fx, intrinsic, icmp(UnsignedLessThanOrEqual|SignedLessThanOrEqual, x, y) -> ret); + }; + simd_gt, (c x, c y) { + simd_binop!(fx, intrinsic, icmp(UnsignedGreaterThan|SignedGreaterThan, x, y) -> ret); + }; + simd_ge, (c x, c y) { + simd_binop!(fx, intrinsic, icmp(UnsignedGreaterThanOrEqual|SignedGreaterThanOrEqual, x, y) -> ret); + }; + simd_add, (c x, c y) { simd_binop!(fx, intrinsic, iadd(x, y) -> ret); }; From 90f2b12d473e51ba16267178aeb576edda11123a Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 28 Jul 2019 09:45:01 +0200 Subject: [PATCH 03/17] Fix simd comparison --- example/std_example.rs | 4 ++++ src/intrinsics.rs | 44 ++++++++++++++++++++++++++++-------------- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/example/std_example.rs b/example/std_example.rs index 7deaddd7df7..7fe1d082e34 100644 --- a/example/std_example.rs +++ b/example/std_example.rs @@ -57,8 +57,12 @@ unsafe fn test_simd() { let x = _mm_setzero_si128(); let y = _mm_set1_epi16(7); let or = _mm_or_si128(x, y); + let cmp_eq = _mm_cmpeq_epi8(y, y); + let cmp_lt = _mm_cmplt_epi8(y, y); assert_eq!(std::mem::transmute::<_, [u16; 8]>(or), [7, 7, 7, 7, 7, 7, 7, 7]); + assert_eq!(std::mem::transmute::<_, [u16; 8]>(cmp_eq), [0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff]); + assert_eq!(std::mem::transmute::<_, [u16; 8]>(cmp_lt), [0, 0, 0, 0, 0, 0, 0, 0]); } #[derive(PartialEq)] diff --git a/src/intrinsics.rs b/src/intrinsics.rs index 3fd1d5fd6d4..a2451ff464b 100644 --- a/src/intrinsics.rs +++ b/src/intrinsics.rs @@ -155,26 +155,40 @@ fn simd_for_each_lane<'tcx, B: Backend>( } } -macro_rules! simd_binop { - ($fx:expr, $intrinsic:expr, icmp($cc:ident, $x:ident, $y:ident) -> $ret:ident) => { - simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, _lane_layout, ret_lane_layout, x_lane, y_lane| { +fn bool_to_zero_or_max_uint<'tcx>( + fx: &mut FunctionCx<'_, 'tcx, impl Backend>, + layout: TyLayout<'tcx>, + val: Value, +) -> CValue<'tcx> { + let ty = fx.clif_type(layout.ty).unwrap(); + + let zero = fx.bcx.ins().iconst(ty, 0); + let max = fx.bcx.ins().iconst(ty, (u64::max_value() >> (64 - ty.bits())) as i64); + let res = crate::common::codegen_select(&mut fx.bcx, val, max, zero); + CValue::by_val(res, layout) +} + +macro_rules! simd_cmp { + ($fx:expr, $intrinsic:expr, $cc:ident($x:ident, $y:ident) -> $ret:ident) => { + simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, _lane_layout, res_lane_layout, x_lane, y_lane| { let res_lane = fx.bcx.ins().icmp(IntCC::$cc, x_lane, y_lane); - let res_lane = fx.bcx.ins().bint(types::I8, res_lane); - CValue::by_val(res_lane, ret_lane_layout) + bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane) }); }; - ($fx:expr, $intrinsic:expr, icmp($cc_u:ident|$cc_s:ident, $x:ident, $y:ident) -> $ret:ident) => { - simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, lane_layout, ret_lane_layout, x_lane, y_lane| { + ($fx:expr, $intrinsic:expr, $cc_u:ident|$cc_s:ident($x:ident, $y:ident) -> $ret:ident) => { + simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, lane_layout, res_lane_layout, x_lane, y_lane| { let res_lane = match lane_layout.ty.sty { ty::Uint(_) => fx.bcx.ins().icmp(IntCC::$cc_u, x_lane, y_lane), ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc_s, x_lane, y_lane), _ => unreachable!("{:?}", lane_layout.ty), }; - let res_lane = fx.bcx.ins().bint(types::I8, res_lane); - CValue::by_val(res_lane, ret_lane_layout) + bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane) }); }; +} + +macro_rules! simd_binop { ($fx:expr, $intrinsic:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) => { simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, _lane_layout, ret_lane_layout, x_lane, y_lane| { let res_lane = fx.bcx.ins().$op(x_lane, y_lane); @@ -773,22 +787,22 @@ pub fn codegen_intrinsic_call<'a, 'tcx: 'a>( }; simd_eq, (c x, c y) { - simd_binop!(fx, intrinsic, icmp(Equal, x, y) -> ret); + simd_cmp!(fx, intrinsic, Equal(x, y) -> ret); }; simd_ne, (c x, c y) { - simd_binop!(fx, intrinsic, icmp(NotEqual, x, y) -> ret); + simd_cmp!(fx, intrinsic, NotEqual(x, y) -> ret); }; simd_lt, (c x, c y) { - simd_binop!(fx, intrinsic, icmp(UnsignedLessThan|SignedLessThan, x, y) -> ret); + simd_cmp!(fx, intrinsic, UnsignedLessThan|SignedLessThan(x, y) -> ret); }; simd_le, (c x, c y) { - simd_binop!(fx, intrinsic, icmp(UnsignedLessThanOrEqual|SignedLessThanOrEqual, x, y) -> ret); + simd_cmp!(fx, intrinsic, UnsignedLessThanOrEqual|SignedLessThanOrEqual(x, y) -> ret); }; simd_gt, (c x, c y) { - simd_binop!(fx, intrinsic, icmp(UnsignedGreaterThan|SignedGreaterThan, x, y) -> ret); + simd_cmp!(fx, intrinsic, UnsignedGreaterThan|SignedGreaterThan(x, y) -> ret); }; simd_ge, (c x, c y) { - simd_binop!(fx, intrinsic, icmp(UnsignedGreaterThanOrEqual|SignedGreaterThanOrEqual, x, y) -> ret); + simd_cmp!(fx, intrinsic, UnsignedGreaterThanOrEqual|SignedGreaterThanOrEqual(x, y) -> ret); }; simd_add, (c x, c y) { From 1a2689117013d7137f207dc85da7df57df5e0af9 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 28 Jul 2019 09:54:57 +0200 Subject: [PATCH 04/17] Replace llvm intrinsics with runtime trap --- src/abi.rs | 5 +++++ src/lib.rs | 1 + src/llvm_intrinsics.rs | 21 +++++++++++++++++++++ 3 files changed, 27 insertions(+) create mode 100644 src/llvm_intrinsics.rs diff --git a/src/abi.rs b/src/abi.rs index 92872a14359..0fa546f50e5 100644 --- a/src/abi.rs +++ b/src/abi.rs @@ -643,6 +643,11 @@ pub fn codegen_terminator_call<'a, 'tcx: 'a>( let instance = ty::Instance::resolve(fx.tcx, ty::ParamEnv::reveal_all(), def_id, substs).unwrap(); + if fx.tcx.symbol_name(instance).as_str().starts_with("llvm.") { + crate::llvm_intrinsics::codegen_llvm_intrinsic_call(fx, &fx.tcx.symbol_name(instance).as_str(), substs, args, destination); + return; + } + match instance.def { InstanceDef::Intrinsic(_) => { crate::intrinsics::codegen_intrinsic_call(fx, def_id, substs, args, destination); diff --git a/src/lib.rs b/src/lib.rs index fb23522738d..cdaabcf9a39 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -40,6 +40,7 @@ mod debuginfo; mod driver; mod intrinsics; mod linkage; +mod llvm_intrinsics; mod main_shim; mod metadata; mod pretty_clif; diff --git a/src/llvm_intrinsics.rs b/src/llvm_intrinsics.rs new file mode 100644 index 00000000000..63627c41a2c --- /dev/null +++ b/src/llvm_intrinsics.rs @@ -0,0 +1,21 @@ +use crate::prelude::*; + +use rustc::ty::subst::SubstsRef; + +pub fn codegen_llvm_intrinsic_call<'a, 'tcx: 'a>( + fx: &mut FunctionCx<'a, 'tcx, impl Backend>, + intrinsic: &str, + substs: SubstsRef<'tcx>, + args: Vec>, + destination: Option<(CPlace<'tcx>, BasicBlock)>, +) { + fx.tcx.sess.warn(&format!("unsupported llvm intrinsic {}; replacing with trap", intrinsic)); + crate::trap::trap_unimplemented(fx, intrinsic); + + if let Some((_, dest)) = destination { + let ret_ebb = fx.get_ebb(dest); + fx.bcx.ins().jump(ret_ebb, &[]); + } else { + trap_unreachable(fx, "[corruption] Diverging intrinsic returned."); + } +} From 78e0525366d0e2873797624a558fd026d8809cde Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 28 Jul 2019 10:24:57 +0200 Subject: [PATCH 05/17] [WIP] simd_shuffle support --- src/intrinsics.rs | 23 ++++++++++++++++++++++- src/llvm_intrinsics.rs | 8 ++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/intrinsics.rs b/src/intrinsics.rs index a2451ff464b..5c7f8b46f5c 100644 --- a/src/intrinsics.rs +++ b/src/intrinsics.rs @@ -121,6 +121,7 @@ fn lane_type_and_count<'tcx>( layout: TyLayout<'tcx>, intrinsic: &str, ) -> (TyLayout<'tcx>, usize) { + assert!(layout.ty.is_simd()); let lane_count = match layout.fields { layout::FieldPlacement::Array { stride: _, count } => usize::try_from(count).unwrap(), _ => panic!("Non vector type {:?} passed to or returned from simd_* intrinsic {}", layout.ty, intrinsic), @@ -805,6 +806,26 @@ pub fn codegen_intrinsic_call<'a, 'tcx: 'a>( simd_cmp!(fx, intrinsic, UnsignedGreaterThanOrEqual|SignedGreaterThanOrEqual(x, y) -> ret); }; + // simd_shuffle32(x: T, y: T, idx: [u32; 32]) -> U + _ if intrinsic.starts_with("simd_shuffle"), (c x, c y, c idx) { + let n: usize = intrinsic["simd_shuffle".len()..].parse().unwrap(); + + assert_eq!(x.layout(), y.layout()); + let layout = x.layout(); + + let (lane_type, lane_count) = lane_type_and_count(fx, layout, intrinsic); + let (ret_lane_type, ret_lane_count) = lane_type_and_count(fx, ret.layout(), intrinsic); + + assert_eq!(lane_type, ret_lane_type); + assert_eq!(n, ret_lane_count); + + let total_len = lane_count * 2; + + // TODO get shuffle indices + fx.tcx.sess.warn("simd_shuffle* not yet implemented"); + crate::trap::trap_unimplemented(fx, "simd_shuffle* not yet implemented"); + }; + simd_add, (c x, c y) { simd_binop!(fx, intrinsic, iadd(x, y) -> ret); }; @@ -832,7 +853,7 @@ pub fn codegen_intrinsic_call<'a, 'tcx: 'a>( simd_or, (c x, c y) { simd_binop!(fx, intrinsic, bor(x, y) -> ret); }; - simd_bxor, (c x, c y) { + simd_xor, (c x, c y) { simd_binop!(fx, intrinsic, bxor(x, y) -> ret); }; diff --git a/src/llvm_intrinsics.rs b/src/llvm_intrinsics.rs index 63627c41a2c..765adafa10b 100644 --- a/src/llvm_intrinsics.rs +++ b/src/llvm_intrinsics.rs @@ -19,3 +19,11 @@ pub fn codegen_llvm_intrinsic_call<'a, 'tcx: 'a>( trap_unreachable(fx, "[corruption] Diverging intrinsic returned."); } } + +// llvm.x86.sse2.pmovmskb.128 +// llvm.x86.avx2.vperm2i128 +// llvm.x86.ssse3.pshuf.b.128 +// llvm.x86.avx2.pshuf.b +// llvm.x86.avx2.pmovmskb +// llvm.x86.avx2.psrli.w +// llvm.x86.sse2.psrli.w From 76b89476c3aa8ae96d544684218f772223b18031 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 28 Jul 2019 11:24:33 +0200 Subject: [PATCH 06/17] [WIP] simd_shuffle* --- example/std_example.rs | 2 + src/abi.rs | 44 ++++++++++----------- src/intrinsics.rs | 89 ++++++++++++++++++++++++++++++++++-------- src/llvm_intrinsics.rs | 2 +- 4 files changed, 98 insertions(+), 39 deletions(-) diff --git a/example/std_example.rs b/example/std_example.rs index 7fe1d082e34..8a4a6337ca7 100644 --- a/example/std_example.rs +++ b/example/std_example.rs @@ -59,10 +59,12 @@ unsafe fn test_simd() { let or = _mm_or_si128(x, y); let cmp_eq = _mm_cmpeq_epi8(y, y); let cmp_lt = _mm_cmplt_epi8(y, y); + let shl = _mm_slli_si128(y, 1); assert_eq!(std::mem::transmute::<_, [u16; 8]>(or), [7, 7, 7, 7, 7, 7, 7, 7]); assert_eq!(std::mem::transmute::<_, [u16; 8]>(cmp_eq), [0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff]); assert_eq!(std::mem::transmute::<_, [u16; 8]>(cmp_lt), [0, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!(std::mem::transmute::<_, [u16; 8]>(or), [7, 7, 7, 7, 7, 7, 7, 0]); } #[derive(PartialEq)] diff --git a/src/abi.rs b/src/abi.rs index 0fa546f50e5..3de9ea60e71 100644 --- a/src/abi.rs +++ b/src/abi.rs @@ -613,28 +613,6 @@ pub fn codegen_terminator_call<'a, 'tcx: 'a>( let fn_ty = fx.monomorphize(&func.ty(fx.mir, fx.tcx)); let sig = fx.tcx.normalize_erasing_late_bound_regions(ParamEnv::reveal_all(), &fn_ty.fn_sig(fx.tcx)); - // Unpack arguments tuple for closures - let args = if sig.abi == Abi::RustCall { - assert_eq!(args.len(), 2, "rust-call abi requires two arguments"); - let self_arg = trans_operand(fx, &args[0]); - let pack_arg = trans_operand(fx, &args[1]); - let mut args = Vec::new(); - args.push(self_arg); - match pack_arg.layout().ty.sty { - ty::Tuple(ref tupled_arguments) => { - for (i, _) in tupled_arguments.iter().enumerate() { - args.push(pack_arg.value_field(fx, mir::Field::new(i))); - } - } - _ => bug!("argument to function with \"rust-call\" ABI is not a tuple"), - } - args - } else { - args.into_iter() - .map(|arg| trans_operand(fx, arg)) - .collect::>() - }; - let destination = destination .as_ref() .map(|&(ref place, bb)| (trans_place(fx, place), bb)); @@ -664,6 +642,28 @@ pub fn codegen_terminator_call<'a, 'tcx: 'a>( } } + // Unpack arguments tuple for closures + let args = if sig.abi == Abi::RustCall { + assert_eq!(args.len(), 2, "rust-call abi requires two arguments"); + let self_arg = trans_operand(fx, &args[0]); + let pack_arg = trans_operand(fx, &args[1]); + let mut args = Vec::new(); + args.push(self_arg); + match pack_arg.layout().ty.sty { + ty::Tuple(ref tupled_arguments) => { + for (i, _) in tupled_arguments.iter().enumerate() { + args.push(pack_arg.value_field(fx, mir::Field::new(i))); + } + } + _ => bug!("argument to function with \"rust-call\" ABI is not a tuple"), + } + args + } else { + args.into_iter() + .map(|arg| trans_operand(fx, arg)) + .collect::>() + }; + codegen_call_inner( fx, Some(func), diff --git a/src/intrinsics.rs b/src/intrinsics.rs index 5c7f8b46f5c..c8d8bd7a793 100644 --- a/src/intrinsics.rs +++ b/src/intrinsics.rs @@ -12,11 +12,14 @@ macro_rules! intrinsic_pat { } macro_rules! intrinsic_arg { - (c $fx:expr, $arg:ident) => { + (o $fx:expr, $arg:ident) => { $arg }; + (c $fx:expr, $arg:ident) => { + trans_operand($fx, $arg) + }; (v $fx:expr, $arg:ident) => { - $arg.load_scalar($fx) + trans_operand($fx, $arg).load_scalar($fx) }; } @@ -40,9 +43,9 @@ macro_rules! intrinsic_match { $( intrinsic_substs!($substs, 0, $($subst),*); )? - if let [$($arg),*] = *$args { - let ($($arg),*) = ( - $(intrinsic_arg!($a $fx, $arg)),* + if let [$($arg),*] = $args { + let ($($arg,)*) = ( + $(intrinsic_arg!($a $fx, $arg),)* ); #[warn(unused_parens, non_snake_case)] { @@ -67,7 +70,10 @@ macro_rules! call_intrinsic_match { $( stringify!($name) => { assert!($substs.is_noop()); - if let [$($arg),*] = *$args { + if let [$(ref $arg),*] = *$args { + let ($($arg,)*) = ( + $(trans_operand($fx, $arg),)* + ); let res = $fx.easy_call(stringify!($func), &[$($arg),*], $fx.tcx.types.$ty); $ret.write_cvalue($fx, res); @@ -120,10 +126,10 @@ fn lane_type_and_count<'tcx>( fx: &FunctionCx<'_, 'tcx, impl Backend>, layout: TyLayout<'tcx>, intrinsic: &str, -) -> (TyLayout<'tcx>, usize) { +) -> (TyLayout<'tcx>, u32) { assert!(layout.ty.is_simd()); let lane_count = match layout.fields { - layout::FieldPlacement::Array { stride: _, count } => usize::try_from(count).unwrap(), + layout::FieldPlacement::Array { stride: _, count } => u32::try_from(count).unwrap(), _ => panic!("Non vector type {:?} passed to or returned from simd_* intrinsic {}", layout.ty, intrinsic), }; let lane_layout = layout.field(fx, 0); @@ -146,7 +152,7 @@ fn simd_for_each_lane<'tcx, B: Backend>( assert_eq!(lane_count, ret_lane_count); for lane in 0..lane_count { - let lane = mir::Field::new(lane); + let lane = mir::Field::new(lane.try_into().unwrap()); let x_lane = x.value_field(fx, lane).load_scalar(fx); let y_lane = y.value_field(fx, lane).load_scalar(fx); @@ -212,7 +218,7 @@ pub fn codegen_intrinsic_call<'a, 'tcx: 'a>( fx: &mut FunctionCx<'a, 'tcx, impl Backend>, def_id: DefId, substs: SubstsRef<'tcx>, - args: Vec>, + args: &[mir::Operand<'tcx>], destination: Option<(CPlace<'tcx>, BasicBlock)>, ) { let intrinsic = fx.tcx.item_name(def_id).as_str(); @@ -499,7 +505,7 @@ pub fn codegen_intrinsic_call<'a, 'tcx: 'a>( let ptr_diff = fx.bcx.ins().imul_imm(offset, pointee_size as i64); let base_val = base.load_scalar(fx); let res = fx.bcx.ins().iadd(base_val, ptr_diff); - ret.write_cvalue(fx, CValue::by_val(res, args[0].layout())); + ret.write_cvalue(fx, CValue::by_val(res, base.layout())); }; transmute, (c from) { @@ -807,8 +813,8 @@ pub fn codegen_intrinsic_call<'a, 'tcx: 'a>( }; // simd_shuffle32(x: T, y: T, idx: [u32; 32]) -> U - _ if intrinsic.starts_with("simd_shuffle"), (c x, c y, c idx) { - let n: usize = intrinsic["simd_shuffle".len()..].parse().unwrap(); + _ if intrinsic.starts_with("simd_shuffle"), (c x, c y, o idx) { + let n: u32 = intrinsic["simd_shuffle".len()..].parse().unwrap(); assert_eq!(x.layout(), y.layout()); let layout = x.layout(); @@ -821,9 +827,60 @@ pub fn codegen_intrinsic_call<'a, 'tcx: 'a>( let total_len = lane_count * 2; - // TODO get shuffle indices - fx.tcx.sess.warn("simd_shuffle* not yet implemented"); - crate::trap::trap_unimplemented(fx, "simd_shuffle* not yet implemented"); + let indexes = { + use rustc::mir::interpret::*; + let idx_place = match idx { + Operand::Copy(idx_place) => { + idx_place + } + _ => panic!("simd_shuffle* idx is not Operand::Copy, but {:?}", idx), + }; + + assert!(idx_place.projection.is_none()); + let static_ = match &idx_place.base { + PlaceBase::Static(static_) => { + static_ + } + PlaceBase::Local(_) => panic!("simd_shuffle* idx is not constant, but a local"), + }; + + let idx_const = match &static_.kind { + StaticKind::Static(_) => unimplemented!(), + StaticKind::Promoted(promoted) => { + fx.tcx.const_eval(ParamEnv::reveal_all().and(GlobalId { + instance: fx.instance, + promoted: Some(*promoted), + })).unwrap() + } + }; + + let idx_bytes = match idx_const.val { + ConstValue::ByRef { align: _, offset, alloc } => { + let ptr = Pointer::new(AllocId(0 /* dummy */), offset); + let size = Size::from_bytes(4 * u64::from(ret_lane_count) /* size_of([u32; ret_lane_count]) */); + alloc.get_bytes(fx, ptr, size).unwrap() + } + _ => unreachable!("{:?}", idx_const), + }; + + (0..ret_lane_count).map(|i| { + let i = usize::try_from(i).unwrap(); + let idx = rustc::mir::interpret::read_target_uint( + fx.tcx.data_layout.endian, + &idx_bytes[4*i.. 4*i + 4], + ).expect("read_target_uint"); + u32::try_from(idx).expect("try_from u32") + }).collect::>() + }; + + for &idx in &indexes { + assert!(idx < total_len, "idx {} out of range 0..{}", idx, total_len); + } + + + + println!("{:?}", indexes); + unimplemented!(); }; simd_add, (c x, c y) { diff --git a/src/llvm_intrinsics.rs b/src/llvm_intrinsics.rs index 765adafa10b..1ffd43bb780 100644 --- a/src/llvm_intrinsics.rs +++ b/src/llvm_intrinsics.rs @@ -6,7 +6,7 @@ pub fn codegen_llvm_intrinsic_call<'a, 'tcx: 'a>( fx: &mut FunctionCx<'a, 'tcx, impl Backend>, intrinsic: &str, substs: SubstsRef<'tcx>, - args: Vec>, + args: &[mir::Operand<'tcx>], destination: Option<(CPlace<'tcx>, BasicBlock)>, ) { fx.tcx.sess.warn(&format!("unsupported llvm intrinsic {}; replacing with trap", intrinsic)); From 9cb787fe7057311741d63ee26f3e14b4d58893d5 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Mon, 29 Jul 2019 11:23:53 +0200 Subject: [PATCH 07/17] Implement and test simd_shuffle* --- example/std_example.rs | 52 ++++++++++++++++++++++++++++++++++++++++-- src/intrinsics.rs | 13 +++++++---- 2 files changed, 59 insertions(+), 6 deletions(-) diff --git a/example/std_example.rs b/example/std_example.rs index 8a4a6337ca7..9da701d4469 100644 --- a/example/std_example.rs +++ b/example/std_example.rs @@ -59,12 +59,60 @@ unsafe fn test_simd() { let or = _mm_or_si128(x, y); let cmp_eq = _mm_cmpeq_epi8(y, y); let cmp_lt = _mm_cmplt_epi8(y, y); - let shl = _mm_slli_si128(y, 1); assert_eq!(std::mem::transmute::<_, [u16; 8]>(or), [7, 7, 7, 7, 7, 7, 7, 7]); assert_eq!(std::mem::transmute::<_, [u16; 8]>(cmp_eq), [0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff]); assert_eq!(std::mem::transmute::<_, [u16; 8]>(cmp_lt), [0, 0, 0, 0, 0, 0, 0, 0]); - assert_eq!(std::mem::transmute::<_, [u16; 8]>(or), [7, 7, 7, 7, 7, 7, 7, 0]); + + test_mm_slli_si128(); +} + +#[target_feature(enable = "sse2")] +unsafe fn test_mm_slli_si128() { + use std::arch::x86_64::*; + + #[rustfmt::skip] + let a = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + ); + let r = _mm_slli_si128(a, 1); + let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m128i(r, e); + + #[rustfmt::skip] + let a = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + ); + let r = _mm_slli_si128(a, 15); + let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); + assert_eq_m128i(r, e); + + #[rustfmt::skip] + let a = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + ); + let r = _mm_slli_si128(a, 16); + assert_eq_m128i(r, _mm_set1_epi8(0)); + + #[rustfmt::skip] + let a = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + ); + let r = _mm_slli_si128(a, -1); + assert_eq_m128i(_mm_set1_epi8(0), r); + + #[rustfmt::skip] + let a = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + ); + let r = _mm_slli_si128(a, -0x80000000); + assert_eq_m128i(r, _mm_set1_epi8(0)); +} + +fn assert_eq_m128i(x: std::arch::x86_64::__m128i, y: std::arch::x86_64::__m128i) { + unsafe { + assert_eq!(std::mem::transmute::<_, [u8; 16]>(x), std::mem::transmute::<_, [u8; 16]>(x)); + } } #[derive(PartialEq)] diff --git a/src/intrinsics.rs b/src/intrinsics.rs index c8d8bd7a793..251299dcdbe 100644 --- a/src/intrinsics.rs +++ b/src/intrinsics.rs @@ -877,10 +877,15 @@ pub fn codegen_intrinsic_call<'a, 'tcx: 'a>( assert!(idx < total_len, "idx {} out of range 0..{}", idx, total_len); } - - - println!("{:?}", indexes); - unimplemented!(); + for (out_idx, in_idx) in indexes.into_iter().enumerate() { + let in_lane = if in_idx < lane_count { + x.value_field(fx, mir::Field::new(in_idx.try_into().unwrap())) + } else { + y.value_field(fx, mir::Field::new((in_idx - lane_count).try_into().unwrap())) + }; + let out_lane = ret.place_field(fx, mir::Field::new(out_idx)); + out_lane.write_cvalue(fx, in_lane); + } }; simd_add, (c x, c y) { From 48a6b581b557f1120e3cb59f2ce51b1b0384275a Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Mon, 29 Jul 2019 12:43:24 +0200 Subject: [PATCH 08/17] Emulate llvm.x86.sse2.pmovmskb.128 llvm intrinsic --- src/intrinsics.rs | 43 ++++++++++++++++++++++++-------------- src/llvm_intrinsics.rs | 47 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 71 insertions(+), 19 deletions(-) diff --git a/src/intrinsics.rs b/src/intrinsics.rs index 251299dcdbe..042a0934709 100644 --- a/src/intrinsics.rs +++ b/src/intrinsics.rs @@ -2,42 +2,50 @@ use crate::prelude::*; use rustc::ty::subst::SubstsRef; -macro_rules! intrinsic_pat { +macro intrinsic_pat { (_) => { _ - }; + }, ($name:ident) => { stringify!($name) + }, + ($name:literal) => { + stringify!($name) + }, + ($x:ident . $($xs:tt).*) => { + concat!(stringify!($x), ".", intrinsic_pat!($($xs).*)) } } -macro_rules! intrinsic_arg { +macro intrinsic_arg { (o $fx:expr, $arg:ident) => { $arg - }; + }, (c $fx:expr, $arg:ident) => { trans_operand($fx, $arg) - }; + }, (v $fx:expr, $arg:ident) => { trans_operand($fx, $arg).load_scalar($fx) - }; + } } -macro_rules! intrinsic_substs { - ($substs:expr, $index:expr,) => {}; +macro intrinsic_substs { + ($substs:expr, $index:expr,) => {}, ($substs:expr, $index:expr, $first:ident $(,$rest:ident)*) => { let $first = $substs.type_at($index); intrinsic_substs!($substs, $index+1, $($rest),*); - }; + } } -macro_rules! intrinsic_match { - ($fx:expr, $intrinsic:expr, $substs:expr, $args:expr, $( - $($name:tt)|+ $(if $cond:expr)?, $(<$($subst:ident),*>)? ($($a:ident $arg:ident),*) $content:block; +pub macro intrinsic_match { + ($fx:expr, $intrinsic:expr, $substs:expr, $args:expr, + _ => $unknown:block; + $( + $($($name:tt).*)|+ $(if $cond:expr)?, $(<$($subst:ident),*>)? ($($a:ident $arg:ident),*) $content:block; )*) => { match $intrinsic { $( - $(intrinsic_pat!($name))|* $(if $cond)? => { + $(intrinsic_pat!($($name).*))|* $(if $cond)? => { #[allow(unused_parens, non_snake_case)] { $( @@ -57,9 +65,9 @@ macro_rules! intrinsic_match { } } )* - _ => unimpl!("unsupported intrinsic {}", $intrinsic), + _ => $unknown, } - }; + } } macro_rules! call_intrinsic_match { @@ -122,7 +130,7 @@ macro_rules! atomic_minmax { }; } -fn lane_type_and_count<'tcx>( +pub fn lane_type_and_count<'tcx>( fx: &FunctionCx<'_, 'tcx, impl Backend>, layout: TyLayout<'tcx>, intrinsic: &str, @@ -282,6 +290,9 @@ pub fn codegen_intrinsic_call<'a, 'tcx: 'a>( intrinsic_match! { fx, intrinsic, substs, args, + _ => { + unimpl!("unsupported intrinsic {}", intrinsic) + }; assume, (c _a) {}; likely | unlikely, (c a) { diff --git a/src/llvm_intrinsics.rs b/src/llvm_intrinsics.rs index 1ffd43bb780..bb993298411 100644 --- a/src/llvm_intrinsics.rs +++ b/src/llvm_intrinsics.rs @@ -9,8 +9,50 @@ pub fn codegen_llvm_intrinsic_call<'a, 'tcx: 'a>( args: &[mir::Operand<'tcx>], destination: Option<(CPlace<'tcx>, BasicBlock)>, ) { - fx.tcx.sess.warn(&format!("unsupported llvm intrinsic {}; replacing with trap", intrinsic)); - crate::trap::trap_unimplemented(fx, intrinsic); + let ret = match destination { + Some((place, _)) => place, + None => { + // Insert non returning intrinsics here + match intrinsic { + "abort" => { + trap_panic(fx, "Called intrinsic::abort."); + } + "unreachable" => { + trap_unreachable(fx, "[corruption] Called intrinsic::unreachable."); + } + _ => unimplemented!("unsupported instrinsic {}", intrinsic), + } + return; + } + }; + + crate::intrinsics::intrinsic_match! { + fx, intrinsic, substs, args, + _ => { + fx.tcx.sess.warn(&format!("unsupported llvm intrinsic {}; replacing with trap", intrinsic)); + crate::trap::trap_unimplemented(fx, intrinsic); + }; + + // Used by _mm_movemask_epi8 + llvm.x86.sse2.pmovmskb.128, (c a) { + let (lane_layout, lane_count) = crate::intrinsics::lane_type_and_count(fx, a.layout(), intrinsic); + assert_eq!(lane_layout.ty.sty, fx.tcx.types.i8.sty); + assert_eq!(lane_count, 16); + + let mut res = fx.bcx.ins().iconst(types::I32, 0); + + for lane in 0..16 { + let a_lane = a.value_field(fx, mir::Field::new(lane.try_into().unwrap())).load_scalar(fx); + let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, 7); // extract sign bit of 8bit int + let a_lane_sign = fx.bcx.ins().uextend(types::I32, a_lane_sign); + res = fx.bcx.ins().ishl_imm(res, 1); + res = fx.bcx.ins().bor(res, a_lane_sign); + } + + let res = CValue::by_val(res, fx.layout_of(fx.tcx.types.i32)); + ret.write_cvalue(fx, res); + }; + } if let Some((_, dest)) = destination { let ret_ebb = fx.get_ebb(dest); @@ -20,7 +62,6 @@ pub fn codegen_llvm_intrinsic_call<'a, 'tcx: 'a>( } } -// llvm.x86.sse2.pmovmskb.128 // llvm.x86.avx2.vperm2i128 // llvm.x86.ssse3.pshuf.b.128 // llvm.x86.avx2.pshuf.b From 63646b1956375836fac8610d04da5d2e8ce04559 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Mon, 29 Jul 2019 12:50:20 +0200 Subject: [PATCH 09/17] Implement llvm.x86.avx2.pmovmskb llvm intrinsic --- example/std_example.rs | 27 +++++++++++++++++++++++++++ src/llvm_intrinsics.rs | 9 ++++----- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/example/std_example.rs b/example/std_example.rs index 9da701d4469..e3b3edd86af 100644 --- a/example/std_example.rs +++ b/example/std_example.rs @@ -65,6 +65,8 @@ unsafe fn test_simd() { assert_eq!(std::mem::transmute::<_, [u16; 8]>(cmp_lt), [0, 0, 0, 0, 0, 0, 0, 0]); test_mm_slli_si128(); + test_mm_movemask_epi8(); + test_mm256_movemask_epi8(); } #[target_feature(enable = "sse2")] @@ -109,6 +111,31 @@ unsafe fn test_mm_slli_si128() { assert_eq_m128i(r, _mm_set1_epi8(0)); } +#[target_feature(enable = "sse2")] +unsafe fn test_mm_movemask_epi8() { + use std::arch::x86_64::*; + + #[rustfmt::skip] + let a = _mm_setr_epi8( + 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01, + 0b0101, 0b1111_0000u8 as i8, 0, 0, + 0, 0, 0b1111_0000u8 as i8, 0b0101, + 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, + ); + let r = _mm_movemask_epi8(a); + assert_eq!(r, 0b10100100_00100101); +} + +#[target_feature(enable = "avx2")] +unsafe fn test_mm256_movemask_epi8() { + use std::arch::x86_64::*; + + let a = _mm256_set1_epi8(-1); + let r = _mm256_movemask_epi8(a); + let e = -1; + assert_eq!(r, e); +} + fn assert_eq_m128i(x: std::arch::x86_64::__m128i, y: std::arch::x86_64::__m128i) { unsafe { assert_eq!(std::mem::transmute::<_, [u8; 16]>(x), std::mem::transmute::<_, [u8; 16]>(x)); diff --git a/src/llvm_intrinsics.rs b/src/llvm_intrinsics.rs index bb993298411..32aa8b5d3df 100644 --- a/src/llvm_intrinsics.rs +++ b/src/llvm_intrinsics.rs @@ -33,15 +33,15 @@ pub fn codegen_llvm_intrinsic_call<'a, 'tcx: 'a>( crate::trap::trap_unimplemented(fx, intrinsic); }; - // Used by _mm_movemask_epi8 - llvm.x86.sse2.pmovmskb.128, (c a) { + // Used by `_mm_movemask_epi8` and `_mm256_movemask_epi8` + llvm.x86.sse2.pmovmskb.128 | llvm.x86.avx2.pmovmskb, (c a) { let (lane_layout, lane_count) = crate::intrinsics::lane_type_and_count(fx, a.layout(), intrinsic); assert_eq!(lane_layout.ty.sty, fx.tcx.types.i8.sty); - assert_eq!(lane_count, 16); + assert!(lane_count == 16 || lane_count == 32); let mut res = fx.bcx.ins().iconst(types::I32, 0); - for lane in 0..16 { + for lane in 0..lane_count { let a_lane = a.value_field(fx, mir::Field::new(lane.try_into().unwrap())).load_scalar(fx); let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, 7); // extract sign bit of 8bit int let a_lane_sign = fx.bcx.ins().uextend(types::I32, a_lane_sign); @@ -65,6 +65,5 @@ pub fn codegen_llvm_intrinsic_call<'a, 'tcx: 'a>( // llvm.x86.avx2.vperm2i128 // llvm.x86.ssse3.pshuf.b.128 // llvm.x86.avx2.pshuf.b -// llvm.x86.avx2.pmovmskb // llvm.x86.avx2.psrli.w // llvm.x86.sse2.psrli.w From 49b21f27309b423c702bf184e653eb9f669ef30a Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Mon, 29 Jul 2019 13:18:21 +0200 Subject: [PATCH 10/17] Fix returning (u128, u128) --- example/mini_core_hello_world.rs | 10 ++++++++++ src/abi.rs | 14 ++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/example/mini_core_hello_world.rs b/example/mini_core_hello_world.rs index 641f0c98912..380bc487bcc 100644 --- a/example/mini_core_hello_world.rs +++ b/example/mini_core_hello_world.rs @@ -117,6 +117,14 @@ impl CoerceUnsized> for Unique where T: Unsiz fn take_f32(_f: f32) {} fn take_unique(_u: Unique<()>) {} +fn return_u128_pair() -> (u128, u128) { + (0, 0) +} + +fn call_return_u128_pair() { + return_u128_pair(); +} + fn main() { take_unique(Unique { pointer: 0 as *const (), @@ -124,6 +132,8 @@ fn main() { }); take_f32(0.1); + call_return_u128_pair(); + //return; unsafe { diff --git a/src/abi.rs b/src/abi.rs index 3de9ea60e71..f9a747da314 100644 --- a/src/abi.rs +++ b/src/abi.rs @@ -113,10 +113,16 @@ fn get_pass_mode<'tcx>( PassMode::ByVal(scalar_to_clif_type(tcx, scalar.clone())) } layout::Abi::ScalarPair(a, b) => { - PassMode::ByValPair( - scalar_to_clif_type(tcx, a.clone()), - scalar_to_clif_type(tcx, b.clone()), - ) + let a = scalar_to_clif_type(tcx, a.clone()); + let b = scalar_to_clif_type(tcx, b.clone()); + if a == types::I128 && b == types::I128 { + // Returning (i128, i128) by-val-pair would take 4 regs, while only 3 are + // available on x86_64. Cranelift gets confused when too many return params + // are used. + PassMode::ByRef + } else { + PassMode::ByValPair(a, b) + } } // FIXME implement Vector Abi in a cg_llvm compatible way From 9f0fad00243c6b1c5b491e40cb1d5b04daa51b93 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Mon, 29 Jul 2019 14:35:15 +0200 Subject: [PATCH 11/17] Fix thread disable patch --- patches/0015-Remove-usage-of-unsized-locals.patch | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/patches/0015-Remove-usage-of-unsized-locals.patch b/patches/0015-Remove-usage-of-unsized-locals.patch index 5d7a780d329..14504cce09f 100644 --- a/patches/0015-Remove-usage-of-unsized-locals.patch +++ b/patches/0015-Remove-usage-of-unsized-locals.patch @@ -94,5 +94,18 @@ index b2142e7..718bb1c 100644 } pub fn min_stack() -> usize { +diff --git a/src/libstd/sys/unix/thread.rs b/src/libstd/sys/unix/thread.rs +index f4a1783..362b537 100644 +--- a/src/libstd/sys/unix/thread.rs ++++ b/src/libstd/sys/unix/thread.rs +@@ -40,6 +40,8 @@ impl Thread { + // unsafe: see thread::Builder::spawn_unchecked for safety requirements + pub unsafe fn new(stack: usize, p: Box) + -> io::Result { ++ panic!("Threads are not yet supported, because cranelift doesn't support atomics."); ++ + let p = box p; + let mut native: libc::pthread_t = mem::zeroed(); + let mut attr: libc::pthread_attr_t = mem::zeroed(); -- 2.20.1 (Apple Git-117) From aae9a8b91dafe09adee1d80079e79526f57d29eb Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Mon, 29 Jul 2019 14:35:42 +0200 Subject: [PATCH 12/17] Patch core_arch to tell programs that cpuid is not supported --- patches/0016-Disable-cpuid-intrinsic.patch | 25 ++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 patches/0016-Disable-cpuid-intrinsic.patch diff --git a/patches/0016-Disable-cpuid-intrinsic.patch b/patches/0016-Disable-cpuid-intrinsic.patch new file mode 100644 index 00000000000..ac24bc6e88c --- /dev/null +++ b/patches/0016-Disable-cpuid-intrinsic.patch @@ -0,0 +1,25 @@ +From 7403e2998345ef0650fd50628d7098d4d1e88e5c Mon Sep 17 00:00:00 2001 +From: bjorn3 +Date: Sat, 6 Apr 2019 12:16:21 +0200 +Subject: [PATCH] Remove usage of unsized locals + +--- + src/stdarch/crates/core_arch/src/x86/cpuid.rs | 2 ++ + 1 files changed, 2 insertions(+), 0 deletions(-) + +diff --git a/src/stdarch/crates/core_arch/src/x86/cpuid.rs b/src/stdarch/crates/core_arch/src/x86/cpuid.rs +index f313c42..ff952bc 100644 +--- a/src/stdarch/crates/core_arch/src/x86/cpuid.rs ++++ b/src/stdarch/crates/core_arch/src/x86/cpuid.rs +@@ -84,6 +84,9 @@ pub unsafe fn __cpuid(leaf: u32) -> CpuidResult { + /// Does the host support the `cpuid` instruction? + #[inline] + pub fn has_cpuid() -> bool { ++ // __cpuid intrinsic is not yet implemented ++ return false; ++ + #[cfg(target_env = "sgx")] + { + false +-- +2.20.1 (Apple Git-117) From ee4927e069ae317c4b2360eafe07a3fbaa8f0988 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Mon, 29 Jul 2019 18:59:17 +0200 Subject: [PATCH 13/17] Fix _mm_movemask_epi8 The order of iteration was wrong --- example/std_example.rs | 3 +++ src/llvm_intrinsics.rs | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/example/std_example.rs b/example/std_example.rs index e3b3edd86af..8a43af5bd80 100644 --- a/example/std_example.rs +++ b/example/std_example.rs @@ -67,6 +67,9 @@ unsafe fn test_simd() { test_mm_slli_si128(); test_mm_movemask_epi8(); test_mm256_movemask_epi8(); + + let mask1 = _mm_movemask_epi8(dbg!(_mm_setr_epi8(255u8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))); + assert_eq!(mask1, 1); } #[target_feature(enable = "sse2")] diff --git a/src/llvm_intrinsics.rs b/src/llvm_intrinsics.rs index 32aa8b5d3df..b93fa1bdbdf 100644 --- a/src/llvm_intrinsics.rs +++ b/src/llvm_intrinsics.rs @@ -41,7 +41,7 @@ pub fn codegen_llvm_intrinsic_call<'a, 'tcx: 'a>( let mut res = fx.bcx.ins().iconst(types::I32, 0); - for lane in 0..lane_count { + for lane in (0..lane_count).rev() { let a_lane = a.value_field(fx, mir::Field::new(lane.try_into().unwrap())).load_scalar(fx); let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, 7); // extract sign bit of 8bit int let a_lane_sign = fx.bcx.ins().uextend(types::I32, a_lane_sign); From 8691b8b8b6d88a51ee4783a8c704f052db479738 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Tue, 30 Jul 2019 13:37:47 +0200 Subject: [PATCH 14/17] Test rust-lang/regex example shootout-regex-dna --- .gitignore | 1 + build_sysroot/build_sysroot.sh | 26 +++++++++++--------------- cargo.sh | 14 ++++++++++++++ clean_all.sh | 1 + config.sh | 10 ++-------- crate_patches/regex.patch | 34 ++++++++++++++++++++++++++++++++++ prepare.sh | 7 +++++++ test.sh | 22 ++++++++++++++++++++++ 8 files changed, 92 insertions(+), 23 deletions(-) create mode 100755 cargo.sh create mode 100644 crate_patches/regex.patch diff --git a/.gitignore b/.gitignore index 9a9df1021e5..c455aa46a0c 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ perf.data.old /build_sysroot/sysroot_src /build_sysroot/Cargo.lock /rust +/regex diff --git a/build_sysroot/build_sysroot.sh b/build_sysroot/build_sysroot.sh index 57752c402c5..165d67a8146 100755 --- a/build_sysroot/build_sysroot.sh +++ b/build_sysroot/build_sysroot.sh @@ -1,34 +1,30 @@ #!/bin/bash + +# Requires the CHANNEL env var to be set to `debug` or `release.` + set -e cd $(dirname "$0") +pushd ../ >/dev/null +source ./config.sh +popd >/dev/null + # Cleanup for previous run # v Clean target dir except for build scripts and incremental cache rm -r target/*/{debug,release}/{build,deps,examples,libsysroot*,native} || true rm Cargo.lock 2>/dev/null || true rm -r sysroot 2>/dev/null || true -# FIXME find a better way to get the target triple -unamestr=`uname` -if [[ "$unamestr" == 'Linux' ]]; then - TARGET_TRIPLE='x86_64-unknown-linux-gnu' -elif [[ "$unamestr" == 'Darwin' ]]; then - TARGET_TRIPLE='x86_64-apple-darwin' -else - echo "Unsupported os" - exit 1 -fi - # Build libs -mkdir -p sysroot/lib/rustlib/$TARGET_TRIPLE/lib/ export RUSTFLAGS="$RUSTFLAGS -Z force-unstable-if-unmarked" if [[ "$1" == "--release" ]]; then - channel='release' + sysroot_channel='release' RUSTFLAGS="$RUSTFLAGS -Zmir-opt-level=3" cargo build --target $TARGET_TRIPLE --release else - channel='debug' + sysroot_channel='debug' cargo build --target $TARGET_TRIPLE fi # Copy files to sysroot -cp target/$TARGET_TRIPLE/$channel/deps/*.rlib sysroot/lib/rustlib/$TARGET_TRIPLE/lib/ +mkdir -p sysroot/lib/rustlib/$TARGET_TRIPLE/lib/ +cp target/$TARGET_TRIPLE/$sysroot_channel/deps/*.rlib sysroot/lib/rustlib/$TARGET_TRIPLE/lib/ diff --git a/cargo.sh b/cargo.sh new file mode 100755 index 00000000000..42c137030a5 --- /dev/null +++ b/cargo.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +if [ -z $CHANNEL ]; then +export CHANNEL='debug' +fi + +pushd $(dirname "$0") >/dev/null +source config.sh +popd >/dev/null + +cmd=$1 +shift + +cargo $cmd --target $TARGET_TRIPLE $@ diff --git a/clean_all.sh b/clean_all.sh index f768bb50c73..ced73acc579 100755 --- a/clean_all.sh +++ b/clean_all.sh @@ -2,3 +2,4 @@ set -e rm -rf target/ build_sysroot/{sysroot/,sysroot_src/,target/,Cargo.lock} perf.data{,.old} +rm -rf regex/ diff --git a/config.sh b/config.sh index 7fbd4112889..a6868f792de 100644 --- a/config.sh +++ b/config.sh @@ -10,14 +10,8 @@ else exit 1 fi -if [[ "$1" == "--release" ]]; then - channel='release' - cargo build --release -else - channel='debug' - cargo build -fi +TARGET_TRIPLE=$(rustc -vV | grep host | cut -d: -f2 | tr -d " ") -export RUSTFLAGS='-Zalways-encode-mir -Cpanic=abort -Cdebuginfo=2 -Zcodegen-backend='$(pwd)'/target/'$channel'/librustc_codegen_cranelift.'$dylib_ext' --sysroot '$(pwd)'/build_sysroot/sysroot' +export RUSTFLAGS='-Zalways-encode-mir -Cpanic=abort -Cdebuginfo=2 -Zcodegen-backend='$(pwd)'/target/'$CHANNEL'/librustc_codegen_cranelift.'$dylib_ext' --sysroot '$(pwd)'/build_sysroot/sysroot' RUSTC="rustc $RUSTFLAGS -L crate=target/out --out-dir target/out" export RUSTC_LOG=warn # display metadata load errors diff --git a/crate_patches/regex.patch b/crate_patches/regex.patch new file mode 100644 index 00000000000..4209ccfbdd2 --- /dev/null +++ b/crate_patches/regex.patch @@ -0,0 +1,34 @@ +From febff2a8c639efb5de1e1b4758cdb473847d80ce Mon Sep 17 00:00:00 2001 +From: bjorn3 +Date: Tue, 30 Jul 2019 12:12:37 +0200 +Subject: [PATCH] Disable threads in shootout-regex-dna example + +--- + examples/shootout-regex-dna.rs | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/examples/shootout-regex-dna.rs b/examples/shootout-regex-dna.rs +index 2171bb3..37382f8 100644 +--- a/examples/shootout-regex-dna.rs ++++ b/examples/shootout-regex-dna.rs +@@ -37,7 +37,7 @@ fn main() { + for variant in variants { + let seq = seq_arc.clone(); + let restr = variant.to_string(); +- let future = thread::spawn(move || variant.find_iter(&seq).count()); ++ let future = variant.find_iter(&seq).count(); + counts.push((restr, future)); + } + +@@ -60,7 +60,7 @@ fn main() { + } + + for (variant, count) in counts { +- println!("{} {}", variant, count.join().unwrap()); ++ println!("{} {}", variant, count); + } + println!("\n{}\n{}\n{}", ilen, clen, seq.len()); + } +-- +2.11.0 + diff --git a/prepare.sh b/prepare.sh index be3388aac55..32cfa4b8c88 100755 --- a/prepare.sh +++ b/prepare.sh @@ -4,3 +4,10 @@ set -e rustup component add rust-src ./build_sysroot/prepare_sysroot_src.sh cargo install hyperfine || echo "Skipping hyperfine install" + +git clone https://github.com/rust-lang/regex.git || echo "rust-lang/regex has already been cloned" +pushd regex +git checkout -- . +git checkout 341f207c1071f7290e3f228c710817c280c8dca1 +git apply ../crate_patches/regex.patch +popd diff --git a/test.sh b/test.sh index 1dfbf924754..1de4a65bbed 100755 --- a/test.sh +++ b/test.sh @@ -1,4 +1,13 @@ #!/bin/bash + +if [[ "$1" == "--release" ]]; then + export CHANNEL='release' + cargo build --release +else + export CHANNEL='debug' + cargo build +fi + source config.sh rm -r target/out || true @@ -39,6 +48,19 @@ $RUSTC example/mod_bench.rs --crate-type bin #echo "[BUILD] sysroot in release mode" #./build_sysroot/build_sysroot.sh --release +pushd regex +echo "[TEST] rust-lang/regex example shootout-regex-dna" +../cargo.sh clean +# Make sure `[codegen mono items] start` doesn't poison the diff +../cargo.sh build --example shootout-regex-dna +cat examples/regexdna-input.txt | ../cargo.sh run --example shootout-regex-dna > res.txt +diff -u res.txt examples/regexdna-output.txt + +# FIXME compile libtest +# echo "[TEST] rust-lang/regex standalone tests" +# ../cargo.sh test +popd + COMPILE_MOD_BENCH_INLINE="$RUSTC example/mod_bench.rs --crate-type bin -Zmir-opt-level=3 -O --crate-name mod_bench_inline" COMPILE_MOD_BENCH_LLVM_0="rustc example/mod_bench.rs --crate-type bin -Copt-level=0 -o target/out/mod_bench_llvm_0 -Cpanic=abort" COMPILE_MOD_BENCH_LLVM_1="rustc example/mod_bench.rs --crate-type bin -Copt-level=1 -o target/out/mod_bench_llvm_1 -Cpanic=abort" From 69526d464fc0ef021beb1718d68035555f30c33d Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Tue, 30 Jul 2019 14:37:20 +0200 Subject: [PATCH 15/17] Implement some float simd intrinsics --- example/std_example.rs | 41 +++++++++++--- src/constant.rs | 31 ++++++++++- src/intrinsics.rs | 123 +++++++++++++++++++++++++---------------- src/llvm_intrinsics.rs | 56 ++++++++++++++++--- 4 files changed, 187 insertions(+), 64 deletions(-) diff --git a/example/std_example.rs b/example/std_example.rs index 8a43af5bd80..33523a12871 100644 --- a/example/std_example.rs +++ b/example/std_example.rs @@ -1,5 +1,6 @@ #![feature(core_intrinsics)] +use std::arch::x86_64::*; use std::io::Write; use std::intrinsics; @@ -52,8 +53,6 @@ fn main() { #[target_feature(enable = "sse2")] unsafe fn test_simd() { - use std::arch::x86_64::*; - let x = _mm_setzero_si128(); let y = _mm_set1_epi16(7); let or = _mm_or_si128(x, y); @@ -67,6 +66,8 @@ unsafe fn test_simd() { test_mm_slli_si128(); test_mm_movemask_epi8(); test_mm256_movemask_epi8(); + test_mm_add_epi8(); + test_mm_add_pd(); let mask1 = _mm_movemask_epi8(dbg!(_mm_setr_epi8(255u8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))); assert_eq!(mask1, 1); @@ -74,8 +75,6 @@ unsafe fn test_simd() { #[target_feature(enable = "sse2")] unsafe fn test_mm_slli_si128() { - use std::arch::x86_64::*; - #[rustfmt::skip] let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, @@ -116,8 +115,6 @@ unsafe fn test_mm_slli_si128() { #[target_feature(enable = "sse2")] unsafe fn test_mm_movemask_epi8() { - use std::arch::x86_64::*; - #[rustfmt::skip] let a = _mm_setr_epi8( 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01, @@ -131,20 +128,48 @@ unsafe fn test_mm_movemask_epi8() { #[target_feature(enable = "avx2")] unsafe fn test_mm256_movemask_epi8() { - use std::arch::x86_64::*; - let a = _mm256_set1_epi8(-1); let r = _mm256_movemask_epi8(a); let e = -1; assert_eq!(r, e); } +#[target_feature(enable = "sse2")] +unsafe fn test_mm_add_epi8() { + let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm_setr_epi8( + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ); + let r = _mm_add_epi8(a, b); + #[rustfmt::skip] + let e = _mm_setr_epi8( + 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, + ); + assert_eq_m128i(r, e); +} + +#[target_feature(enable = "sse2")] +unsafe fn test_mm_add_pd() { + let a = _mm_setr_pd(1.0, 2.0); + let b = _mm_setr_pd(5.0, 10.0); + let r = _mm_add_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0)); +} + fn assert_eq_m128i(x: std::arch::x86_64::__m128i, y: std::arch::x86_64::__m128i) { unsafe { assert_eq!(std::mem::transmute::<_, [u8; 16]>(x), std::mem::transmute::<_, [u8; 16]>(x)); } } +#[target_feature(enable = "sse2")] +pub unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) { + if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 { + panic!("{:?} != {:?}", a, b); + } +} + #[derive(PartialEq)] enum LoopState { Continue(()), diff --git a/src/constant.rs b/src/constant.rs index 10f757ffce8..c8fb2767f9a 100644 --- a/src/constant.rs +++ b/src/constant.rs @@ -88,7 +88,7 @@ pub fn trans_constant<'a, 'tcx: 'a>( } pub fn force_eval_const<'a, 'tcx: 'a>( - fx: &mut FunctionCx<'a, 'tcx, impl Backend>, + fx: &FunctionCx<'a, 'tcx, impl Backend>, const_: &'tcx Const, ) -> &'tcx Const<'tcx> { match const_.val { @@ -422,3 +422,32 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for TransPlaceInterpreter { Ok(()) } } + +pub fn mir_operand_get_const_val<'tcx>( + fx: &FunctionCx<'_, 'tcx, impl Backend>, + operand: &Operand<'tcx>, +) -> Result<&'tcx Const<'tcx>, String> { + let place = match operand { + Operand::Copy(place) => place, + Operand::Constant(const_) => return Ok(force_eval_const(fx, const_.literal)), + _ => return Err(format!("{:?}", operand)), + }; + + assert!(place.projection.is_none()); + let static_ = match &place.base { + PlaceBase::Static(static_) => { + static_ + } + PlaceBase::Local(_) => return Err("local".to_string()), + }; + + Ok(match &static_.kind { + StaticKind::Static(_) => unimplemented!(), + StaticKind::Promoted(promoted) => { + fx.tcx.const_eval(ParamEnv::reveal_all().and(GlobalId { + instance: fx.instance, + promoted: Some(*promoted), + })).unwrap() + } + }) +} diff --git a/src/intrinsics.rs b/src/intrinsics.rs index 042a0934709..a456cac1d74 100644 --- a/src/intrinsics.rs +++ b/src/intrinsics.rs @@ -144,7 +144,7 @@ pub fn lane_type_and_count<'tcx>( (lane_layout, lane_count) } -fn simd_for_each_lane<'tcx, B: Backend>( +pub fn simd_for_each_lane<'tcx, B: Backend>( fx: &mut FunctionCx<'_, 'tcx, B>, intrinsic: &str, x: CValue<'tcx>, @@ -170,23 +170,37 @@ fn simd_for_each_lane<'tcx, B: Backend>( } } -fn bool_to_zero_or_max_uint<'tcx>( +pub fn bool_to_zero_or_max_uint<'tcx>( fx: &mut FunctionCx<'_, 'tcx, impl Backend>, layout: TyLayout<'tcx>, val: Value, ) -> CValue<'tcx> { let ty = fx.clif_type(layout.ty).unwrap(); - let zero = fx.bcx.ins().iconst(ty, 0); - let max = fx.bcx.ins().iconst(ty, (u64::max_value() >> (64 - ty.bits())) as i64); - let res = crate::common::codegen_select(&mut fx.bcx, val, max, zero); + let int_ty = match ty { + types::F32 => types::I32, + types::F64 => types::I64, + ty => ty, + }; + + let zero = fx.bcx.ins().iconst(int_ty, 0); + let max = fx.bcx.ins().iconst(int_ty, (u64::max_value() >> (64 - int_ty.bits())) as i64); + let mut res = crate::common::codegen_select(&mut fx.bcx, val, max, zero); + + if ty.is_float() { + res = fx.bcx.ins().bitcast(ty, res); + } + CValue::by_val(res, layout) } macro_rules! simd_cmp { ($fx:expr, $intrinsic:expr, $cc:ident($x:ident, $y:ident) -> $ret:ident) => { - simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, _lane_layout, res_lane_layout, x_lane, y_lane| { - let res_lane = fx.bcx.ins().icmp(IntCC::$cc, x_lane, y_lane); + simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, lane_layout, res_lane_layout, x_lane, y_lane| { + let res_lane = match lane_layout.ty.sty { + ty::Uint(_) | ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc, x_lane, y_lane), + _ => unreachable!("{:?}", lane_layout.ty), + }; bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane) }); }; @@ -203,10 +217,13 @@ macro_rules! simd_cmp { } -macro_rules! simd_binop { +macro_rules! simd_int_binop { ($fx:expr, $intrinsic:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) => { - simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, _lane_layout, ret_lane_layout, x_lane, y_lane| { - let res_lane = fx.bcx.ins().$op(x_lane, y_lane); + simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, lane_layout, ret_lane_layout, x_lane, y_lane| { + let res_lane = match lane_layout.ty.sty { + ty::Uint(_) | ty::Int(_) => fx.bcx.ins().$op(x_lane, y_lane), + _ => unreachable!("{:?}", lane_layout.ty), + }; CValue::by_val(res_lane, ret_lane_layout) }); }; @@ -222,6 +239,42 @@ macro_rules! simd_binop { }; } +macro_rules! simd_int_flt_binop { + ($fx:expr, $intrinsic:expr, $op:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => { + simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, lane_layout, ret_lane_layout, x_lane, y_lane| { + let res_lane = match lane_layout.ty.sty { + ty::Uint(_) | ty::Int(_) => fx.bcx.ins().$op(x_lane, y_lane), + ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane), + _ => unreachable!("{:?}", lane_layout.ty), + }; + CValue::by_val(res_lane, ret_lane_layout) + }); + }; + ($fx:expr, $intrinsic:expr, $op_u:ident|$op_s:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => { + simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, lane_layout, ret_lane_layout, x_lane, y_lane| { + let res_lane = match lane_layout.ty.sty { + ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane), + ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane), + ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane), + _ => unreachable!("{:?}", lane_layout.ty), + }; + CValue::by_val(res_lane, ret_lane_layout) + }); + }; +} + +macro_rules! simd_flt_binop { + ($fx:expr, $intrinsic:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) => { + simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, lane_layout, ret_lane_layout, x_lane, y_lane| { + let res_lane = match lane_layout.ty.sty { + ty::Float(_) => fx.bcx.ins().$op(x_lane, y_lane), + _ => unreachable!("{:?}", lane_layout.ty), + }; + CValue::by_val(res_lane, ret_lane_layout) + }); + } +} + pub fn codegen_intrinsic_call<'a, 'tcx: 'a>( fx: &mut FunctionCx<'a, 'tcx, impl Backend>, def_id: DefId, @@ -840,30 +893,7 @@ pub fn codegen_intrinsic_call<'a, 'tcx: 'a>( let indexes = { use rustc::mir::interpret::*; - let idx_place = match idx { - Operand::Copy(idx_place) => { - idx_place - } - _ => panic!("simd_shuffle* idx is not Operand::Copy, but {:?}", idx), - }; - - assert!(idx_place.projection.is_none()); - let static_ = match &idx_place.base { - PlaceBase::Static(static_) => { - static_ - } - PlaceBase::Local(_) => panic!("simd_shuffle* idx is not constant, but a local"), - }; - - let idx_const = match &static_.kind { - StaticKind::Static(_) => unimplemented!(), - StaticKind::Promoted(promoted) => { - fx.tcx.const_eval(ParamEnv::reveal_all().and(GlobalId { - instance: fx.instance, - promoted: Some(*promoted), - })).unwrap() - } - }; + let idx_const = crate::constant::mir_operand_get_const_val(fx, idx).expect("simd_shuffle* idx not const"); let idx_bytes = match idx_const.val { ConstValue::ByRef { align: _, offset, alloc } => { @@ -900,41 +930,38 @@ pub fn codegen_intrinsic_call<'a, 'tcx: 'a>( }; simd_add, (c x, c y) { - simd_binop!(fx, intrinsic, iadd(x, y) -> ret); + simd_int_flt_binop!(fx, intrinsic, iadd|fadd(x, y) -> ret); }; simd_sub, (c x, c y) { - simd_binop!(fx, intrinsic, isub(x, y) -> ret); + simd_int_flt_binop!(fx, intrinsic, isub|fsub(x, y) -> ret); }; simd_mul, (c x, c y) { - simd_binop!(fx, intrinsic, imul(x, y) -> ret); + simd_int_flt_binop!(fx, intrinsic, imul|fmul(x, y) -> ret); }; simd_div, (c x, c y) { - simd_binop!(fx, intrinsic, udiv|sdiv(x, y) -> ret); - }; - simd_rem, (c x, c y) { - simd_binop!(fx, intrinsic, urem|srem(x, y) -> ret); + simd_int_flt_binop!(fx, intrinsic, udiv|sdiv|fdiv(x, y) -> ret); }; simd_shl, (c x, c y) { - simd_binop!(fx, intrinsic, ishl(x, y) -> ret); + simd_int_binop!(fx, intrinsic, ishl(x, y) -> ret); }; simd_shr, (c x, c y) { - simd_binop!(fx, intrinsic, ushr|sshr(x, y) -> ret); + simd_int_binop!(fx, intrinsic, ushr|sshr(x, y) -> ret); }; simd_and, (c x, c y) { - simd_binop!(fx, intrinsic, band(x, y) -> ret); + simd_int_binop!(fx, intrinsic, band(x, y) -> ret); }; simd_or, (c x, c y) { - simd_binop!(fx, intrinsic, bor(x, y) -> ret); + simd_int_binop!(fx, intrinsic, bor(x, y) -> ret); }; simd_xor, (c x, c y) { - simd_binop!(fx, intrinsic, bxor(x, y) -> ret); + simd_int_binop!(fx, intrinsic, bxor(x, y) -> ret); }; simd_fmin, (c x, c y) { - simd_binop!(fx, intrinsic, fmin(x, y) -> ret); + simd_flt_binop!(fx, intrinsic, fmin(x, y) -> ret); }; simd_fmax, (c x, c y) { - simd_binop!(fx, intrinsic, fmax(x, y) -> ret); + simd_flt_binop!(fx, intrinsic, fmax(x, y) -> ret); }; } diff --git a/src/llvm_intrinsics.rs b/src/llvm_intrinsics.rs index b93fa1bdbdf..284bdee52b8 100644 --- a/src/llvm_intrinsics.rs +++ b/src/llvm_intrinsics.rs @@ -1,4 +1,5 @@ use crate::prelude::*; +use crate::intrinsics::*; use rustc::ty::subst::SubstsRef; @@ -26,7 +27,7 @@ pub fn codegen_llvm_intrinsic_call<'a, 'tcx: 'a>( } }; - crate::intrinsics::intrinsic_match! { + intrinsic_match! { fx, intrinsic, substs, args, _ => { fx.tcx.sess.warn(&format!("unsupported llvm intrinsic {}; replacing with trap", intrinsic)); @@ -34,17 +35,28 @@ pub fn codegen_llvm_intrinsic_call<'a, 'tcx: 'a>( }; // Used by `_mm_movemask_epi8` and `_mm256_movemask_epi8` - llvm.x86.sse2.pmovmskb.128 | llvm.x86.avx2.pmovmskb, (c a) { - let (lane_layout, lane_count) = crate::intrinsics::lane_type_and_count(fx, a.layout(), intrinsic); - assert_eq!(lane_layout.ty.sty, fx.tcx.types.i8.sty); - assert!(lane_count == 16 || lane_count == 32); + llvm.x86.sse2.pmovmskb.128 | llvm.x86.avx2.pmovmskb | llvm.x86.sse2.movmsk.pd, (c a) { + let (lane_layout, lane_count) = lane_type_and_count(fx, a.layout(), intrinsic); + let lane_ty = fx.clif_type(lane_layout.ty).unwrap(); + assert!(lane_count <= 32); let mut res = fx.bcx.ins().iconst(types::I32, 0); for lane in (0..lane_count).rev() { let a_lane = a.value_field(fx, mir::Field::new(lane.try_into().unwrap())).load_scalar(fx); - let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, 7); // extract sign bit of 8bit int - let a_lane_sign = fx.bcx.ins().uextend(types::I32, a_lane_sign); + + // cast float to int + let a_lane = match lane_ty { + types::F32 => fx.bcx.ins().bitcast(types::I32, a_lane), + types::F64 => fx.bcx.ins().bitcast(types::I64, a_lane), + _ => a_lane, + }; + + // extract sign bit of an int + let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, i64::from(lane_ty.bits() - 1)); + + // shift sign bit into result + let a_lane_sign = clif_intcast(fx, a_lane_sign, types::I32, false); res = fx.bcx.ins().ishl_imm(res, 1); res = fx.bcx.ins().bor(res, a_lane_sign); } @@ -52,6 +64,36 @@ pub fn codegen_llvm_intrinsic_call<'a, 'tcx: 'a>( let res = CValue::by_val(res, fx.layout_of(fx.tcx.types.i32)); ret.write_cvalue(fx, res); }; + llvm.x86.sse2.cmp.ps | llvm.x86.sse2.cmp.pd, (c x, c y, o kind) { + let kind_const = crate::constant::mir_operand_get_const_val(fx, kind).expect("llvm.x86.sse2.cmp.* kind not const"); + let flt_cc = match kind_const.val.try_to_bits(Size::from_bytes(1)).expect(&format!("kind not scalar: {:?}", kind_const)) { + 0 => FloatCC::Equal, + 1 => FloatCC::LessThan, + 2 => FloatCC::LessThanOrEqual, + 7 => { + unimplemented!("Compares corresponding elements in `a` and `b` to see if neither is `NaN`."); + } + 3 => { + unimplemented!("Compares corresponding elements in `a` and `b` to see if either is `NaN`."); + } + 4 => FloatCC::NotEqual, + 5 => { + unimplemented!("not less than"); + } + 6 => { + unimplemented!("not less than or equal"); + } + kind => unreachable!("kind {:?}", kind), + }; + + simd_for_each_lane(fx, intrinsic, x, y, ret, |fx, lane_layout, res_lane_layout, x_lane, y_lane| { + let res_lane = match lane_layout.ty.sty { + ty::Float(_) => fx.bcx.ins().fcmp(flt_cc, x_lane, y_lane), + _ => unreachable!("{:?}", lane_layout.ty), + }; + bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane) + }); + }; } if let Some((_, dest)) = destination { From b62e892fb5f9215addd3e8e7f33dc84d6ff69946 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Tue, 30 Jul 2019 14:51:05 +0200 Subject: [PATCH 16/17] Misc changes --- Readme.md | 2 +- test.sh | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Readme.md b/Readme.md index 56b65cf3efe..d26e453b2a2 100644 --- a/Readme.md +++ b/Readme.md @@ -32,7 +32,7 @@ $ RUSTFLAGS="-Cpanic=abort -Zcodegen-backend=$cg_clif_dir/target/debug/librustc_ * Good non-rust abi support ([vectors are passed by-ref](https://github.com/bjorn3/rustc_codegen_cranelift/issues/10)) * Checked binops ([some missing instructions in cranelift](https://github.com/CraneStation/cranelift/issues/460)) * Inline assembly ([no cranelift support](https://github.com/CraneStation/cranelift/issues/444)) -* SIMD ([tracked here](https://github.com/bjorn3/rustc_codegen_cranelift/issues/171)) +* SIMD ([tracked here](https://github.com/bjorn3/rustc_codegen_cranelift/issues/171), some basic things work) ## Troubleshooting diff --git a/test.sh b/test.sh index 1de4a65bbed..8df6b207964 100755 --- a/test.sh +++ b/test.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -e + if [[ "$1" == "--release" ]]; then export CHANNEL='release' cargo build --release From 1028fbb68c5f2caaeda4679a59258275b8c3d26e Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Tue, 30 Jul 2019 15:00:15 +0200 Subject: [PATCH 17/17] Fix release builds --- src/abi.rs | 4 +++- src/trap.rs | 8 ++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/abi.rs b/src/abi.rs index f9a747da314..c66dbe3958d 100644 --- a/src/abi.rs +++ b/src/abi.rs @@ -274,7 +274,9 @@ impl<'a, 'tcx: 'a, B: Backend + 'a> FunctionCx<'a, 'tcx, B> { .module .declare_func_in_func(func_id, &mut self.bcx.func); let call_inst = self.bcx.ins().call(func_ref, args); - self.add_comment(call_inst, format!("easy_call {}", name)); + #[cfg(debug_assertions)] { + self.add_comment(call_inst, format!("easy_call {}", name)); + } let results = self.bcx.inst_results(call_inst); assert!(results.len() <= 2, "{}", results.len()); results diff --git a/src/trap.rs b/src/trap.rs index cbbe5d203bb..c4e2cf35766 100644 --- a/src/trap.rs +++ b/src/trap.rs @@ -7,7 +7,9 @@ fn codegen_print(fx: &mut FunctionCx<'_, '_, impl cranelift_module::Backend>, ms returns: vec![], }).unwrap(); let puts = fx.module.declare_func_in_func(puts, &mut fx.bcx.func); - fx.add_entity_comment(puts, "puts"); + #[cfg(debug_assertions)] { + fx.add_entity_comment(puts, "puts"); + } let symbol_name = fx.tcx.symbol_name(fx.instance); let real_msg = format!("trap at {:?} ({}): {}\0", fx.instance, symbol_name, msg); @@ -19,7 +21,9 @@ fn codegen_print(fx: &mut FunctionCx<'_, '_, impl cranelift_module::Backend>, ms let _ = fx.module.define_data(msg_id, &data_ctx); let local_msg_id = fx.module.declare_data_in_func(msg_id, fx.bcx.func); - fx.add_entity_comment(local_msg_id, msg); + #[cfg(debug_assertions)] { + fx.add_entity_comment(local_msg_id, msg); + } let msg_ptr = fx.bcx.ins().global_value(pointer_ty(fx.tcx), local_msg_id); fx.bcx.ins().call(puts, &[msg_ptr]); }