Fix linking with shl/shr, add memset on host and support __assertfail

This commit is contained in:
Andrzej Janik 2020-11-21 01:53:07 +01:00
parent 84ac086146
commit 6e39c4a90c
12 changed files with 359 additions and 21 deletions

View file

@ -569,6 +569,29 @@ impl<'a> CommandList<'a> {
Ok(())
}
pub unsafe fn append_memory_fill_unsafe<T: Copy + Sized>(
&mut self,
dst: *mut c_void,
pattern: &T,
byte_size: usize,
signal: Option<&mut Event<'a>>,
wait: &mut [Event<'a>],
) -> Result<()> {
let signal_event = signal.map(|e| e.0).unwrap_or(ptr::null_mut());
let (wait_len, wait_ptr) = Event::raw_slice(wait);
check!(sys::zeCommandListAppendMemoryFill(
self.0,
dst,
pattern as *const T as *const _,
mem::size_of::<T>(),
byte_size,
signal_event,
wait_len,
wait_ptr
));
Ok(())
}
pub fn append_launch_kernel(
&mut self,
kernel: &'a Kernel,

View file

@ -2932,7 +2932,7 @@ pub extern "C" fn cuMemsetD32_v2(
ui: ::std::os::raw::c_uint,
N: usize,
) -> CUresult {
r#impl::unimplemented()
r#impl::memory::set_d32_v2(dstDevice.decuda(), ui, N).encuda()
}
#[cfg_attr(not(test), no_mangle)]

View file

@ -1,5 +1,5 @@
use super::{stream, CUresult, GlobalState};
use std::ffi::c_void;
use std::{ffi::c_void, mem};
pub fn alloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> Result<(), CUresult> {
let ptr = GlobalState::lock_current_context(|ctx| {
@ -27,6 +27,17 @@ pub fn free_v2(ptr: *mut c_void) -> Result<(), CUresult> {
.map_err(|_| CUresult::CUDA_ERROR_INVALID_VALUE)?
}
pub(crate) fn set_d32_v2(dst: *mut c_void, ui: u32, n: usize) -> Result<(), CUresult> {
GlobalState::lock_stream(stream::CU_STREAM_LEGACY, |stream| {
let mut cmd_list = stream.command_list()?;
unsafe {
cmd_list.append_memory_fill_unsafe(dst, &ui, mem::size_of::<u32>() * n, None, &mut [])
}?;
stream.queue.execute(cmd_list)?;
Ok::<_, CUresult>(())
})?
}
#[cfg(test)]
mod test {
use super::super::test::CudaDriverFns;

View file

@ -120,22 +120,27 @@ atomic_dec(atom_acquire_sys_shared_dec, memory_order_acquire, memory_order_acqui
atomic_dec(atom_release_sys_shared_dec, memory_order_release, memory_order_acquire, memory_scope_device, __local);
atomic_dec(atom_acq_rel_sys_shared_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __local);
uint FUNC(bfe_u32)(uint base, uint pos, uint len)
{
uint FUNC(bfe_u32)(uint base, uint pos, uint len) {
return intel_ubfe(base, pos, len);
}
ulong FUNC(bfe_u64)(ulong base, uint pos, uint len)
{
ulong FUNC(bfe_u64)(ulong base, uint pos, uint len) {
return intel_ubfe(base, pos, len);
}
int FUNC(bfe_s32)(int base, uint pos, uint len)
{
int FUNC(bfe_s32)(int base, uint pos, uint len) {
return intel_sbfe(base, pos, len);
}
long FUNC(bfe_s64)(long base, uint pos, uint len)
{
long FUNC(bfe_s64)(long base, uint pos, uint len) {
return intel_sbfe(base, pos, len);
}
}
void FUNC(__assertfail)(
__private ulong* message,
__private ulong* file,
__private uint* line,
__private ulong* function,
__private ulong* charSize
) {
}

Binary file not shown.

View file

@ -0,0 +1,79 @@
.version 6.5
.target sm_30
.address_size 64
.extern .func __assertfail
(
.param .b64 __assertfail_param_0,
.param .b64 __assertfail_param_1,
.param .b32 __assertfail_param_2,
.param .b64 __assertfail_param_3,
.param .b64 __assertfail_param_4
);
.extern .func __assertfail
(
.param .b64 __assertfail_param_0,
.param .b64 __assertfail_param_1,
.param .b32 __assertfail_param_2,
.param .b64 __assertfail_param_3,
.param .b64 __assertfail_param_4
);
.visible .entry assertfail(
.param .u64 input,
.param .u64 output
)
{
.reg .u64 in_addr;
.reg .u64 out_addr;
.reg .u64 temp;
.reg .u64 temp2;
ld.param.u64 in_addr, [input];
ld.param.u64 out_addr, [output];
{
.reg .b32 temp_param_reg;
mov.u32 temp_param_reg, 0;
// <end>}
.param .b64 param0;
st.param.b64 [param0+0], in_addr;
.param .b64 param1;
st.param.b64 [param1+0], in_addr;
.param .b32 param2;
st.param.b32 [param2+0], temp_param_reg;
.param .b64 param3;
st.param.b64 [param3+0], in_addr;
.param .b64 param4;
st.param.b64 [param4+0], in_addr;
call.uni
__assertfail,
(
param0,
param1,
param2,
param3,
param4
);
//{
}
ld.u64 temp, [in_addr];
add.u64 temp2, temp, 1;
st.u64 [out_addr], temp2;
ret;
}
.extern .func __assertfail
(
.param .b64 __assertfail_param_0,
.param .b64 __assertfail_param_1,
.param .b32 __assertfail_param_2,
.param .b64 __assertfail_param_3,
.param .b64 __assertfail_param_4
);

View file

@ -0,0 +1,105 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%67 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %12 "assertfail"
OpDecorate %1 LinkageAttributes "__notcuda_ptx_impl____assertfail" Import
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%_ptr_Function_ulong = OpTypePointer Function %ulong
%uint = OpTypeInt 32 0
%_ptr_Function_uint = OpTypePointer Function %uint
%73 = OpTypeFunction %void %_ptr_Function_ulong %_ptr_Function_ulong %_ptr_Function_uint %_ptr_Function_ulong %_ptr_Function_ulong
%74 = OpTypeFunction %void %ulong %ulong
%uint_0 = OpConstant %uint 0
%ulong_0 = OpConstant %ulong 0
%uchar = OpTypeInt 8 0
%_ptr_Function_uchar = OpTypePointer Function %uchar
%ulong_0_0 = OpConstant %ulong 0
%ulong_0_1 = OpConstant %ulong 0
%ulong_0_2 = OpConstant %ulong 0
%ulong_0_3 = OpConstant %ulong 0
%_ptr_Generic_ulong = OpTypePointer Generic %ulong
%ulong_1 = OpConstant %ulong 1
%1 = OpFunction %void None %73
%61 = OpFunctionParameter %_ptr_Function_ulong
%62 = OpFunctionParameter %_ptr_Function_ulong
%63 = OpFunctionParameter %_ptr_Function_uint
%64 = OpFunctionParameter %_ptr_Function_ulong
%65 = OpFunctionParameter %_ptr_Function_ulong
OpFunctionEnd
%12 = OpFunction %void None %74
%25 = OpFunctionParameter %ulong
%26 = OpFunctionParameter %ulong
%60 = OpLabel
%13 = OpVariable %_ptr_Function_ulong Function
%14 = OpVariable %_ptr_Function_ulong Function
%15 = OpVariable %_ptr_Function_ulong Function
%16 = OpVariable %_ptr_Function_ulong Function
%17 = OpVariable %_ptr_Function_ulong Function
%18 = OpVariable %_ptr_Function_ulong Function
%19 = OpVariable %_ptr_Function_uint Function
%20 = OpVariable %_ptr_Function_ulong Function
%21 = OpVariable %_ptr_Function_ulong Function
%22 = OpVariable %_ptr_Function_uint Function
%23 = OpVariable %_ptr_Function_ulong Function
%24 = OpVariable %_ptr_Function_ulong Function
OpStore %13 %25
OpStore %14 %26
%27 = OpLoad %ulong %13
OpStore %15 %27
%28 = OpLoad %ulong %14
OpStore %16 %28
%53 = OpCopyObject %uint %uint_0
%29 = OpCopyObject %uint %53
OpStore %19 %29
%30 = OpLoad %ulong %15
%77 = OpBitcast %_ptr_Function_uchar %20
%78 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %77 %ulong_0
%43 = OpBitcast %_ptr_Function_ulong %78
%54 = OpCopyObject %ulong %30
OpStore %43 %54
%31 = OpLoad %ulong %15
%79 = OpBitcast %_ptr_Function_uchar %21
%80 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %79 %ulong_0_0
%45 = OpBitcast %_ptr_Function_ulong %80
%55 = OpCopyObject %ulong %31
OpStore %45 %55
%32 = OpLoad %uint %19
%81 = OpBitcast %_ptr_Function_uchar %22
%82 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %81 %ulong_0_1
%47 = OpBitcast %_ptr_Function_uint %82
OpStore %47 %32
%33 = OpLoad %ulong %15
%83 = OpBitcast %_ptr_Function_uchar %23
%84 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %83 %ulong_0_2
%49 = OpBitcast %_ptr_Function_ulong %84
%56 = OpCopyObject %ulong %33
OpStore %49 %56
%34 = OpLoad %ulong %15
%85 = OpBitcast %_ptr_Function_uchar %24
%86 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %85 %ulong_0_3
%51 = OpBitcast %_ptr_Function_ulong %86
%57 = OpCopyObject %ulong %34
OpStore %51 %57
%87 = OpFunctionCall %void %1 %20 %21 %22 %23 %24
%36 = OpLoad %ulong %15
%58 = OpConvertUToPtr %_ptr_Generic_ulong %36
%35 = OpLoad %ulong %58
OpStore %17 %35
%38 = OpLoad %ulong %17
%37 = OpIAdd %ulong %38 %ulong_1
OpStore %18 %37
%39 = OpLoad %ulong %16
%40 = OpLoad %ulong %18
%59 = OpConvertUToPtr %_ptr_Generic_ulong %39
OpStore %59 %40
OpReturn
OpFunctionEnd

View file

@ -139,6 +139,8 @@ test_ptx!(stateful_ld_st_ntid, [123u64], [123u64]);
test_ptx!(stateful_ld_st_ntid_chain, [12651u64], [12651u64]);
test_ptx!(stateful_ld_st_ntid_sub, [96311u64], [96311u64]);
test_ptx!(shared_ptr_take_address, [97815231u64], [97815231u64]);
// For now, we just that it builds and links
test_ptx!(assertfail, [716523871u64], [716523872u64]);
struct DisplayError<T: Debug> {
err: T,

View file

@ -39,7 +39,8 @@
OpStore %6 %12
%15 = OpLoad %ulong %6
%21 = OpCopyObject %ulong %15
%20 = OpShiftLeftLogical %ulong %21 %uint_2
%32 = OpUConvert %ulong %uint_2
%20 = OpShiftLeftLogical %ulong %21 %32
%14 = OpCopyObject %ulong %20
OpStore %7 %14
%16 = OpLoad %ulong %5

View file

@ -0,0 +1,30 @@
// HACK ALERT
// This test is for testing workaround for a bug in IGC where linking fails
// if there is shl/shr with different width of value and shift
.version 6.5
.target sm_30
.address_size 64
.visible .entry shl_link_hack(
.param .u64 input,
.param .u64 output
)
{
.reg .u64 in_addr;
.reg .u64 out_addr;
.reg .u64 temp;
.reg .u64 temp2;
ld.param.u64 in_addr, [input];
ld.param.u64 out_addr, [output];
// Here only to trigger linking
.reg .u32 unused;
atom.inc.u32 unused, [out_addr], 2000000;
ld.u64 temp, [in_addr];
shl.b64 temp2, temp, 2;
st.u64 [out_addr], temp2;
ret;
}

View file

@ -0,0 +1,65 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%34 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "shl_link_hack"
OpDecorate %29 LinkageAttributes "__notcuda_ptx_impl__atom_relaxed_gpu_generic_inc" Import
%void = OpTypeVoid
%uint = OpTypeInt 32 0
%_ptr_Generic_uint = OpTypePointer Generic %uint
%38 = OpTypeFunction %uint %_ptr_Generic_uint %uint
%ulong = OpTypeInt 64 0
%40 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_Function_uint = OpTypePointer Function %uint
%uint_2000000 = OpConstant %uint 2000000
%_ptr_Generic_ulong = OpTypePointer Generic %ulong
%uint_2 = OpConstant %uint 2
%29 = OpFunction %uint None %38
%31 = OpFunctionParameter %_ptr_Generic_uint
%32 = OpFunctionParameter %uint
OpFunctionEnd
%1 = OpFunction %void None %40
%9 = OpFunctionParameter %ulong
%10 = OpFunctionParameter %ulong
%28 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_ulong Function
%7 = OpVariable %_ptr_Function_ulong Function
%8 = OpVariable %_ptr_Function_uint Function
OpStore %2 %9
OpStore %3 %10
%11 = OpLoad %ulong %2
OpStore %4 %11
%12 = OpLoad %ulong %3
OpStore %5 %12
%14 = OpLoad %ulong %5
%23 = OpConvertUToPtr %_ptr_Generic_uint %14
%13 = OpFunctionCall %uint %29 %23 %uint_2000000
OpStore %8 %13
%16 = OpLoad %ulong %4
%24 = OpConvertUToPtr %_ptr_Generic_ulong %16
%15 = OpLoad %ulong %24
OpStore %6 %15
%18 = OpLoad %ulong %6
%26 = OpCopyObject %ulong %18
%44 = OpUConvert %ulong %uint_2
%25 = OpShiftLeftLogical %ulong %26 %44
%17 = OpCopyObject %ulong %25
OpStore %7 %17
%19 = OpLoad %ulong %5
%20 = OpLoad %ulong %7
%27 = OpConvertUToPtr %_ptr_Generic_ulong %19
OpStore %27 %20
OpReturn
OpFunctionEnd

View file

@ -465,7 +465,9 @@ pub fn to_spirv_module<'a>(ast: ast::Module<'a>) -> Result<Module, TranslateErro
let directives = ast
.directives
.into_iter()
.map(|directive| translate_directive(&mut id_defs, &mut ptx_impl_imports, directive))
.filter_map(|directive| {
translate_directive(&mut id_defs, &mut ptx_impl_imports, directive).transpose()
})
.collect::<Result<Vec<_>, _>>()?;
let must_link_ptx_impl = ptx_impl_imports.len() > 0;
let directives = ptx_impl_imports
@ -1173,13 +1175,13 @@ fn emit_memory_model(builder: &mut dr::Builder) {
fn translate_directive<'input>(
id_defs: &mut GlobalStringIdResolver<'input>,
ptx_impl_imports: &mut HashMap<String, Directive>,
ptx_impl_imports: &mut HashMap<String, Directive<'input>>,
d: ast::Directive<'input, ast::ParsedArgParams<'input>>,
) -> Result<Directive<'input>, TranslateError> {
) -> Result<Option<Directive<'input>>, TranslateError> {
Ok(match d {
ast::Directive::Variable(v) => Directive::Variable(translate_variable(id_defs, v)?),
ast::Directive::Variable(v) => Some(Directive::Variable(translate_variable(id_defs, v)?)),
ast::Directive::Method(f) => {
Directive::Method(translate_function(id_defs, ptx_impl_imports, f)?)
translate_function(id_defs, ptx_impl_imports, f)?.map(Directive::Method)
}
})
}
@ -1219,11 +1221,27 @@ fn translate_variable<'a>(
fn translate_function<'a>(
id_defs: &mut GlobalStringIdResolver<'a>,
ptx_impl_imports: &mut HashMap<String, Directive>,
ptx_impl_imports: &mut HashMap<String, Directive<'a>>,
f: ast::ParsedFunction<'a>,
) -> Result<Function<'a>, TranslateError> {
) -> Result<Option<Function<'a>>, TranslateError> {
let import_as = match &f.func_directive {
ast::MethodDecl::Func(_, "__assertfail", _) => {
Some("__notcuda_ptx_impl____assertfail".to_owned())
}
_ => None,
};
let (str_resolver, fn_resolver, fn_decl) = id_defs.start_fn(&f.func_directive)?;
to_ssa(ptx_impl_imports, str_resolver, fn_resolver, fn_decl, f.body)
let mut func = to_ssa(ptx_impl_imports, str_resolver, fn_resolver, fn_decl, f.body)?;
func.import_as = import_as;
if func.import_as.is_some() {
ptx_impl_imports.insert(
func.import_as.as_ref().unwrap().clone(),
Directive::Method(func),
);
Ok(None)
} else {
Ok(Some(func))
}
}
fn expand_kernel_params<'a, 'b>(
@ -5302,7 +5320,6 @@ pub enum StateSpace {
Local,
Shared,
Param,
ParamReg,
}
impl From<ast::StateSpace> for StateSpace {