1b9ba2b233
Too many changes to list, but broadly: * Remove Intel GPU support from the compiler * Add AMD GPU support to the compiler * Remove Intel GPU host code * Add AMD GPU host code * More device instructions. From 40 to 68 * More host functions. From 48 to 184 * Add proof of concept implementation of OptiX framework * Add minimal support of cuDNN, cuBLAS, cuSPARSE, cuFFT, NCCL, NVML * Improve ZLUDA launcher for Windows
31 lines
673 B
Plaintext
31 lines
673 B
Plaintext
// PTX module preamble: ISA version, target architecture and address width.
.version 6.5
.target sm_30
.address_size 64

// Module-scope texture reference; texref_1d samples it with tex.1d.
.global .texref image;

// texref_1d: fetch one texel from the module-scope 1D texture reference
// `image` and write its four 32-bit components to memory at `output`.
//
// Params:
//   input_x : .s32  integer coordinate handed to tex.1d
//   output  : .u64  destination address; 16 bytes are written
//
// Output layout (components are stored in the reverse of the fetch order):
//   output+0  = a
//   output+4  = b
//   output+8  = g
//   output+12 = r
// NOTE(review): the reversed order is presumably what the host-side check
// expects -- confirm before changing it.
.visible .entry texref_1d(
    .param .s32 input_x,
    .param .u64 output
)
{
    .reg .u64 out_addr;     // destination address loaded from `output`
    .reg .s32 x;            // texel coordinate loaded from `input_x`
    .reg .f32 r;            // the four components returned by the fetch
    .reg .f32 g;
    .reg .f32 b;
    .reg .f32 a;

    ld.param.s32    x, [input_x];
    ld.param.u64    out_addr, [output];

    // Single 1D fetch with an s32 coordinate, returning a v4 of f32.
    tex.1d.v4.f32.s32   {r, g, b, a}, [image, {x}];

    // .b32 stores copy the raw 32-bit patterns of the f32 registers.
    // No state space is given on st, so the address is treated as generic.
    st.b32  [out_addr],    a;
    st.b32  [out_addr+4],  b;
    st.b32  [out_addr+8],  g;
    st.b32  [out_addr+12], r;
    ret;
}