Add support for and instruction

2020-10-26 18:45:28 +01:00 · 2020-10-26 18:45:28 +01:00 · fc7cc00f47
parent 40bdb83e6b
commit fc7cc00f47
6 changed files with 125 additions and 9 deletions
--- a/ptx/src/ast.rs
+++ b/ptx/src/ast.rs
@ -506,11 +506,12 @@ pub enum Instruction<P: ArgParams> {
    Call(CallInst<P>),
    Abs(AbsDetails, Arg2<P>),
    Mad(MulDetails, Arg4<P>),
-    Or(OrType, Arg3<P>),
+    Or(OrAndType, Arg3<P>),
    Sub(ArithDetails, Arg3<P>),
    Min(MinMaxDetails, Arg3<P>),
    Max(MinMaxDetails, Arg3<P>),
    Rcp(RcpDetails, Arg2<P>),
+    And(OrAndType, Arg3<P>),
 }

 #[derive(Copy, Clone)]
@ -974,7 +975,7 @@ pub struct RetData {
    pub uniform: bool,
 }

-sub_enum!(OrType {
+sub_enum!(OrAndType {
    Pred,
    B16,
    B32,
--- a/ptx/src/ptx.lalrpop
+++ b/ptx/src/ptx.lalrpop
@ -125,6 +125,7 @@ match {
    // IF YOU ARE ADDING A NEW TOKEN HERE ALSO ADD IT BELOW TO ExtendedID
    "abs",
    "add",
+    "and",
    "bra",
    "call",
    "cvt",
@ -158,6 +159,7 @@ match {
 ExtendedID : &'input str = {
    "abs",
    "add",
+    "and",
    "bra",
    "call",
    "cvt",
@ -608,6 +610,7 @@ Instruction: ast::Instruction<ast::ParsedArgParams<'input>> = {
    InstAbs,
    InstMad,
    InstOr,
+    InstAnd,
    InstSub,
    InstMin,
    InstMax,
@ -1190,16 +1193,21 @@ SignedIntType: ast::ScalarType = {

 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-or
 InstOr: ast::Instruction<ast::ParsedArgParams<'input>> = {
-    "or" <d:OrType> <a:Arg3> => ast::Instruction::Or(d, a),
+    "or" <d:OrAndType> <a:Arg3> => ast::Instruction::Or(d, a),
 };

-OrType: ast::OrType = {
-    ".pred" => ast::OrType::Pred,
-    ".b16" => ast::OrType::B16,
-    ".b32" => ast::OrType::B32,
-    ".b64" => ast::OrType::B64,
+OrAndType: ast::OrAndType = {
+    ".pred" => ast::OrAndType::Pred,
+    ".b16" => ast::OrAndType::B16,
+    ".b32" => ast::OrAndType::B32,
+    ".b64" => ast::OrAndType::B64,
 }

+// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-and
+InstAnd: ast::Instruction<ast::ParsedArgParams<'input>> = {
+    "and" <d:OrAndType> <a:Arg3> => ast::Instruction::And(d, a),
+};
+
 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-rcp
 InstRcp: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "rcp" <rounding:RcpRoundingMode> <ftz:".ftz"?> ".f32" <a:Arg2> => {
--- a/ptx/src/test/spirv_run/and.ptx
+++ b/ptx/src/test/spirv_run/and.ptx
@ -0,0 +1,23 @@
+.version 6.5
+.target sm_30
+.address_size 64
+
+.visible .entry and(
+	.param .u64 input,
+	.param .u64 output
+)
+{
+	.reg .u64 	    in_addr;
+    .reg .u64 	    out_addr;
+    .reg .u32 	    temp1;
+    .reg .u32 	    temp2;
+
+	ld.param.u64 	in_addr, [input];
+    ld.param.u64 	out_addr, [output];
+
+    ld.u32          temp1, [in_addr];
+    ld.u32          temp2, [in_addr+4];
+	and.b32		    temp1, temp1, temp2;
+    st.u32          [out_addr], temp1;
+	ret;
+}
--- a/ptx/src/test/spirv_run/and.spvtxt
+++ b/ptx/src/test/spirv_run/and.spvtxt
@ -0,0 +1,66 @@
+; SPIR-V
+; Version: 1.3
+; Generator: rspirv
+; Bound: 41
+OpCapability GenericPointer
+OpCapability Linkage
+OpCapability Addresses
+OpCapability Kernel
+OpCapability Int8
+OpCapability Int16
+OpCapability Int64
+OpCapability Float16
+OpCapability Float64
+OpCapability FunctionFloatControlINTEL
+OpExtension "SPV_INTEL_float_controls2"
+%33 = OpExtInstImport "OpenCL.std"
+OpMemoryModel Physical64 OpenCL
+OpEntryPoint Kernel %1 "and"
+%34 = OpTypeVoid
+%35 = OpTypeInt 64 0
+%36 = OpTypeFunction %34 %35 %35
+%37 = OpTypePointer Function %35
+%38 = OpTypeInt 32 0
+%39 = OpTypePointer Function %38
+%40 = OpTypePointer Generic %38
+%23 = OpConstant  %35  4
+%1 = OpFunction  %34  None %36
+%8 = OpFunctionParameter  %35 
+%9 = OpFunctionParameter  %35 
+%31 = OpLabel
+%2 = OpVariable  %37  Function
+%3 = OpVariable  %37  Function
+%4 = OpVariable  %37  Function
+%5 = OpVariable  %37  Function
+%6 = OpVariable  %39  Function
+%7 = OpVariable  %39  Function
+OpStore %2 %8
+OpStore %3 %9
+%11 = OpLoad  %35  %2
+%10 = OpCopyObject  %35  %11
+OpStore %4 %10
+%13 = OpLoad  %35  %3
+%12 = OpCopyObject  %35  %13
+OpStore %5 %12
+%15 = OpLoad  %35  %4
+%25 = OpConvertUToPtr  %40  %15
+%14 = OpLoad  %38  %25
+OpStore %6 %14
+%17 = OpLoad  %35  %4
+%24 = OpIAdd  %35  %17 %23
+%26 = OpConvertUToPtr  %40  %24
+%16 = OpLoad  %38  %26
+OpStore %7 %16
+%19 = OpLoad  %38  %6
+%20 = OpLoad  %38  %7
+%28 = OpCopyObject  %38  %19
+%29 = OpCopyObject  %38  %20
+%27 = OpBitwiseAnd  %38  %28 %29
+%18 = OpCopyObject  %38  %27
+OpStore %6 %18
+%21 = OpLoad  %35  %5
+%22 = OpLoad  %38  %6
+%30 = OpConvertUToPtr  %40  %21
+OpStore %30 %22
+OpReturn
+OpFunctionEnd
--- a/ptx/src/test/spirv_run/mod.rs
+++ b/ptx/src/test/spirv_run/mod.rs
@ -89,6 +89,7 @@ test_ptx!(rcp, [2f32], [0.5f32]);
 test_ptx!(mul_non_ftz, [0b1_00000000_10000000000000000000000u32, 0x3f000000u32], [0b1_00000000_01000000000000000000000u32]);
 test_ptx!(constant_f32, [10f32], [5f32]);
 test_ptx!(constant_negative, [-101i32], [101i32]);
+test_ptx!(and, [6u32, 3u32], [2u32]);

 struct DisplayError<T: Debug> {
    err: T,
--- a/ptx/src/translate.rs
+++ b/ptx/src/translate.rs
@ -1263,6 +1263,9 @@ fn convert_to_typed_statements(
                ast::Instruction::Rcp(d, a) => {
                    result.push(Statement::Instruction(ast::Instruction::Rcp(d, a.cast())))
                }
+                ast::Instruction::And(d, a) => {
+                    result.push(Statement::Instruction(ast::Instruction::And(d, a.cast())))
+                }
            },
            Statement::Label(i) => result.push(Statement::Label(i)),
            Statement::Variable(v) => result.push(Statement::Variable(v)),
@ -2325,7 +2328,7 @@ fn emit_function_body_ops(
                },
                ast::Instruction::Or(t, a) => {
                    let result_type = map.get_or_add_scalar(builder, ast::ScalarType::from(*t));
-                    if *t == ast::OrType::Pred {
+                    if *t == ast::OrAndType::Pred {
                        builder.logical_or(result_type, Some(a.dst), a.src1, a.src2)?;
                    } else {
                        builder.bitwise_or(result_type, Some(a.dst), a.src1, a.src2)?;
@ -2351,6 +2354,14 @@ fn emit_function_body_ops(
                ast::Instruction::Rcp(d, a) => {
                    emit_rcp(builder, map, d, a)?;
                }
+                ast::Instruction::And(t, a) => {
+                    let result_type = map.get_or_add_scalar(builder, ast::ScalarType::from(*t));
+                    if *t == ast::OrAndType::Pred {
+                        builder.logical_and(result_type, Some(a.dst), a.src1, a.src2)?;
+                    } else {
+                        builder.bitwise_and(result_type, Some(a.dst), a.src1, a.src2)?;
+                    }
+                }
            },
            Statement::LoadVar(arg, typ) => {
                let type_id = map.get_or_add(builder, SpirvType::from(typ.clone()));
@ -4041,6 +4052,10 @@ impl<T: ArgParamsEx> ast::Instruction<T> {
                });
                ast::Instruction::Rcp(d, a.map(visitor, &typ)?)
            }
+            ast::Instruction::And(t, a) => ast::Instruction::And(
+                t,
+                a.map_non_shift(visitor, &ast::Type::Scalar(t.into()), false)?,
+            ),
        })
    }
 }
@ -4285,6 +4300,7 @@ impl ast::Instruction<ExpandedArgParams> {
            | ast::Instruction::Min(_, _)
            | ast::Instruction::Max(_, _)
            | ast::Instruction::Rcp(_, _)
+            | ast::Instruction::And(_, _)
            | ast::Instruction::Mad(_, _) => None,
        }
    }
@ -4303,6 +4319,7 @@ impl ast::Instruction<ExpandedArgParams> {
            ast::Instruction::Ret(_) => None,
            ast::Instruction::Call(_) => None,
            ast::Instruction::Or(_, _) => None,
+            ast::Instruction::And(_, _) => None,
            ast::Instruction::Cvta(_, _) => None,
            ast::Instruction::Sub(ast::ArithDetails::Signed(_), _) => None,
            ast::Instruction::Sub(ast::ArithDetails::Unsigned(_), _) => None,