[mlir][sparse] support integral types i32,i16,i8 for *numerical* values

Some sparse matrices operate on integral values, in contrast with the more common
f32 and f64 values. This CL expands the compiler and runtime support to handle
several common combinations of pointer, index, and numerical value types.

Reviewed By: bixia

Differential Revision: https://reviews.llvm.org/D99999
Author: Aart Bik, 2021-04-06 16:46:27 -07:00
Commit: 3acf49829c (parent b3e9b07a7d)
5 changed files with 142 additions and 161 deletions

@@ -132,6 +132,12 @@ public:
name = "sparseValuesF64";
else if (eltType.isF32())
name = "sparseValuesF32";
else if (eltType.isInteger(32))
name = "sparseValuesI32";
else if (eltType.isInteger(16))
name = "sparseValuesI16";
else if (eltType.isInteger(8))
name = "sparseValuesI8";
else
return failure();
rewriter.replaceOpWithNewOp<CallOp>(

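The names selected here resolve, when the JIT'd code is linked against the sparse runtime support library (see the later hunks), to extern "C" entry points whose declarations look roughly as follows; this is a sketch, with the MemRef1D* return structs being the 1-D memref descriptors generated by the TEMPLATE macro further down:

// Sketch of the runtime entry points the generated CallOp binds to.
extern "C" MemRef1DF64 sparseValuesF64(void *tensor);
extern "C" MemRef1DF32 sparseValuesF32(void *tensor);
extern "C" MemRef1DI32 sparseValuesI32(void *tensor);
extern "C" MemRef1DI16 sparseValuesI16(void *tensor);
extern "C" MemRef1DI8 sparseValuesI8(void *tensor);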
@@ -837,11 +837,19 @@ static void genReductionEnd(Merger &merger, CodeGen &codegen,
assert(codegen.curVecLength == 1);
codegen.redVal = merger.exp(codegen.redExp).val = Value(); // end chain
unsigned lhs = op.getNumShapedOperands() - 1;
if (red.getType().isa<VectorType>()) {
if (auto vtp = red.getType().dyn_cast<VectorType>()) {
// TODO: assumes + reductions for now
StringAttr kind = rewriter.getStringAttr("add");
Value ld = genTensorLoad(merger, codegen, rewriter, op, codegen.redExp);
red = rewriter.create<vector::ReductionOp>(
op.getLoc(), ld.getType(), rewriter.getStringAttr("add"), red, ld);
// Integer reductions don't accept an accumulator.
if (vtp.getElementType().isa<IntegerType>()) {
red = rewriter.create<vector::ReductionOp>(op.getLoc(), ld.getType(),
kind, red, ValueRange{});
red = rewriter.create<AddIOp>(op.getLoc(), red, ld);
} else {
red = rewriter.create<vector::ReductionOp>(op.getLoc(), ld.getType(),
kind, red, ld);
}
}
genTensorStore(merger, codegen, rewriter, op, lhs, red);
}
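As the comment notes, the integer flavor of vector.ReductionOp is created without an accumulator operand, so the running scalar ld is folded in with a separate AddIOp, whereas the floating-point flavor keeps passing ld as the accumulator. A plain C++ sketch of the resulting scalar semantics (illustrative only, not the generated IR), assuming a 16-lane vector vec and chain value ld:

#include <cstdint>

// Integer path: horizontal add without an accumulator, then a separate add.
int32_t endIntegerReduction(const int32_t vec[16], int32_t ld) {
  int32_t red = 0;
  for (int i = 0; i < 16; ++i) // vector.reduction "add", no accumulator
    red += vec[i];
  return red + ld;             // the extra AddIOp emitted for integer types
}

// Floating-point path: the accumulator is passed to the reduction directly.
float endFloatReduction(const float vec[16], float ld) {
  float red = ld;
  for (int i = 0; i < 16; ++i)
    red += vec[i];
  return red;
}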

@@ -127,6 +127,9 @@ public:
// Primary storage.
virtual void getValues(std::vector<double> **) { fatal("valf64"); }
virtual void getValues(std::vector<float> **) { fatal("valf32"); }
virtual void getValues(std::vector<int32_t> **) { fatal("vali32"); }
virtual void getValues(std::vector<int16_t> **) { fatal("vali16"); }
virtual void getValues(std::vector<int8_t> **) { fatal("vali8"); }
virtual ~SparseTensorStorageBase() {}
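These virtuals fail by default; a concrete templated storage class overrides only the overload that matches its own value type. A minimal self-contained sketch of that dispatch pattern (class and member names below are illustrative, not the actual runtime classes):

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <vector>

struct StorageBase {
  // Every value type gets a fatal default, mirroring the interface above.
  virtual void getValues(std::vector<double> **) { fatal("valf64"); }
  virtual void getValues(std::vector<int32_t> **) { fatal("vali32"); }
  virtual ~StorageBase() {}

protected:
  static void fatal(const char *tp) {
    fprintf(stderr, "unsupported %s\n", tp);
    exit(1);
  }
};

template <typename V>
struct Storage : public StorageBase {
  // Overrides exactly the base overload whose element type matches V.
  void getValues(std::vector<V> **out) override { *out = &values; }
  std::vector<V> values; // primary storage of the numerical values
};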
@@ -453,65 +456,59 @@ char *getTensorFilename(uint64_t id) {
// implementation of a bufferized SparseTensor in MLIR. This could be replaced
// by actual codegen in MLIR.
//
// Because we cannot use C++ templates with C linkage, some macro magic is used
// to generate implementations for all required type combinations that can be
// called from MLIR generated code.
//
//===----------------------------------------------------------------------===//
// Cannot use templates with C linkage.
struct MemRef1DU64 {
const uint64_t *base;
const uint64_t *data;
uint64_t off;
uint64_t sizes[1];
uint64_t strides[1];
};
struct MemRef1DU32 {
const uint32_t *base;
const uint32_t *data;
uint64_t off;
uint64_t sizes[1];
uint64_t strides[1];
};
struct MemRef1DU16 {
const uint16_t *base;
const uint16_t *data;
uint64_t off;
uint64_t sizes[1];
uint64_t strides[1];
};
struct MemRef1DU8 {
const uint8_t *base;
const uint8_t *data;
uint64_t off;
uint64_t sizes[1];
uint64_t strides[1];
};
struct MemRef1DF64 {
const double *base;
const double *data;
uint64_t off;
uint64_t sizes[1];
uint64_t strides[1];
};
struct MemRef1DF32 {
const float *base;
const float *data;
uint64_t off;
uint64_t sizes[1];
uint64_t strides[1];
};
enum OverheadTypeEnum : uint64_t { kU64 = 1, kU32 = 2, kU16 = 3, kU8 = 4 };
enum PrimaryTypeEnum : uint64_t { kF64 = 1, kF32 = 2 };
#define TEMPLATE(NAME, TYPE) \
struct NAME { \
const TYPE *base; \
const TYPE *data; \
uint64_t off; \
uint64_t sizes[1]; \
uint64_t strides[1]; \
}
#define CASE(p, i, v, P, I, V) \
if (ptrTp == (p) && indTp == (i) && valTp == (v)) \
return newSparseTensor<P, I, V>(filename, sparsity, asize)
#define IMPL1(RET, NAME, TYPE, LIB) \
RET NAME(void *tensor) { \
std::vector<TYPE> *v; \
static_cast<SparseTensorStorageBase *>(tensor)->LIB(&v); \
return {v->data(), v->data(), 0, {v->size()}, {1}}; \
}
#define IMPL2(RET, NAME, TYPE, LIB) \
RET NAME(void *tensor, uint64_t d) { \
std::vector<TYPE> *v; \
static_cast<SparseTensorStorageBase *>(tensor)->LIB(&v, d); \
return {v->data(), v->data(), 0, {v->size()}, {1}}; \
}
TEMPLATE(MemRef1DU64, uint64_t);
TEMPLATE(MemRef1DU32, uint32_t);
TEMPLATE(MemRef1DU16, uint16_t);
TEMPLATE(MemRef1DU8, uint8_t);
TEMPLATE(MemRef1DI32, int32_t);
TEMPLATE(MemRef1DI16, int16_t);
TEMPLATE(MemRef1DI8, int8_t);
TEMPLATE(MemRef1DF64, double);
TEMPLATE(MemRef1DF32, float);
enum OverheadTypeEnum : uint64_t { kU64 = 1, kU32 = 2, kU16 = 3, kU8 = 4 };
enum PrimaryTypeEnum : uint64_t {
kF64 = 1,
kF32 = 2,
kI32 = 3,
kI16 = 4,
kI8 = 5
};
void *newSparseTensor(char *filename, bool *abase, bool *adata, uint64_t aoff,
uint64_t asize, uint64_t astride, uint64_t ptrTp,
uint64_t indTp, uint64_t valTp) {
@@ -534,6 +531,17 @@ void *newSparseTensor(char *filename, bool *abase, bool *adata, uint64_t aoff,
CASE(kU16, kU16, kF32, uint16_t, uint16_t, float);
CASE(kU8, kU8, kF32, uint8_t, uint8_t, float);
// Integral matrices with low overhead storage.
CASE(kU32, kU32, kI32, uint32_t, uint32_t, int32_t);
CASE(kU32, kU32, kI16, uint32_t, uint32_t, int16_t);
CASE(kU32, kU32, kI8, uint32_t, uint32_t, int8_t);
CASE(kU16, kU16, kI32, uint16_t, uint16_t, int32_t);
CASE(kU16, kU16, kI16, uint16_t, uint16_t, int16_t);
CASE(kU16, kU16, kI8, uint16_t, uint16_t, int8_t);
CASE(kU8, kU8, kI32, uint8_t, uint8_t, int32_t);
CASE(kU8, kU8, kI16, uint8_t, uint8_t, int16_t);
CASE(kU8, kU8, kI8, uint8_t, uint8_t, int8_t);
// Unsupported case (add above if needed).
fputs("unsupported combination of types\n", stderr);
exit(1);
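Spelled out, each CASE line expands (per the macro defined in the previous hunk) into a guarded, fully typed constructor call; for example, CASE(kU8, kU8, kI32, uint8_t, uint8_t, int32_t) becomes:

if (ptrTp == (kU8) && indTp == (kU8) && valTp == (kI32))
  return newSparseTensor<uint8_t, uint8_t, int32_t>(filename, sparsity, asize);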
@@ -545,70 +553,29 @@ uint64_t sparseDimSize(void *tensor, uint64_t d) {
return static_cast<SparseTensorStorageBase *>(tensor)->getDimSize(d);
}
MemRef1DU64 sparsePointers64(void *tensor, uint64_t d) {
std::vector<uint64_t> *v;
static_cast<SparseTensorStorageBase *>(tensor)->getPointers(&v, d);
return {v->data(), v->data(), 0, {v->size()}, {1}};
}
MemRef1DU32 sparsePointers32(void *tensor, uint64_t d) {
std::vector<uint32_t> *v;
static_cast<SparseTensorStorageBase *>(tensor)->getPointers(&v, d);
return {v->data(), v->data(), 0, {v->size()}, {1}};
}
MemRef1DU16 sparsePointers16(void *tensor, uint64_t d) {
std::vector<uint16_t> *v;
static_cast<SparseTensorStorageBase *>(tensor)->getPointers(&v, d);
return {v->data(), v->data(), 0, {v->size()}, {1}};
}
MemRef1DU8 sparsePointers8(void *tensor, uint64_t d) {
std::vector<uint8_t> *v;
static_cast<SparseTensorStorageBase *>(tensor)->getPointers(&v, d);
return {v->data(), v->data(), 0, {v->size()}, {1}};
}
MemRef1DU64 sparseIndices64(void *tensor, uint64_t d) {
std::vector<uint64_t> *v;
static_cast<SparseTensorStorageBase *>(tensor)->getIndices(&v, d);
return {v->data(), v->data(), 0, {v->size()}, {1}};
}
MemRef1DU32 sparseIndices32(void *tensor, uint64_t d) {
std::vector<uint32_t> *v;
static_cast<SparseTensorStorageBase *>(tensor)->getIndices(&v, d);
return {v->data(), v->data(), 0, {v->size()}, {1}};
}
MemRef1DU16 sparseIndices16(void *tensor, uint64_t d) {
std::vector<uint16_t> *v;
static_cast<SparseTensorStorageBase *>(tensor)->getIndices(&v, d);
return {v->data(), v->data(), 0, {v->size()}, {1}};
}
MemRef1DU8 sparseIndices8(void *tensor, uint64_t d) {
std::vector<uint8_t> *v;
static_cast<SparseTensorStorageBase *>(tensor)->getIndices(&v, d);
return {v->data(), v->data(), 0, {v->size()}, {1}};
}
MemRef1DF64 sparseValuesF64(void *tensor) {
std::vector<double> *v;
static_cast<SparseTensorStorageBase *>(tensor)->getValues(&v);
return {v->data(), v->data(), 0, {v->size()}, {1}};
}
MemRef1DF32 sparseValuesF32(void *tensor) {
std::vector<float> *v;
static_cast<SparseTensorStorageBase *>(tensor)->getValues(&v);
return {v->data(), v->data(), 0, {v->size()}, {1}};
}
IMPL2(MemRef1DU64, sparsePointers64, uint64_t, getPointers)
IMPL2(MemRef1DU32, sparsePointers32, uint32_t, getPointers)
IMPL2(MemRef1DU16, sparsePointers16, uint16_t, getPointers)
IMPL2(MemRef1DU8, sparsePointers8, uint8_t, getPointers)
IMPL2(MemRef1DU64, sparseIndices64, uint64_t, getIndices)
IMPL2(MemRef1DU32, sparseIndices32, uint32_t, getIndices)
IMPL2(MemRef1DU16, sparseIndices16, uint16_t, getIndices)
IMPL2(MemRef1DU8, sparseIndices8, uint8_t, getIndices)
IMPL1(MemRef1DF64, sparseValuesF64, double, getValues)
IMPL1(MemRef1DF32, sparseValuesF32, float, getValues)
IMPL1(MemRef1DI32, sparseValuesI32, int32_t, getValues)
IMPL1(MemRef1DI16, sparseValuesI16, int16_t, getValues)
IMPL1(MemRef1DI8, sparseValuesI8, int8_t, getValues)
void delSparseTensor(void *tensor) {
delete static_cast<SparseTensorStorageBase *>(tensor);
}
#undef TEMPLATE
#undef CASE
#undef IMPL1
#undef IMPL2
} // extern "C"
#endif // MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS
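Likewise, each IMPL1/IMPL2 instantiation generates one of the extern "C" wrappers that previously had to be written out by hand; for example, IMPL1(MemRef1DI32, sparseValuesI32, int32_t, getValues) expands to:

MemRef1DI32 sparseValuesI32(void *tensor) {
  std::vector<int32_t> *v;
  static_cast<SparseTensorStorageBase *>(tensor)->getValues(&v);
  return {v->data(), v->data(), 0, {v->size()}, {1}};
}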

@@ -54,18 +54,18 @@ module {
// a sparse matrix A with a dense vector b into a dense vector x.
//
func @kernel_matvec(%argA: !SparseTensor,
%argb: tensor<?xf32>,
%argx: tensor<?xf32>) -> tensor<?xf32> {
%arga = linalg.sparse_tensor %argA : !SparseTensor to tensor<?x?xf32>
%argb: tensor<?xi32>,
%argx: tensor<?xi32>) -> tensor<?xi32> {
%arga = linalg.sparse_tensor %argA : !SparseTensor to tensor<?x?xi32>
%0 = linalg.generic #matvec
ins(%arga, %argb: tensor<?x?xf32>, tensor<?xf32>)
outs(%argx: tensor<?xf32>) {
^bb(%a: f32, %b: f32, %x: f32):
%0 = mulf %a, %b : f32
%1 = addf %x, %0 : f32
linalg.yield %1 : f32
} -> tensor<?xf32>
return %0 : tensor<?xf32>
ins(%arga, %argb: tensor<?x?xi32>, tensor<?xi32>)
outs(%argx: tensor<?xi32>) {
^bb(%a: i32, %b: i32, %x: i32):
%0 = muli %a, %b : i32
%1 = addi %x, %0 : i32
linalg.yield %1 : i32
} -> tensor<?xi32>
return %0 : tensor<?xi32>
}
//
@@ -79,7 +79,7 @@ module {
// Main driver that reads matrix from file and calls the sparse kernel.
//
func @entry() {
%f0 = constant 0.0 : f32
%i0 = constant 0 : i32
%c0 = constant 0 : index
%c1 = constant 1 : index
%c2 = constant 2 : index
@@ -89,51 +89,51 @@ module {
// Mark inner dimension of the matrix as sparse and encode the
// storage scheme types (this must match the metadata in the
// alias above and compiler switches). In this case, we test
// that 8-bit indices and pointers work correctly.
// that 8-bit indices and pointers work correctly on a matrix
// with i32 elements.
%annotations = memref.alloc(%c2) : memref<?xi1>
%sparse = constant true
%dense = constant false
memref.store %dense, %annotations[%c0] : memref<?xi1>
memref.store %sparse, %annotations[%c1] : memref<?xi1>
%u8 = constant 4 : index
%f32 = constant 2 : index
%i32 = constant 3 : index
// Read the sparse matrix from file, construct sparse storage.
%fileName = call @getTensorFilename(%c0) : (index) -> (!Filename)
%a = call @newSparseTensor(%fileName, %annotations, %u8, %u8, %f32)
%a = call @newSparseTensor(%fileName, %annotations, %u8, %u8, %i32)
: (!Filename, memref<?xi1>, index, index, index) -> (!SparseTensor)
// Initialize dense vectors.
%bdata = memref.alloc(%c256) : memref<?xf32>
%xdata = memref.alloc(%c4) : memref<?xf32>
%bdata = memref.alloc(%c256) : memref<?xi32>
%xdata = memref.alloc(%c4) : memref<?xi32>
scf.for %i = %c0 to %c256 step %c1 {
%k = addi %i, %c1 : index
%l = index_cast %k : index to i32
%f = sitofp %l : i32 to f32
memref.store %f, %bdata[%i] : memref<?xf32>
%j = index_cast %k : index to i32
memref.store %j, %bdata[%i] : memref<?xi32>
}
scf.for %i = %c0 to %c4 step %c1 {
memref.store %f0, %xdata[%i] : memref<?xf32>
memref.store %i0, %xdata[%i] : memref<?xi32>
}
%b = memref.tensor_load %bdata : memref<?xf32>
%x = memref.tensor_load %xdata : memref<?xf32>
%b = memref.tensor_load %bdata : memref<?xi32>
%x = memref.tensor_load %xdata : memref<?xi32>
// Call kernel.
%0 = call @kernel_matvec(%a, %b, %x)
: (!SparseTensor, tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
: (!SparseTensor, tensor<?xi32>, tensor<?xi32>) -> tensor<?xi32>
// Print the result for verification.
//
// CHECK: ( 1659, 1534, 21, 18315 )
// CHECK: ( 889, 1514, -21, -3431 )
//
%m = memref.buffer_cast %0 : memref<?xf32>
%v = vector.transfer_read %m[%c0], %f0: memref<?xf32>, vector<4xf32>
vector.print %v : vector<4xf32>
%m = memref.buffer_cast %0 : memref<?xi32>
%v = vector.transfer_read %m[%c0], %i0: memref<?xi32>, vector<4xi32>
vector.print %v : vector<4xi32>
// Release the resources.
call @delSparseTensor(%a) : (!SparseTensor) -> ()
memref.dealloc %bdata : memref<?xf32>
memref.dealloc %xdata : memref<?xf32>
memref.dealloc %bdata : memref<?xi32>
memref.dealloc %xdata : memref<?xi32>
return
}

@@ -4,20 +4,20 @@
% see https://math.nist.gov/MatrixMarket
%
4 256 17
1 1 1.0
1 127 2.0
1 128 3.0
1 255 4.0
2 2 5.0
2 254 6.0
3 3 7.0
4 1 8.0
4 2 9.0
4 4 10.0
4 99 11.0
4 127 12.0
4 128 13.0
4 129 14.0
4 250 15.0
4 254 16.0
4 256 17.0
1 1 -1
1 127 2
1 128 -3
1 255 4
2 2 -5
2 254 6
3 3 -7
4 1 8
4 2 -9
4 4 10
4 99 -11
4 127 12
4 128 -13
4 129 14
4 250 -15
4 254 16
4 256 -17
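As an independent cross-check of the new expected output (not part of the commit), the standalone C++ snippet below recomputes x = A * b from the updated integer entries, using b[col] = col in 1-based column numbering exactly as the driver's initialization loop produces; it should print ( 889, 1514, -21, -3431 ), matching the CHECK line above.

#include <cstdint>
#include <cstdio>

struct Entry {
  int row, col;
  int32_t val;
};

int main() {
  // (row, col, value) triples copied from the updated .mtx contents above.
  const Entry entries[] = {
      {1, 1, -1},   {1, 127, 2},  {1, 128, -3},  {1, 255, 4},  {2, 2, -5},
      {2, 254, 6},  {3, 3, -7},   {4, 1, 8},     {4, 2, -9},   {4, 4, 10},
      {4, 99, -11}, {4, 127, 12}, {4, 128, -13}, {4, 129, 14}, {4, 250, -15},
      {4, 254, 16}, {4, 256, -17}};
  int32_t x[4] = {0, 0, 0, 0};
  for (const Entry &e : entries)
    x[e.row - 1] += e.val * e.col; // b[col - 1] == col per the driver init
  printf("( %d, %d, %d, %d )\n", x[0], x[1], x[2], x[3]);
  return 0;
}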