[mlir][sparse] support integral types i32,i16,i8 for *numerical* values

Some sparse matrices operate on integral values, in contrast with the more common
f32 and f64 values. This CL expands the compiler and runtime support to handle
several common combinations of pointer, index, and numerical value types.

Reviewed By: bixia

Differential Revision: https://reviews.llvm.org/D99999
Author: Aart Bik, 2021-04-06 16:46:27 -07:00
Commit: 3acf49829c (parent b3e9b07a7d)
5 changed files with 142 additions and 161 deletions

@@ -132,6 +132,12 @@ public:
name = "sparseValuesF64";
else if (eltType.isF32())
name = "sparseValuesF32";
else if (eltType.isInteger(32))
name = "sparseValuesI32";
else if (eltType.isInteger(16))
name = "sparseValuesI16";
else if (eltType.isInteger(8))
name = "sparseValuesI8";
else
return failure();
rewriter.replaceOpWithNewOp<CallOp>(

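The names selected here resolve, when the JIT'd code is linked against the sparse runtime support library (see the later hunks), to extern "C" entry points whose declarations look roughly as follows; this is a sketch, with the MemRef1D* return structs being the 1-D memref descriptors generated by the TEMPLATE macro further down:

// Sketch of the runtime entry points the generated CallOp binds to.
extern "C" MemRef1DF64 sparseValuesF64(void *tensor);
extern "C" MemRef1DF32 sparseValuesF32(void *tensor);
extern "C" MemRef1DI32 sparseValuesI32(void *tensor);
extern "C" MemRef1DI16 sparseValuesI16(void *tensor);
extern "C" MemRef1DI8 sparseValuesI8(void *tensor);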
@@ -837,11 +837,19 @@ static void genReductionEnd(Merger &merger, CodeGen &codegen,
assert(codegen.curVecLength == 1);
codegen.redVal = merger.exp(codegen.redExp).val = Value(); // end chain
unsigned lhs = op.getNumShapedOperands() - 1;
if (red.getType().isa<VectorType>()) {
if (auto vtp = red.getType().dyn_cast<VectorType>()) {
// TODO: assumes + reductions for now
StringAttr kind = rewriter.getStringAttr("add");
Value ld = genTensorLoad(merger, codegen, rewriter, op, codegen.redExp);
red = rewriter.create<vector::ReductionOp>(
op.getLoc(), ld.getType(), rewriter.getStringAttr("add"), red, ld);
// Integer reductions don't accept an accumulator.
if (vtp.getElementType().isa<IntegerType>()) {
red = rewriter.create<vector::ReductionOp>(op.getLoc(), ld.getType(),
kind, red, ValueRange{});
red = rewriter.create<AddIOp>(op.getLoc(), red, ld);
} else {
red = rewriter.create<vector::ReductionOp>(op.getLoc(), ld.getType(),
kind, red, ld);
}
}
genTensorStore(merger, codegen, rewriter, op, lhs, red);
}
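As the comment notes, the integer flavor of vector.ReductionOp is created without an accumulator operand, so the running scalar ld is folded in with a separate AddIOp, whereas the floating-point flavor keeps passing ld as the accumulator. A plain C++ sketch of the resulting scalar semantics (illustrative only, not the generated IR), assuming a 16-lane vector vec and chain value ld:

#include <cstdint>

// Integer path: horizontal add without an accumulator, then a separate add.
int32_t endIntegerReduction(const int32_t vec[16], int32_t ld) {
  int32_t red = 0;
  for (int i = 0; i < 16; ++i) // vector.reduction "add", no accumulator
    red += vec[i];
  return red + ld;             // the extra AddIOp emitted for integer types
}

// Floating-point path: the accumulator is passed to the reduction directly.
float endFloatReduction(const float vec[16], float ld) {
  float red = ld;
  for (int i = 0; i < 16; ++i)
    red += vec[i];
  return red;
}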

@@ -127,6 +127,9 @@ public:
// Primary storage.
virtual void getValues(std::vector<double> **) { fatal("valf64"); }
virtual void getValues(std::vector<float> **) { fatal("valf32"); }
virtual void getValues(std::vector<int32_t> **) { fatal("vali32"); }
virtual void getValues(std::vector<int16_t> **) { fatal("vali16"); }
virtual void getValues(std::vector<int8_t> **) { fatal("vali8"); }
virtual ~SparseTensorStorageBase() {}
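These virtuals fail by default; a concrete templated storage class overrides only the overload that matches its own value type. A minimal self-contained sketch of that dispatch pattern (class and member names below are illustrative, not the actual runtime classes):

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <vector>

struct StorageBase {
  // Every value type gets a fatal default, mirroring the interface above.
  virtual void getValues(std::vector<double> **) { fatal("valf64"); }
  virtual void getValues(std::vector<int32_t> **) { fatal("vali32"); }
  virtual ~StorageBase() {}

protected:
  static void fatal(const char *tp) {
    fprintf(stderr, "unsupported %s\n", tp);
    exit(1);
  }
};

template <typename V>
struct Storage : public StorageBase {
  // Overrides exactly the base overload whose element type matches V.
  void getValues(std::vector<V> **out) override { *out = &values; }
  std::vector<V> values; // primary storage of the numerical values
};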
@@ -453,65 +456,59 @@ char *getTensorFilename(uint64_t id) {
// implementation of a bufferized SparseTensor in MLIR. This could be replaced
// by actual codegen in MLIR.
//
// Because we cannot use C++ templates with C linkage, some macro magic is used
// to generate implementations for all required type combinations that can be
// called from MLIR generated code.
//
//===----------------------------------------------------------------------===//
// Cannot use templates with C linkage.
struct MemRef1DU64 {
const uint64_t *base;
const uint64_t *data;
uint64_t off;
uint64_t sizes[1];
uint64_t strides[1];
};
struct MemRef1DU32 {
const uint32_t *base;
const uint32_t *data;
uint64_t off;
uint64_t sizes[1];
uint64_t strides[1];
};
struct MemRef1DU16 {
const uint16_t *base;
const uint16_t *data;
uint64_t off;
uint64_t sizes[1];
uint64_t strides[1];
};
struct MemRef1DU8 {
const uint8_t *base;
const uint8_t *data;
uint64_t off;
uint64_t sizes[1];
uint64_t strides[1];
};
struct MemRef1DF64 {
const double *base;
const double *data;
uint64_t off;
uint64_t sizes[1];
uint64_t strides[1];
};
struct MemRef1DF32 {
const float *base;
const float *data;
uint64_t off;
uint64_t sizes[1];
uint64_t strides[1];
};
enum OverheadTypeEnum : uint64_t { kU64 = 1, kU32 = 2, kU16 = 3, kU8 = 4 };
enum PrimaryTypeEnum : uint64_t { kF64 = 1, kF32 = 2 };
#define TEMPLATE(NAME, TYPE) \
struct NAME { \
const TYPE *base; \
const TYPE *data; \
uint64_t off; \
uint64_t sizes[1]; \
uint64_t strides[1]; \
}
#define CASE(p, i, v, P, I, V) \
if (ptrTp == (p) && indTp == (i) && valTp == (v)) \
return newSparseTensor<P, I, V>(filename, sparsity, asize)
#define IMPL1(RET, NAME, TYPE, LIB) \
RET NAME(void *tensor) { \
std::vector<TYPE> *v; \
static_cast<SparseTensorStorageBase *>(tensor)->LIB(&v); \
return {v->data(), v->data(), 0, {v->size()}, {1}}; \
}
#define IMPL2(RET, NAME, TYPE, LIB) \
RET NAME(void *tensor, uint64_t d) { \
std::vector<TYPE> *v; \
static_cast<SparseTensorStorageBase *>(tensor)->LIB(&v, d); \
return {v->data(), v->data(), 0, {v->size()}, {1}}; \
}
TEMPLATE(MemRef1DU64, uint64_t);
TEMPLATE(MemRef1DU32, uint32_t);
TEMPLATE(MemRef1DU16, uint16_t);
TEMPLATE(MemRef1DU8, uint8_t);
TEMPLATE(MemRef1DI32, int32_t);
TEMPLATE(MemRef1DI16, int16_t);
TEMPLATE(MemRef1DI8, int8_t);
TEMPLATE(MemRef1DF64, double);
TEMPLATE(MemRef1DF32, float);
enum OverheadTypeEnum : uint64_t { kU64 = 1, kU32 = 2, kU16 = 3, kU8 = 4 };
enum PrimaryTypeEnum : uint64_t {
kF64 = 1,
kF32 = 2,
kI32 = 3,
kI16 = 4,
kI8 = 5
};
void *newSparseTensor(char *filename, bool *abase, bool *adata, uint64_t aoff,
uint64_t asize, uint64_t astride, uint64_t ptrTp,
uint64_t indTp, uint64_t valTp) {
@@ -534,6 +531,17 @@ void *newSparseTensor(char *filename, bool *abase, bool *adata, uint64_t aoff,
CASE(kU16, kU16, kF32, uint16_t, uint16_t, float);
CASE(kU8, kU8, kF32, uint8_t, uint8_t, float);
// Integral matrices with low overhead storage.
CASE(kU32, kU32, kI32, uint32_t, uint32_t, int32_t);
CASE(kU32, kU32, kI16, uint32_t, uint32_t, int16_t);
CASE(kU32, kU32, kI8, uint32_t, uint32_t, int8_t);
CASE(kU16, kU16, kI32, uint16_t, uint16_t, int32_t);
CASE(kU16, kU16, kI16, uint16_t, uint16_t, int16_t);
CASE(kU16, kU16, kI8, uint16_t, uint16_t, int8_t);
CASE(kU8, kU8, kI32, uint8_t, uint8_t, int32_t);
CASE(kU8, kU8, kI16, uint8_t, uint8_t, int16_t);
CASE(kU8, kU8, kI8, uint8_t, uint8_t, int8_t);
// Unsupported case (add above if needed).
fputs("unsupported combination of types\n", stderr);
exit(1);
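Spelled out, each CASE line expands (per the macro defined in the previous hunk) into a guarded, fully typed constructor call; for example, CASE(kU8, kU8, kI32, uint8_t, uint8_t, int32_t) becomes:

if (ptrTp == (kU8) && indTp == (kU8) && valTp == (kI32))
  return newSparseTensor<uint8_t, uint8_t, int32_t>(filename, sparsity, asize);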
@@ -545,70 +553,29 @@ uint64_t sparseDimSize(void *tensor, uint64_t d) {
return static_cast<SparseTensorStorageBase *>(tensor)->getDimSize(d);
}
MemRef1DU64 sparsePointers64(void *tensor, uint64_t d) {
std::vector<uint64_t> *v;
static_cast<SparseTensorStorageBase *>(tensor)->getPointers(&v, d);
return {v->data(), v->data(), 0, {v->size()}, {1}};
}
MemRef1DU32 sparsePointers32(void *tensor, uint64_t d) {
std::vector<uint32_t> *v;
static_cast<SparseTensorStorageBase *>(tensor)->getPointers(&v, d);
return {v->data(), v->data(), 0, {v->size()}, {1}};
}
MemRef1DU16 sparsePointers16(void *tensor, uint64_t d) {
std::vector<uint16_t> *v;
static_cast<SparseTensorStorageBase *>(tensor)->getPointers(&v, d);
return {v->data(), v->data(), 0, {v->size()}, {1}};
}
MemRef1DU8 sparsePointers8(void *tensor, uint64_t d) {
std::vector<uint8_t> *v;
static_cast<SparseTensorStorageBase *>(tensor)->getPointers(&v, d);
return {v->data(), v->data(), 0, {v->size()}, {1}};
}
MemRef1DU64 sparseIndices64(void *tensor, uint64_t d) {
std::vector<uint64_t> *v;
static_cast<SparseTensorStorageBase *>(tensor)->getIndices(&v, d);
return {v->data(), v->data(), 0, {v->size()}, {1}};
}
MemRef1DU32 sparseIndices32(void *tensor, uint64_t d) {
std::vector<uint32_t> *v;
static_cast<SparseTensorStorageBase *>(tensor)->getIndices(&v, d);
return {v->data(), v->data(), 0, {v->size()}, {1}};
}
MemRef1DU16 sparseIndices16(void *tensor, uint64_t d) {
std::vector<uint16_t> *v;
static_cast<SparseTensorStorageBase *>(tensor)->getIndices(&v, d);
return {v->data(), v->data(), 0, {v->size()}, {1}};
}
MemRef1DU8 sparseIndices8(void *tensor, uint64_t d) {
std::vector<uint8_t> *v;
static_cast<SparseTensorStorageBase *>(tensor)->getIndices(&v, d);
return {v->data(), v->data(), 0, {v->size()}, {1}};
}
MemRef1DF64 sparseValuesF64(void *tensor) {
std::vector<double> *v;
static_cast<SparseTensorStorageBase *>(tensor)->getValues(&v);
return {v->data(), v->data(), 0, {v->size()}, {1}};
}
MemRef1DF32 sparseValuesF32(void *tensor) {
std::vector<float> *v;
static_cast<SparseTensorStorageBase *>(tensor)->getValues(&v);
return {v->data(), v->data(), 0, {v->size()}, {1}};
}
IMPL2(MemRef1DU64, sparsePointers64, uint64_t, getPointers)
IMPL2(MemRef1DU32, sparsePointers32, uint32_t, getPointers)
IMPL2(MemRef1DU16, sparsePointers16, uint16_t, getPointers)
IMPL2(MemRef1DU8, sparsePointers8, uint8_t, getPointers)
IMPL2(MemRef1DU64, sparseIndices64, uint64_t, getIndices)
IMPL2(MemRef1DU32, sparseIndices32, uint32_t, getIndices)
IMPL2(MemRef1DU16, sparseIndices16, uint16_t, getIndices)
IMPL2(MemRef1DU8, sparseIndices8, uint8_t, getIndices)
IMPL1(MemRef1DF64, sparseValuesF64, double, getValues)
IMPL1(MemRef1DF32, sparseValuesF32, float, getValues)
IMPL1(MemRef1DI32, sparseValuesI32, int32_t, getValues)
IMPL1(MemRef1DI16, sparseValuesI16, int16_t, getValues)
IMPL1(MemRef1DI8, sparseValuesI8, int8_t, getValues)
void delSparseTensor(void *tensor) {
delete static_cast<SparseTensorStorageBase *>(tensor);
}
#undef TEMPLATE
#undef CASE
#undef IMPL1
#undef IMPL2
} // extern "C"
#endif // MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS
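Likewise, each IMPL1/IMPL2 instantiation generates one of the extern "C" wrappers that previously had to be written out by hand; for example, IMPL1(MemRef1DI32, sparseValuesI32, int32_t, getValues) expands to:

MemRef1DI32 sparseValuesI32(void *tensor) {
  std::vector<int32_t> *v;
  static_cast<SparseTensorStorageBase *>(tensor)->getValues(&v);
  return {v->data(), v->data(), 0, {v->size()}, {1}};
}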

@@ -54,18 +54,18 @@ module {
// a sparse matrix A with a dense vector b into a dense vector x.
//
func @kernel_matvec(%argA: !SparseTensor,
%argb: tensor<?xf32>,
%argx: tensor<?xf32>) -> tensor<?xf32> {
%arga = linalg.sparse_tensor %argA : !SparseTensor to tensor<?x?xf32>
%argb: tensor<?xi32>,
%argx: tensor<?xi32>) -> tensor<?xi32> {
%arga = linalg.sparse_tensor %argA : !SparseTensor to tensor<?x?xi32>
%0 = linalg.generic #matvec
ins(%arga, %argb: tensor<?x?xf32>, tensor<?xf32>)
outs(%argx: tensor<?xf32>) {
^bb(%a: f32, %b: f32, %x: f32):
%0 = mulf %a, %b : f32
%1 = addf %x, %0 : f32
linalg.yield %1 : f32
} -> tensor<?xf32>
return %0 : tensor<?xf32>
ins(%arga, %argb: tensor<?x?xi32>, tensor<?xi32>)
outs(%argx: tensor<?xi32>) {
^bb(%a: i32, %b: i32, %x: i32):
%0 = muli %a, %b : i32
%1 = addi %x, %0 : i32
linalg.yield %1 : i32
} -> tensor<?xi32>
return %0 : tensor<?xi32>
}
//
@@ -79,7 +79,7 @@ module {
// Main driver that reads matrix from file and calls the sparse kernel.
//
func @entry() {
%f0 = constant 0.0 : f32
%i0 = constant 0 : i32
%c0 = constant 0 : index
%c1 = constant 1 : index
%c2 = constant 2 : index
@@ -89,51 +89,51 @@ module {
// Mark inner dimension of the matrix as sparse and encode the
// storage scheme types (this must match the metadata in the
// alias above and compiler switches). In this case, we test
// that 8-bit indices and pointers work correctly.
// that 8-bit indices and pointers work correctly on a matrix
// with i32 elements.
%annotations = memref.alloc(%c2) : memref<?xi1>
%sparse = constant true
%dense = constant false
memref.store %dense, %annotations[%c0] : memref<?xi1>
memref.store %sparse, %annotations[%c1] : memref<?xi1>
%u8 = constant 4 : index
%f32 = constant 2 : index
%i32 = constant 3 : index
// Read the sparse matrix from file, construct sparse storage.
%fileName = call @getTensorFilename(%c0) : (index) -> (!Filename)
%a = call @newSparseTensor(%fileName, %annotations, %u8, %u8, %f32)
%a = call @newSparseTensor(%fileName, %annotations, %u8, %u8, %i32)
: (!Filename, memref<?xi1>, index, index, index) -> (!SparseTensor)
// Initialize dense vectors.
%bdata = memref.alloc(%c256) : memref<?xf32>
%xdata = memref.alloc(%c4) : memref<?xf32>
%bdata = memref.alloc(%c256) : memref<?xi32>
%xdata = memref.alloc(%c4) : memref<?xi32>
scf.for %i = %c0 to %c256 step %c1 {
%k = addi %i, %c1 : index
%l = index_cast %k : index to i32
%f = sitofp %l : i32 to f32
memref.store %f, %bdata[%i] : memref<?xf32>
%j = index_cast %k : index to i32
memref.store %j, %bdata[%i] : memref<?xi32>
}
scf.for %i = %c0 to %c4 step %c1 {
memref.store %f0, %xdata[%i] : memref<?xf32>
memref.store %i0, %xdata[%i] : memref<?xi32>
}
%b = memref.tensor_load %bdata : memref<?xf32>
%x = memref.tensor_load %xdata : memref<?xf32>
%b = memref.tensor_load %bdata : memref<?xi32>
%x = memref.tensor_load %xdata : memref<?xi32>
// Call kernel.
%0 = call @kernel_matvec(%a, %b, %x)
: (!SparseTensor, tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
: (!SparseTensor, tensor<?xi32>, tensor<?xi32>) -> tensor<?xi32>
// Print the result for verification.
//
// CHECK: ( 1659, 1534, 21, 18315 )
// CHECK: ( 889, 1514, -21, -3431 )
//
%m = memref.buffer_cast %0 : memref<?xf32>
%v = vector.transfer_read %m[%c0], %f0: memref<?xf32>, vector<4xf32>
vector.print %v : vector<4xf32>
%m = memref.buffer_cast %0 : memref<?xi32>
%v = vector.transfer_read %m[%c0], %i0: memref<?xi32>, vector<4xi32>
vector.print %v : vector<4xi32>
// Release the resources.
call @delSparseTensor(%a) : (!SparseTensor) -> ()
memref.dealloc %bdata : memref<?xf32>
memref.dealloc %xdata : memref<?xf32>
memref.dealloc %bdata : memref<?xi32>
memref.dealloc %xdata : memref<?xi32>
return
}

@@ -4,20 +4,20 @@
% see https://math.nist.gov/MatrixMarket
%
4 256 17
1 1 1.0
1 127 2.0
1 128 3.0
1 255 4.0
2 2 5.0
2 254 6.0
3 3 7.0
4 1 8.0
4 2 9.0
4 4 10.0
4 99 11.0
4 127 12.0
4 128 13.0
4 129 14.0
4 250 15.0
4 254 16.0
4 256 17.0
1 1 -1
1 127 2
1 128 -3
1 255 4
2 2 -5
2 254 6
3 3 -7
4 1 8
4 2 -9
4 4 10
4 99 -11
4 127 12
4 128 -13
4 129 14
4 250 -15
4 254 16
4 256 -17
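As an independent cross-check of the new expected output (not part of the commit), the standalone C++ snippet below recomputes x = A * b from the updated integer entries, using b[col] = col in 1-based column numbering exactly as the driver's initialization loop produces; it should print ( 889, 1514, -21, -3431 ), matching the CHECK line above.

#include <cstdint>
#include <cstdio>

struct Entry {
  int row, col;
  int32_t val;
};

int main() {
  // (row, col, value) triples copied from the updated .mtx contents above.
  const Entry entries[] = {
      {1, 1, -1},   {1, 127, 2},  {1, 128, -3},  {1, 255, 4},  {2, 2, -5},
      {2, 254, 6},  {3, 3, -7},   {4, 1, 8},     {4, 2, -9},   {4, 4, 10},
      {4, 99, -11}, {4, 127, 12}, {4, 128, -13}, {4, 129, 14}, {4, 250, -15},
      {4, 254, 16}, {4, 256, -17}};
  int32_t x[4] = {0, 0, 0, 0};
  for (const Entry &e : entries)
    x[e.row - 1] += e.val * e.col; // b[col - 1] == col per the driver init
  printf("( %d, %d, %d, %d )\n", x[0], x[1], x[2], x[3]);
  return 0;
}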