//===- TensorOps.cpp - Implementation of the linalg TensorOps operation ---===// // // Copyright 2019 The MLIR Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // ============================================================================= // // This file implements a simple IR operation to create new tensor computation // operations in the linalg dialect. // //===----------------------------------------------------------------------===// #include "linalg1/Analysis.h" #include "linalg1/Common.h" #include "linalg3/Intrinsics.h" #include "linalg3/Ops.h" #include "mlir/IR/Builders.h" #include "mlir/IR/OpDefinition.h" #include "mlir/IR/OpImplementation.h" #include "mlir/IR/StandardTypes.h" #include "llvm/ADT/STLExtras.h" using namespace mlir; using namespace mlir::edsc; using namespace mlir::edsc::intrinsics; using namespace linalg; using namespace linalg::intrinsics; ////////////////////////////////////////////////////////////////////////////// // Implementation of DotOp. ////////////////////////////////////////////////////////////////////////////// SmallVector linalg::DotOp::loopsToOperandRangeMaps() { // A(K), B(K), C() assert(getRanges(*this).size() == 2); auto *context = ScopedContext::getContext(); auto d0 = getAffineDimExpr(0, context); // K // A(K), B(K), C() // (d0) -> (d0, d0)(%k) return SmallVector{AffineMap::get(1, 0, {d0}, {}), // A(K) AffineMap::get(1, 0, {d0}, {}), // B(K) AffineMap()}; // C() } void linalg::DotOp::emitScalarImplementation( llvm::ArrayRef parallelIvs, llvm::ArrayRef reductionIvs) { using IndexedValue = TemplatedIndexedValue; assert(reductionIvs.size() == 1); auto innermostLoop = getForInductionVarOwner(reductionIvs.back()); auto *body = innermostLoop.getBody(); using edsc::op::operator+; using edsc::op::operator*; using edsc::op::operator==; using edsc::intrinsics::select; // Account for affine.terminator in loop. FuncBuilder builder(body, std::prev(body->end(), 1)); ScopedContext scope(builder, innermostLoop.getLoc()); FloatType fTy = getOperand(0) ->getType() .cast() .getElementType() .cast(); IndexHandle zero(constant_index(0)); ValueHandle zerof = constant_float(llvm::APFloat::getZero(fTy.getFloatSemantics()), fTy); IndexHandle r_i(reductionIvs[0]); IndexedValue A(getOperand(0)), B(getOperand(1)), C(getOperand(2)); ValueHandle cond = (r_i == zero); ValueHandle scalarC = select(cond, zerof, *C()); C() = scalarC + A(r_i) * B(r_i); } ////////////////////////////////////////////////////////////////////////////// // Implementation of MatvecOp. ////////////////////////////////////////////////////////////////////////////// SmallVector linalg::MatvecOp::loopsToOperandRangeMaps() { // A(M, K), B(K), C(M) assert(getRanges(*this).size() == 4); auto *context = ScopedContext::getContext(); auto d0 = getAffineDimExpr(0, context); // M auto d1 = getAffineDimExpr(1, context); // K // A(M, K), B(K), C(M) // (d0, d1) -> (d0, d1, d1, d0)(%m, %k) return SmallVector{ AffineMap::get(2, 0, {d0, d1}, {}), // A(M, K) AffineMap::get(2, 0, {d1}, {}), // B(K) AffineMap::get(2, 0, {d0}, {})}; // C(M) } // The body expression for matvec is: C(i) = scalarC + A(i, r_j) * B(r_j) // The body expression for dot is: C() = A(r_i) * B(r_i); // So we must drop the `i` loop from the matvec. void linalg::MatvecOp::writeAsFinerGrainTensorContraction() { auto *op = getOperation(); auto *vA(getInputView(0)), *vB(getInputView(1)), *vC(getOutputView(0)); auto indexingPosPair = getViewRootIndexing(vA, 0); assert( llvm::isa_and_nonnull(indexingPosPair.first->getDefiningOp())); // clang-format off FuncBuilder builder(op); ScopedContext scope(builder, op->getLoc()); IndexHandle i; using linalg::common::LoopNestRangeBuilder; LoopNestRangeBuilder(&i, ValueHandle(indexingPosPair.first))({ [&i, &vA, &vB, &vC]() { ValueHandle sliceA = slice(vA, i, 0); ValueHandle sliceC = slice(vC, i, 0); dot(sliceA, vB, sliceC); /// NestedBuilders expect handles, we thus return an IndexHandle. return IndexHandle(); }() }); // clang-format on } void linalg::MatvecOp::emitScalarImplementation( llvm::ArrayRef parallelIvs, llvm::ArrayRef reductionIvs) { using IndexedValue = TemplatedIndexedValue; assert(reductionIvs.size() == 1); auto innermostLoop = getForInductionVarOwner(reductionIvs.back()); auto *body = innermostLoop.getBody(); using edsc::op::operator+; using edsc::op::operator*; using edsc::op::operator==; using edsc::intrinsics::select; // Account for affine.terminator in loop. FuncBuilder builder(body, std::prev(body->end(), 1)); ScopedContext scope(builder, innermostLoop.getLoc()); FloatType fTy = getOperand(0) ->getType() .cast() .getElementType() .cast(); IndexHandle i(parallelIvs[0]), r_j(reductionIvs[0]); IndexedValue A(getOperand(0)), B(getOperand(1)), C(getOperand(2)); IndexHandle zero(constant_index(0)); ValueHandle zerof = constant_float(llvm::APFloat::getZero(fTy.getFloatSemantics()), fTy); ValueHandle cond = (r_j == zero); ValueHandle scalarC = select(cond, zerof, *C(i)); C(i) = scalarC + A(i, r_j) * B(r_j); } ////////////////////////////////////////////////////////////////////////////// // Implementation of Matmul. ////////////////////////////////////////////////////////////////////////////// SmallVector linalg::MatmulOp::loopsToOperandRangeMaps() { // A(M, K), B(K, N), C(M, N) assert(getRanges(*this).size() == 6); auto *context = ScopedContext::getContext(); auto d0 = getAffineDimExpr(0, context); // M auto d1 = getAffineDimExpr(1, context); // N auto d2 = getAffineDimExpr(2, context); // K // A(M, K), B(K, N), C(M, N): // (d0, d1, d2) -> (d0, d2, d2, d1, d0, d1)(%m, %n, %k) return SmallVector{ AffineMap::get(3, 0, {d0, d2}, {}), // A(M, K) AffineMap::get(3, 0, {d2, d1}, {}), // B(K, N) AffineMap::get(3, 0, {d0, d1}, {}) // C(M, N) }; } // The body expression for matmul is: C(i, j) = scalarC + A(i, r_k) * B(r_k, j) // The body expression for matvec is: C(i) = scalarC + A(i, r_j) * B(r_j) // So we must drop the `j` loop from the matmul. // This is fine because parallel dimensions permute: we can just do it // declaratively. void linalg::MatmulOp::writeAsFinerGrainTensorContraction() { auto *op = getOperation(); auto *vA(getInputView(0)), *vB(getInputView(1)), *vC(getOutputView(0)); auto indexingPosPair = getViewRootIndexing(vB, 1); assert( llvm::isa_and_nonnull(indexingPosPair.first->getDefiningOp())); using linalg::common::LoopNestRangeBuilder; // clang-format off FuncBuilder builder(op); ScopedContext scope(builder, op->getLoc()); IndexHandle j; LoopNestRangeBuilder(&j, ValueHandle(indexingPosPair.first))({ [&j, &vA, &vB, &vC]() { ValueHandle sliceB = slice(vB, j, 1); ValueHandle sliceC = slice(vC, j, 1); matvec(vA, sliceB, sliceC); /// NestedBuilders expect handles, we thus return an IndexHandle. return IndexHandle(); }() }); // clang-format on } void linalg::MatmulOp::emitScalarImplementation( llvm::ArrayRef parallelIvs, llvm::ArrayRef reductionIvs) { using IndexedValue = TemplatedIndexedValue; assert(reductionIvs.size() == 1); auto innermostLoop = getForInductionVarOwner(reductionIvs.back()); auto *body = innermostLoop.getBody(); using edsc::op::operator+; using edsc::op::operator*; using edsc::op::operator==; using edsc::intrinsics::select; // Account for affine.terminator in loop. FuncBuilder builder(body, std::prev(body->end(), 1)); ScopedContext scope(builder, innermostLoop.getLoc()); FloatType fTy = getOperand(0) ->getType() .cast() .getElementType() .cast(); IndexHandle i(parallelIvs[0]), j(parallelIvs[1]), r_k(reductionIvs[0]); IndexedValue A(getOperand(0)), B(getOperand(1)), C(getOperand(2)); IndexHandle zero(constant_index(0)); ValueHandle zerof = constant_float(llvm::APFloat::getZero(fTy.getFloatSemantics()), fTy); ValueHandle cond = r_k == zero; ValueHandle scalarC = select(cond, zerof, *C(i, j)); C(i, j) = scalarC + A(i, r_k) * B(r_k, j); }