From 018cfa94d963f2a62eeba15e84f101a108476b95 Mon Sep 17 00:00:00 2001
From: Uday Bondhugula
Date: Sat, 14 Sep 2019 12:10:18 -0700
Subject: [PATCH] Clean up build trip count analysis method - avoid mutating
 IR

- NFC - on any pass/utility logic/output.

- Resolve TODO; the method building loop trip count maps was creating and
  deleting affine.apply ops (transforming IR from under analysis!, strictly
  speaking). Introduce AffineValueMap::difference to do this correctly
  (without the need to create any IR).

- Move AffineApplyNormalizer out so that its methods are reusable from
  AffineStructures.cpp; add a helper method 'normalize' to it. Fix
  AffineApplyNormalize::renumberOneDim (Issue tensorflow/mlir#89).

- Trim includes on files touched.

- add test case on a scenario previously not covered

Signed-off-by: Uday Bondhugula

Closes tensorflow/mlir#133

COPYBARA_INTEGRATE_REVIEW=https://github.com/tensorflow/mlir/pull/133 from bondhugula:trip-count-build 7fc34d857f7788f98b641792cafad6f5bd50e47b
PiperOrigin-RevId: 269101118
---
 mlir/include/mlir/Analysis/AffineStructures.h |  9 +-
 .../mlir/Dialect/AffineOps/AffineOps.h        | 72 ++++++++++++++++
 mlir/lib/Analysis/AffineStructures.cpp        | 38 ++++++++-
 mlir/lib/Analysis/LoopAnalysis.cpp            | 57 +++++--------
 mlir/lib/Dialect/AffineOps/AffineOps.cpp      | 82 +++----------------
 mlir/test/Transforms/unroll.mlir              | 30 ++++++-
 6 files changed, 174 insertions(+), 114 deletions(-)

diff --git a/mlir/include/mlir/Analysis/AffineStructures.h b/mlir/include/mlir/Analysis/AffineStructures.h
index 50df6f856f49..d85120a21c2f 100644
--- a/mlir/include/mlir/Analysis/AffineStructures.h
+++ b/mlir/include/mlir/Analysis/AffineStructures.h
@@ -123,7 +123,6 @@ public:
   // Creates an empty AffineValueMap (users should call 'reset' to reset map
   // and operands).
   AffineValueMap() {}
-  AffineValueMap(AffineMap map);
   AffineValueMap(AffineMap map, ArrayRef<Value *> operands,
                  ArrayRef<Value *> results = llvm::None);
 
@@ -136,6 +135,12 @@ public:
   void reset(AffineMap map, ArrayRef<Value *> operands,
              ArrayRef<Value *> results = llvm::None);
 
+  /// Return the value map that is the difference of value maps 'a' and 'b',
+  /// represented as an affine map and its operands. The output map + operands
+  /// are canonicalized and simplified.
+  static void difference(const AffineValueMap &a, const AffineValueMap &b,
+                         AffineValueMap *res);
+
   /// Return true if the idx^th result can be proved to be a multiple of
   /// 'factor', false otherwise.
   inline bool isMultipleOf(unsigned idx, int64_t factor) const;
@@ -150,6 +155,8 @@ public:
   /// Return true if this is an identity map.
   bool isIdentity() const;
 
+  void setResult(unsigned i, AffineExpr e) { map.setResult(i, e); }
+  AffineExpr getResult(unsigned i) { return map.getResult(i); }
   inline unsigned getNumOperands() const { return operands.size(); }
   inline unsigned getNumDims() const { return map.getNumDims(); }
   inline unsigned getNumSymbols() const { return map.getNumSymbols(); }
diff --git a/mlir/include/mlir/Dialect/AffineOps/AffineOps.h b/mlir/include/mlir/Dialect/AffineOps/AffineOps.h
index 03b945c0a5f5..60a1c686460c 100644
--- a/mlir/include/mlir/Dialect/AffineOps/AffineOps.h
+++ b/mlir/include/mlir/Dialect/AffineOps/AffineOps.h
@@ -31,6 +31,7 @@ namespace mlir {
 
 class AffineBound;
+class AffineDimExpr;
 class AffineValueMap;
 class AffineTerminatorOp;
 class FlatAffineConstraints;
@@ -597,6 +598,77 @@ private:
   friend class AffineForOp;
 };
 
+/// An `AffineApplyNormalizer` is a helper class that supports renumbering
+/// operands of AffineApplyOp. This acts as a reindexing map of Value* to
+/// positional dims or symbols and allows simplifications such as:
+///
+/// ```mlir
+///    %1 = affine.apply (d0, d1) -> (d0 - d1) (%0, %0)
+/// ```
+///
+/// into:
+///
+/// ```mlir
+///    %1 = affine.apply () -> (0)
+/// ```
+struct AffineApplyNormalizer {
+  AffineApplyNormalizer(AffineMap map, ArrayRef<Value *> operands);
+
+  /// Returns the AffineMap resulting from normalization.
+  AffineMap getAffineMap() { return affineMap; }
+
+  SmallVector<Value *, 8> getOperands() {
+    SmallVector<Value *, 8> res(reorderedDims);
+    res.append(concatenatedSymbols.begin(), concatenatedSymbols.end());
+    return res;
+  }
+
+  unsigned getNumSymbols() { return concatenatedSymbols.size(); }
+  unsigned getNumDims() { return reorderedDims.size(); }
+
+  /// Normalizes 'otherMap' and its operands 'otherOperands' to map to this
+  /// normalizer's coordinate space.
+  void normalize(AffineMap *otherMap, SmallVectorImpl<Value *> *otherOperands);
+
+private:
+  /// Helper function to insert `v` into the coordinate system of the current
+  /// AffineApplyNormalizer. Returns the AffineDimExpr with the corresponding
+  /// renumbered position.
+  AffineDimExpr renumberOneDim(Value *v);
+
+  /// Given an `other` normalizer, this rewrites `other.affineMap` in the
+  /// coordinate system of the current AffineApplyNormalizer.
+  /// Returns the rewritten AffineMap and updates the dims and symbols of
+  /// `this`.
+  AffineMap renumber(const AffineApplyNormalizer &other);
+
+  /// Maps of Value* to position in `affineMap`.
+  DenseMap<Value *, unsigned> dimValueToPosition;
+
+  /// Ordered dims and symbols matching positional dims and symbols in
+  /// `affineMap`.
+  SmallVector<Value *, 8> reorderedDims;
+  SmallVector<Value *, 8> concatenatedSymbols;
+
+  AffineMap affineMap;
+
+  /// Used with RAII to control the depth at which AffineApply are composed
+  /// recursively. Only accepts depth 1 for now to allow a behavior where a
+  /// newly composed AffineApplyOp does not increase the length of the chain of
+  /// AffineApplyOps. Full composition is implemented iteratively on top of
+  /// this behavior.
+  static unsigned &affineApplyDepth() {
+    static thread_local unsigned depth = 0;
+    return depth;
+  }
+  static constexpr unsigned kMaxAffineApplyDepth = 1;
+
+  AffineApplyNormalizer() { affineApplyDepth()++; }
+
+public:
+  ~AffineApplyNormalizer() { affineApplyDepth()--; }
+};
+
 } // end namespace mlir
 
 #endif
diff --git a/mlir/lib/Analysis/AffineStructures.cpp b/mlir/lib/Analysis/AffineStructures.cpp
index 2804ac68b4ba..8efb9d11c538 100644
--- a/mlir/lib/Analysis/AffineStructures.cpp
+++ b/mlir/lib/Analysis/AffineStructures.cpp
@@ -23,11 +23,8 @@
 #include "mlir/Dialect/AffineOps/AffineOps.h"
 #include "mlir/Dialect/StandardOps/Ops.h"
 #include "mlir/IR/AffineExprVisitor.h"
-#include "mlir/IR/AffineMap.h"
 #include "mlir/IR/IntegerSet.h"
-#include "mlir/IR/Operation.h"
 #include "mlir/Support/MathExtras.h"
-#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -228,6 +225,41 @@ void AffineValueMap::reset(AffineMap map, ArrayRef<Value *> operands,
   this->results.assign(results.begin(), results.end());
 }
 
+void AffineValueMap::difference(const AffineValueMap &a,
+                                const AffineValueMap &b, AffineValueMap *res) {
+  assert(a.getNumResults() == b.getNumResults() && "invalid inputs");
+
+  // Fully compose A's map + operands.
+  auto aMap = a.getAffineMap();
+  SmallVector<Value *, 4> aOperands(a.getOperands().begin(),
+                                    a.getOperands().end());
+  fullyComposeAffineMapAndOperands(&aMap, &aOperands);
+
+  // Use the affine apply normalizer to get B's map into A's coordinate space.
+  AffineApplyNormalizer normalizer(aMap, aOperands);
+  SmallVector<Value *, 4> bOperands(b.getOperands().begin(),
+                                    b.getOperands().end());
+  auto bMap = b.getAffineMap();
+  normalizer.normalize(&bMap, &bOperands);
+
+  assert(std::equal(bOperands.begin(), bOperands.end(),
+                    normalizer.getOperands().begin()) &&
+         "operands are expected to be the same after normalization");
+
+  // Construct the difference expressions.
+  SmallVector<AffineExpr, 4> diffExprs;
+  diffExprs.reserve(a.getNumResults());
+  for (unsigned i = 0, e = bMap.getNumResults(); i < e; ++i)
+    diffExprs.push_back(normalizer.getAffineMap().getResult(i) -
+                        bMap.getResult(i));
+
+  auto diffMap = AffineMap::get(normalizer.getNumDims(),
+                                normalizer.getNumSymbols(), diffExprs);
+  canonicalizeMapAndOperands(&diffMap, &bOperands);
+  diffMap = simplifyAffineMap(diffMap);
+  res->reset(diffMap, bOperands);
+}
+
 // Returns true and sets 'indexOfMatch' if 'valueToMatch' is found in
 // 'valuesToSearch' beginning at 'indexStart'. Returns false otherwise.
 static bool findIndex(Value *valueToMatch, ArrayRef<Value *> valuesToSearch,
diff --git a/mlir/lib/Analysis/LoopAnalysis.cpp b/mlir/lib/Analysis/LoopAnalysis.cpp
index 21d47c3c1ea5..45012b0f9c46 100644
--- a/mlir/lib/Analysis/LoopAnalysis.cpp
+++ b/mlir/lib/Analysis/LoopAnalysis.cpp
@@ -24,14 +24,8 @@
 #include "mlir/Analysis/AffineAnalysis.h"
 #include "mlir/Analysis/AffineStructures.h"
 #include "mlir/Analysis/NestedMatcher.h"
-#include "mlir/Analysis/VectorAnalysis.h"
 #include "mlir/Dialect/AffineOps/AffineOps.h"
-#include "mlir/Dialect/StandardOps/Ops.h"
 #include "mlir/Dialect/VectorOps/VectorOps.h"
-#include "mlir/IR/AffineMap.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/Operation.h"
-#include "mlir/Support/Functional.h"
 #include "mlir/Support/MathExtras.h"
 
 #include "llvm/ADT/DenseSet.h"
@@ -49,7 +43,7 @@ using namespace mlir;
 // pure analysis method relying on FlatAffineConstraints; the latter will also
 // be more powerful (since both inequalities and equalities will be considered).
 void mlir::buildTripCountMapAndOperands(
-    AffineForOp forOp, AffineMap *map,
+    AffineForOp forOp, AffineMap *tripCountMap,
     SmallVectorImpl<Value *> *tripCountOperands) {
   int64_t loopSpan;
@@ -62,48 +56,39 @@ void mlir::buildTripCountMapAndOperands(
     loopSpan = ub - lb;
     if (loopSpan < 0)
       loopSpan = 0;
-    *map = b.getConstantAffineMap(ceilDiv(loopSpan, step));
+    *tripCountMap = b.getConstantAffineMap(ceilDiv(loopSpan, step));
     tripCountOperands->clear();
     return;
   }
   auto lbMap = forOp.getLowerBoundMap();
   auto ubMap = forOp.getUpperBoundMap();
   if (lbMap.getNumResults() != 1) {
-    *map = AffineMap();
+    *tripCountMap = AffineMap();
     return;
   }
 
   SmallVector<Value *, 4> lbOperands(forOp.getLowerBoundOperands());
   SmallVector<Value *, 4> ubOperands(forOp.getUpperBoundOperands());
-  auto lb = b.create<AffineApplyOp>(forOp.getLoc(), lbMap, lbOperands);
-  SmallVector<Value *, 4> ubs;
-  ubs.reserve(ubMap.getNumResults());
-  for (auto ubExpr : ubMap.getResults())
-    ubs.push_back(b.create<AffineApplyOp>(
-        forOp.getLoc(),
-        b.getAffineMap(ubMap.getNumDims(), ubMap.getNumSymbols(), {ubExpr}),
-        ubOperands));
-
-  tripCountOperands->clear();
-  tripCountOperands->reserve(1 + ubs.size());
-  tripCountOperands->push_back(lb);
-  tripCountOperands->append(ubs.begin(), ubs.end());
+  // Difference of each upper bound expression from the single lower bound
+  // expression (divided by the step) provides the expressions for the trip
+  // count map.
+  AffineValueMap ubValueMap(ubMap, ubOperands);
 
-  SmallVector<AffineExpr, 4> tripCountExprs(ubs.size());
-  for (unsigned i = 0, e = ubs.size(); i < e; i++)
-    tripCountExprs[i] =
-        (b.getAffineDimExpr(1 + i) - b.getAffineDimExpr(0)).ceilDiv(step);
-  *map = b.getAffineMap(1 + ubs.size(), 0, tripCountExprs);
+  SmallVector<AffineExpr, 4> lbSplatExpr(ubValueMap.getNumResults(),
+                                         lbMap.getResult(0));
+  auto lbMapSplat =
+      b.getAffineMap(lbMap.getNumDims(), lbMap.getNumSymbols(), lbSplatExpr);
+  AffineValueMap lbSplatValueMap(lbMapSplat, lbOperands);
 
-  fullyComposeAffineMapAndOperands(map, tripCountOperands);
-  *map = simplifyAffineMap(*map);
-  canonicalizeMapAndOperands(map, tripCountOperands);
-  // Remove any affine.apply's that became dead as a result of composition,
-  // simplification, and canonicalization above.
-  for (auto *v : ubs)
-    if (v->use_empty())
-      v->getDefiningOp()->erase();
-  if (lb.use_empty())
-    lb.erase();
+  AffineValueMap tripCountValueMap;
+  AffineValueMap::difference(ubValueMap, lbSplatValueMap, &tripCountValueMap);
+  for (unsigned i = 0, e = tripCountValueMap.getNumResults(); i < e; ++i)
+    tripCountValueMap.setResult(i,
+                                tripCountValueMap.getResult(i).ceilDiv(step));
+
+  *tripCountMap = tripCountValueMap.getAffineMap();
+  tripCountOperands->assign(tripCountValueMap.getOperands().begin(),
+                            tripCountValueMap.getOperands().end());
 }
 
 /// Returns the trip count of the loop if it's a constant, None otherwise. This
diff --git a/mlir/lib/Dialect/AffineOps/AffineOps.cpp b/mlir/lib/Dialect/AffineOps/AffineOps.cpp
index b8fb7ca1261b..2c98806d072e 100644
--- a/mlir/lib/Dialect/AffineOps/AffineOps.cpp
+++ b/mlir/lib/Dialect/AffineOps/AffineOps.cpp
@@ -17,8 +17,6 @@
 
 #include "mlir/Dialect/AffineOps/AffineOps.h"
 #include "mlir/Dialect/StandardOps/Ops.h"
-#include "mlir/IR/Block.h"
-#include "mlir/IR/Builders.h"
 #include "mlir/IR/Function.h"
 #include "mlir/IR/IntegerSet.h"
 #include "mlir/IR/Matchers.h"
@@ -284,73 +282,6 @@ OpFoldResult AffineApplyOp::fold(ArrayRef<Attribute> operands) {
   return result[0];
 }
 
-namespace {
-/// An `AffineApplyNormalizer` is a helper class that is not visible to the user
-/// and supports renumbering operands of AffineApplyOp. This acts as a
-/// reindexing map of Value* to positional dims or symbols and allows
-/// simplifications such as:
-///
-/// ```mlir
-///    %1 = affine.apply (d0, d1) -> (d0 - d1) (%0, %0)
-/// ```
-///
-/// into:
-///
-/// ```mlir
-///    %1 = affine.apply () -> (0)
-/// ```
-struct AffineApplyNormalizer {
-  AffineApplyNormalizer(AffineMap map, ArrayRef<Value *> operands);
-
-  /// Returns the AffineMap resulting from normalization.
-  AffineMap getAffineMap() { return affineMap; }
-
-  SmallVector<Value *, 8> getOperands() {
-    SmallVector<Value *, 8> res(reorderedDims);
-    res.append(concatenatedSymbols.begin(), concatenatedSymbols.end());
-    return res;
-  }
-
-private:
-  /// Helper function to insert `v` into the coordinate system of the current
-  /// AffineApplyNormalizer. Returns the AffineDimExpr with the corresponding
-  /// renumbered position.
-  AffineDimExpr renumberOneDim(Value *v);
-
-  /// Given an `other` normalizer, this rewrites `other.affineMap` in the
-  /// coordinate system of the current AffineApplyNormalizer.
-  /// Returns the rewritten AffineMap and updates the dims and symbols of
-  /// `this`.
-  AffineMap renumber(const AffineApplyNormalizer &other);
-
-  /// Maps of Value* to position in `affineMap`.
-  DenseMap<Value *, unsigned> dimValueToPosition;
-
-  /// Ordered dims and symbols matching positional dims and symbols in
-  /// `affineMap`.
-  SmallVector<Value *, 8> reorderedDims;
-  SmallVector<Value *, 8> concatenatedSymbols;
-
-  AffineMap affineMap;
-
-  /// Used with RAII to control the depth at which AffineApply are composed
-  /// recursively. Only accepts depth 1 for now to allow a behavior where a
-  /// newly composed AffineApplyOp does not increase the length of the chain of
-  /// AffineApplyOps. Full composition is implemented iteratively on top of
-  /// this behavior.
-  static unsigned &affineApplyDepth() {
-    static thread_local unsigned depth = 0;
-    return depth;
-  }
-  static constexpr unsigned kMaxAffineApplyDepth = 1;
-
-  AffineApplyNormalizer() { affineApplyDepth()++; }
-
-public:
-  ~AffineApplyNormalizer() { affineApplyDepth()--; }
-};
-} // end anonymous namespace.
-
 AffineDimExpr AffineApplyNormalizer::renumberOneDim(Value *v) {
   DenseMap<Value *, unsigned>::iterator iterPos;
   bool inserted = false;
@@ -383,7 +314,8 @@ AffineMap AffineApplyNormalizer::renumber(const AffineApplyNormalizer &other) {
                              other.concatenatedSymbols.end());
   auto map = other.affineMap;
   return map.replaceDimsAndSymbols(dimRemapping, symRemapping,
-                                   dimRemapping.size(), symRemapping.size());
+                                   reorderedDims.size(),
+                                   concatenatedSymbols.size());
 }
 
 // Gather the positions of the operands that are produced by an AffineApplyOp.
@@ -586,6 +518,16 @@ AffineApplyNormalizer::AffineApplyNormalizer(AffineMap map,
   LLVM_DEBUG(dbgs() << "\n");
 }
 
+void AffineApplyNormalizer::normalize(AffineMap *otherMap,
+                                      SmallVectorImpl<Value *> *otherOperands) {
+  AffineApplyNormalizer other(*otherMap, *otherOperands);
+  *otherMap = renumber(other);
+
+  otherOperands->reserve(reorderedDims.size() + concatenatedSymbols.size());
+  otherOperands->assign(reorderedDims.begin(), reorderedDims.end());
+  otherOperands->append(concatenatedSymbols.begin(), concatenatedSymbols.end());
+}
+
 /// Implements `map` and `operands` composition and simplification to support
 /// `makeComposedAffineApply`. This can be called to achieve the same effects
 /// on `map` and `operands` without creating an AffineApplyOp that needs to be
diff --git a/mlir/test/Transforms/unroll.mlir b/mlir/test/Transforms/unroll.mlir
index b92de84073dc..208df58c84e8 100644
--- a/mlir/test/Transforms/unroll.mlir
+++ b/mlir/test/Transforms/unroll.mlir
@@ -509,6 +509,28 @@ func @loop_nest_symbolic_bound(%N : index) {
   return
 }
 
+// UNROLL-BY-4-LABEL: func @loop_nest_symbolic_bound_with_step
+// UNROLL-BY-4-SAME: %[[N:.*]]: index
+func @loop_nest_symbolic_bound_with_step(%N : index) {
+  // UNROLL-BY-4: affine.for %arg1 = 0 to 100 {
+  affine.for %i = 0 to 100 {
+    affine.for %j = 0 to %N step 3 {
+      %x = "foo"() : () -> i32
+    }
+// UNROLL-BY-4: affine.for %{{.*}} = 0 to #map{{[0-9]+}}()[%[[N]]] step 12 {
+// UNROLL-BY-4: "foo"()
+// UNROLL-BY-4-NEXT: "foo"()
+// UNROLL-BY-4-NEXT: "foo"()
+// UNROLL-BY-4-NEXT: "foo"()
+// UNROLL-BY-4-NEXT: }
+// A cleanup loop will be be generated here.
+// UNROLL-BY-4-NEXT: affine.for %{{.*}} = #map{{[0-9]+}}()[%[[N]]] to %[[N]] step 3 {
+// UNROLL-BY-4-NEXT: "foo"()
+// UNROLL-BY-4_NEXT: }
+  }
+  return
+}
+
 // UNROLL-BY-4-LABEL: func @loop_nest_symbolic_and_min_upper_bound
 func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index) {
   affine.for %i = %M to min ()[s0, s1] -> (s0, s1, 1024)()[%N, %K] {
@@ -529,8 +551,8 @@ func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index)
 
 // The trip count here is a multiple of four, but this can be inferred only
 // through composition. Check for no cleanup loop.
-// UNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_unroll_factor
-func @loop_nest_non_trivial_multiple_unroll_factor(%M : index, %N : index) {
+// UNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_upper_bound
+func @loop_nest_non_trivial_multiple_upper_bound(%M : index, %N : index) {
   %T = affine.apply (d0) -> (4*d0 + 1)(%M)
   %K = affine.apply (d0) -> (d0 - 1) (%T)
   affine.for %i = 0 to min (d0, d1) -> (4 * d0, d1, 1024)(%N, %K) {
@@ -542,8 +564,8 @@ func @loop_nest_non_trivial_multiple_upper_bound(%M : index, %N : index) {
 // UNROLL-BY-4-NOT: for
 // UNROLL-BY-4: return
 
-// UNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_unroll_factor_2
-func @loop_nest_non_trivial_multiple_unroll_factor_2(%M : index, %N : index) {
+// UNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_upper_bound_alt
+func @loop_nest_non_trivial_multiple_upper_bound_alt(%M : index, %N : index) {
  %K = affine.apply (d0) -> (4*d0) (%M)
  affine.for %i = 0 to min ()[s0, s1] -> (4 * s0, s1, 1024)()[%N, %K] {
    "foo"() : () -> ()
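
For illustration of what the reworked analysis computes on the newly covered scenario (symbolic upper bound with a non-unit step): the trip count map is now obtained by taking the difference of the upper and lower bound value maps and ceil-dividing each result by the step, with no affine.apply ops materialized. A minimal sketch, not part of the patch; the symbol position assumed for %N is illustrative:

```mlir
// Hypothetical standalone loop mirroring the added test case.
affine.for %j = 0 to %N step 3 {
  "foo"() : () -> ()
}
// buildTripCountMapAndOperands would yield, assuming %N binds to symbol s0:
//   trip count map: ()[s0] -> (s0 ceildiv 3), operands: [%N]
```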