From 018cfa94d963f2a62eeba15e84f101a108476b95 Mon Sep 17 00:00:00 2001
From: Uday Bondhugula
Date: Sat, 14 Sep 2019 12:10:18 -0700
Subject: [PATCH] Clean up build trip count analysis method - avoid mutating
 IR

- NFC - on any pass/utility logic/output.

- Resolve TODO; the method building loop trip count maps was creating and
  deleting affine.apply ops (transforming IR from under analysis!, strictly
  speaking). Introduce AffineValueMap::difference to do this correctly
  (without the need to create any IR).

- Move AffineApplyNormalizer out so that its methods are reusable from
  AffineStructures.cpp; add a helper method 'normalize' to it. Fix
  AffineApplyNormalize::renumberOneDim (Issue tensorflow/mlir#89).

- Trim includes on files touched.

- add test case on a scenario previously not covered

Signed-off-by: Uday Bondhugula

Closes tensorflow/mlir#133

COPYBARA_INTEGRATE_REVIEW=https://github.com/tensorflow/mlir/pull/133 from bondhugula:trip-count-build 7fc34d857f7788f98b641792cafad6f5bd50e47b
PiperOrigin-RevId: 269101118
---
 mlir/include/mlir/Analysis/AffineStructures.h |  9 +-
 .../mlir/Dialect/AffineOps/AffineOps.h        | 72 ++++++++++++++++
 mlir/lib/Analysis/AffineStructures.cpp        | 38 ++++++++-
 mlir/lib/Analysis/LoopAnalysis.cpp            | 57 +++++--------
 mlir/lib/Dialect/AffineOps/AffineOps.cpp      | 82 +++----------------
 mlir/test/Transforms/unroll.mlir              | 30 ++++++-
 6 files changed, 174 insertions(+), 114 deletions(-)

diff --git a/mlir/include/mlir/Analysis/AffineStructures.h b/mlir/include/mlir/Analysis/AffineStructures.h
index 50df6f856f49..d85120a21c2f 100644
--- a/mlir/include/mlir/Analysis/AffineStructures.h
+++ b/mlir/include/mlir/Analysis/AffineStructures.h
@@ -123,7 +123,6 @@ public:
   // Creates an empty AffineValueMap (users should call 'reset' to reset map
   // and operands).
   AffineValueMap() {}
-  AffineValueMap(AffineMap map);
   AffineValueMap(AffineMap map, ArrayRef<Value *> operands,
                  ArrayRef<Value *> results = llvm::None);
 
@@ -136,6 +135,12 @@ public:
   void reset(AffineMap map, ArrayRef<Value *> operands,
              ArrayRef<Value *> results = llvm::None);
 
+  /// Return the value map that is the difference of value maps 'a' and 'b',
+  /// represented as an affine map and its operands. The output map + operands
+  /// are canonicalized and simplified.
+  static void difference(const AffineValueMap &a, const AffineValueMap &b,
+                         AffineValueMap *res);
+
   /// Return true if the idx^th result can be proved to be a multiple of
   /// 'factor', false otherwise.
   inline bool isMultipleOf(unsigned idx, int64_t factor) const;
@@ -150,6 +155,8 @@ public:
   /// Return true if this is an identity map.
   bool isIdentity() const;
 
+  void setResult(unsigned i, AffineExpr e) { map.setResult(i, e); }
+  AffineExpr getResult(unsigned i) { return map.getResult(i); }
   inline unsigned getNumOperands() const { return operands.size(); }
   inline unsigned getNumDims() const { return map.getNumDims(); }
   inline unsigned getNumSymbols() const { return map.getNumSymbols(); }
diff --git a/mlir/include/mlir/Dialect/AffineOps/AffineOps.h b/mlir/include/mlir/Dialect/AffineOps/AffineOps.h
index 03b945c0a5f5..60a1c686460c 100644
--- a/mlir/include/mlir/Dialect/AffineOps/AffineOps.h
+++ b/mlir/include/mlir/Dialect/AffineOps/AffineOps.h
@@ -31,6 +31,7 @@ namespace mlir {
 
 class AffineBound;
+class AffineDimExpr;
 class AffineValueMap;
 class AffineTerminatorOp;
 class FlatAffineConstraints;
@@ -597,6 +598,77 @@ private:
   friend class AffineForOp;
 };
 
+/// An `AffineApplyNormalizer` is a helper class that supports renumbering
+/// operands of AffineApplyOp. This acts as a reindexing map of Value* to
+/// positional dims or symbols and allows simplifications such as:
+///
+/// ```mlir
+///    %1 = affine.apply (d0, d1) -> (d0 - d1) (%0, %0)
+/// ```
+///
+/// into:
+///
+/// ```mlir
+///    %1 = affine.apply () -> (0)
+/// ```
+struct AffineApplyNormalizer {
+  AffineApplyNormalizer(AffineMap map, ArrayRef<Value *> operands);
+
+  /// Returns the AffineMap resulting from normalization.
+  AffineMap getAffineMap() { return affineMap; }
+
+  SmallVector<Value *, 8> getOperands() {
+    SmallVector<Value *, 8> res(reorderedDims);
+    res.append(concatenatedSymbols.begin(), concatenatedSymbols.end());
+    return res;
+  }
+
+  unsigned getNumSymbols() { return concatenatedSymbols.size(); }
+  unsigned getNumDims() { return reorderedDims.size(); }
+
+  /// Normalizes 'otherMap' and its operands 'otherOperands' to map to this
+  /// normalizer's coordinate space.
+  void normalize(AffineMap *otherMap, SmallVectorImpl<Value *> *otherOperands);
+
+private:
+  /// Helper function to insert `v` into the coordinate system of the current
+  /// AffineApplyNormalizer. Returns the AffineDimExpr with the corresponding
+  /// renumbered position.
+  AffineDimExpr renumberOneDim(Value *v);
+
+  /// Given an `other` normalizer, this rewrites `other.affineMap` in the
+  /// coordinate system of the current AffineApplyNormalizer.
+  /// Returns the rewritten AffineMap and updates the dims and symbols of
+  /// `this`.
+  AffineMap renumber(const AffineApplyNormalizer &other);
+
+  /// Maps of Value* to position in `affineMap`.
+  DenseMap<Value *, unsigned> dimValueToPosition;
+
+  /// Ordered dims and symbols matching positional dims and symbols in
+  /// `affineMap`.
+  SmallVector<Value *, 8> reorderedDims;
+  SmallVector<Value *, 8> concatenatedSymbols;
+
+  AffineMap affineMap;
+
+  /// Used with RAII to control the depth at which AffineApply are composed
+  /// recursively. Only accepts depth 1 for now to allow a behavior where a
+  /// newly composed AffineApplyOp does not increase the length of the chain of
+  /// AffineApplyOps. Full composition is implemented iteratively on top of
+  /// this behavior.
+  static unsigned &affineApplyDepth() {
+    static thread_local unsigned depth = 0;
+    return depth;
+  }
+  static constexpr unsigned kMaxAffineApplyDepth = 1;
+
+  AffineApplyNormalizer() { affineApplyDepth()++; }
+
+public:
+  ~AffineApplyNormalizer() { affineApplyDepth()--; }
+};
+
 } // end namespace mlir
 
 #endif
diff --git a/mlir/lib/Analysis/AffineStructures.cpp b/mlir/lib/Analysis/AffineStructures.cpp
index 2804ac68b4ba..8efb9d11c538 100644
--- a/mlir/lib/Analysis/AffineStructures.cpp
+++ b/mlir/lib/Analysis/AffineStructures.cpp
@@ -23,11 +23,8 @@
 #include "mlir/Dialect/AffineOps/AffineOps.h"
 #include "mlir/Dialect/StandardOps/Ops.h"
 #include "mlir/IR/AffineExprVisitor.h"
-#include "mlir/IR/AffineMap.h"
 #include "mlir/IR/IntegerSet.h"
-#include "mlir/IR/Operation.h"
 #include "mlir/Support/MathExtras.h"
-#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -228,6 +225,41 @@ void AffineValueMap::reset(AffineMap map, ArrayRef<Value *> operands,
   this->results.assign(results.begin(), results.end());
 }
 
+void AffineValueMap::difference(const AffineValueMap &a,
+                                const AffineValueMap &b, AffineValueMap *res) {
+  assert(a.getNumResults() == b.getNumResults() && "invalid inputs");
+
+  // Fully compose A's map + operands.
+  auto aMap = a.getAffineMap();
+  SmallVector<Value *, 4> aOperands(a.getOperands().begin(),
+                                    a.getOperands().end());
+  fullyComposeAffineMapAndOperands(&aMap, &aOperands);
+
+  // Use the affine apply normalizer to get B's map into A's coordinate space.
+  AffineApplyNormalizer normalizer(aMap, aOperands);
+  SmallVector<Value *, 4> bOperands(b.getOperands().begin(),
+                                    b.getOperands().end());
+  auto bMap = b.getAffineMap();
+  normalizer.normalize(&bMap, &bOperands);
+
+  assert(std::equal(bOperands.begin(), bOperands.end(),
+                    normalizer.getOperands().begin()) &&
+         "operands are expected to be the same after normalization");
+
+  // Construct the difference expressions.
+  SmallVector<AffineExpr, 4> diffExprs;
+  diffExprs.reserve(a.getNumResults());
+  for (unsigned i = 0, e = bMap.getNumResults(); i < e; ++i)
+    diffExprs.push_back(normalizer.getAffineMap().getResult(i) -
+                        bMap.getResult(i));
+
+  auto diffMap = AffineMap::get(normalizer.getNumDims(),
+                                normalizer.getNumSymbols(), diffExprs);
+  canonicalizeMapAndOperands(&diffMap, &bOperands);
+  diffMap = simplifyAffineMap(diffMap);
+  res->reset(diffMap, bOperands);
+}
+
 // Returns true and sets 'indexOfMatch' if 'valueToMatch' is found in
 // 'valuesToSearch' beginning at 'indexStart'. Returns false otherwise.
 static bool findIndex(Value *valueToMatch, ArrayRef<Value *> valuesToSearch,
diff --git a/mlir/lib/Analysis/LoopAnalysis.cpp b/mlir/lib/Analysis/LoopAnalysis.cpp
index 21d47c3c1ea5..45012b0f9c46 100644
--- a/mlir/lib/Analysis/LoopAnalysis.cpp
+++ b/mlir/lib/Analysis/LoopAnalysis.cpp
@@ -24,14 +24,8 @@
 #include "mlir/Analysis/AffineAnalysis.h"
 #include "mlir/Analysis/AffineStructures.h"
 #include "mlir/Analysis/NestedMatcher.h"
-#include "mlir/Analysis/VectorAnalysis.h"
 #include "mlir/Dialect/AffineOps/AffineOps.h"
-#include "mlir/Dialect/StandardOps/Ops.h"
 #include "mlir/Dialect/VectorOps/VectorOps.h"
-#include "mlir/IR/AffineMap.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/Operation.h"
-#include "mlir/Support/Functional.h"
 #include "mlir/Support/MathExtras.h"
 
 #include "llvm/ADT/DenseSet.h"
@@ -49,7 +43,7 @@ using namespace mlir;
 // pure analysis method relying on FlatAffineConstraints; the latter will also
 // be more powerful (since both inequalities and equalities will be considered).
 void mlir::buildTripCountMapAndOperands(
-    AffineForOp forOp, AffineMap *map,
+    AffineForOp forOp, AffineMap *tripCountMap,
     SmallVectorImpl<Value *> *tripCountOperands) {
   int64_t loopSpan;
@@ -62,48 +56,39 @@ void mlir::buildTripCountMapAndOperands(
     loopSpan = ub - lb;
     if (loopSpan < 0)
       loopSpan = 0;
-    *map = b.getConstantAffineMap(ceilDiv(loopSpan, step));
+    *tripCountMap = b.getConstantAffineMap(ceilDiv(loopSpan, step));
     tripCountOperands->clear();
     return;
   }
   auto lbMap = forOp.getLowerBoundMap();
   auto ubMap = forOp.getUpperBoundMap();
   if (lbMap.getNumResults() != 1) {
-    *map = AffineMap();
+    *tripCountMap = AffineMap();
     return;
   }
 
   SmallVector<Value *, 4> lbOperands(forOp.getLowerBoundOperands());
   SmallVector<Value *, 4> ubOperands(forOp.getUpperBoundOperands());
-  auto lb = b.create<AffineApplyOp>(forOp.getLoc(), lbMap, lbOperands);
-  SmallVector<Value *, 4> ubs;
-  ubs.reserve(ubMap.getNumResults());
-  for (auto ubExpr : ubMap.getResults())
-    ubs.push_back(b.create<AffineApplyOp>(
-        forOp.getLoc(),
-        b.getAffineMap(ubMap.getNumDims(), ubMap.getNumSymbols(), {ubExpr}),
-        ubOperands));
-
-  tripCountOperands->clear();
-  tripCountOperands->reserve(1 + ubs.size());
-  tripCountOperands->push_back(lb);
-  tripCountOperands->append(ubs.begin(), ubs.end());
+  // Difference of each upper bound expression from the single lower bound
+  // expression (divided by the step) provides the expressions for the trip
+  // count map.
+  AffineValueMap ubValueMap(ubMap, ubOperands);
 
-  SmallVector<AffineExpr, 4> tripCountExprs(ubs.size());
-  for (unsigned i = 0, e = ubs.size(); i < e; i++)
-    tripCountExprs[i] =
-        (b.getAffineDimExpr(1 + i) - b.getAffineDimExpr(0)).ceilDiv(step);
-  *map = b.getAffineMap(1 + ubs.size(), 0, tripCountExprs);
+  SmallVector<AffineExpr, 4> lbSplatExpr(ubValueMap.getNumResults(),
+                                         lbMap.getResult(0));
+  auto lbMapSplat =
+      b.getAffineMap(lbMap.getNumDims(), lbMap.getNumSymbols(), lbSplatExpr);
+  AffineValueMap lbSplatValueMap(lbMapSplat, lbOperands);
 
-  fullyComposeAffineMapAndOperands(map, tripCountOperands);
-  *map = simplifyAffineMap(*map);
-  canonicalizeMapAndOperands(map, tripCountOperands);
-  // Remove any affine.apply's that became dead as a result of composition,
-  // simplification, and canonicalization above.
-  for (auto *v : ubs)
-    if (v->use_empty())
-      v->getDefiningOp()->erase();
-  if (lb.use_empty())
-    lb.erase();
+  AffineValueMap tripCountValueMap;
+  AffineValueMap::difference(ubValueMap, lbSplatValueMap, &tripCountValueMap);
+  for (unsigned i = 0, e = tripCountValueMap.getNumResults(); i < e; ++i)
+    tripCountValueMap.setResult(i,
+                                tripCountValueMap.getResult(i).ceilDiv(step));
+
+  *tripCountMap = tripCountValueMap.getAffineMap();
+  tripCountOperands->assign(tripCountValueMap.getOperands().begin(),
+                            tripCountValueMap.getOperands().end());
 }
 
 /// Returns the trip count of the loop if it's a constant, None otherwise. This
diff --git a/mlir/lib/Dialect/AffineOps/AffineOps.cpp b/mlir/lib/Dialect/AffineOps/AffineOps.cpp
index b8fb7ca1261b..2c98806d072e 100644
--- a/mlir/lib/Dialect/AffineOps/AffineOps.cpp
+++ b/mlir/lib/Dialect/AffineOps/AffineOps.cpp
@@ -17,8 +17,6 @@
 
 #include "mlir/Dialect/AffineOps/AffineOps.h"
 #include "mlir/Dialect/StandardOps/Ops.h"
-#include "mlir/IR/Block.h"
-#include "mlir/IR/Builders.h"
 #include "mlir/IR/Function.h"
 #include "mlir/IR/IntegerSet.h"
 #include "mlir/IR/Matchers.h"
@@ -284,73 +282,6 @@ OpFoldResult AffineApplyOp::fold(ArrayRef<Attribute> operands) {
   return result[0];
 }
 
-namespace {
-/// An `AffineApplyNormalizer` is a helper class that is not visible to the user
-/// and supports renumbering operands of AffineApplyOp. This acts as a
-/// reindexing map of Value* to positional dims or symbols and allows
-/// simplifications such as:
-///
-/// ```mlir
-///    %1 = affine.apply (d0, d1) -> (d0 - d1) (%0, %0)
-/// ```
-///
-/// into:
-///
-/// ```mlir
-///    %1 = affine.apply () -> (0)
-/// ```
-struct AffineApplyNormalizer {
-  AffineApplyNormalizer(AffineMap map, ArrayRef<Value *> operands);
-
-  /// Returns the AffineMap resulting from normalization.
-  AffineMap getAffineMap() { return affineMap; }
-
-  SmallVector<Value *, 8> getOperands() {
-    SmallVector<Value *, 8> res(reorderedDims);
-    res.append(concatenatedSymbols.begin(), concatenatedSymbols.end());
-    return res;
-  }
-
-private:
-  /// Helper function to insert `v` into the coordinate system of the current
-  /// AffineApplyNormalizer. Returns the AffineDimExpr with the corresponding
-  /// renumbered position.
-  AffineDimExpr renumberOneDim(Value *v);
-
-  /// Given an `other` normalizer, this rewrites `other.affineMap` in the
-  /// coordinate system of the current AffineApplyNormalizer.
-  /// Returns the rewritten AffineMap and updates the dims and symbols of
-  /// `this`.
-  AffineMap renumber(const AffineApplyNormalizer &other);
-
-  /// Maps of Value* to position in `affineMap`.
-  DenseMap<Value *, unsigned> dimValueToPosition;
-
-  /// Ordered dims and symbols matching positional dims and symbols in
-  /// `affineMap`.
-  SmallVector<Value *, 8> reorderedDims;
-  SmallVector<Value *, 8> concatenatedSymbols;
-
-  AffineMap affineMap;
-
-  /// Used with RAII to control the depth at which AffineApply are composed
-  /// recursively. Only accepts depth 1 for now to allow a behavior where a
-  /// newly composed AffineApplyOp does not increase the length of the chain of
-  /// AffineApplyOps. Full composition is implemented iteratively on top of
-  /// this behavior.
-  static unsigned &affineApplyDepth() {
-    static thread_local unsigned depth = 0;
-    return depth;
-  }
-  static constexpr unsigned kMaxAffineApplyDepth = 1;
-
-  AffineApplyNormalizer() { affineApplyDepth()++; }
-
-public:
-  ~AffineApplyNormalizer() { affineApplyDepth()--; }
-};
-} // end anonymous namespace.
-
 AffineDimExpr AffineApplyNormalizer::renumberOneDim(Value *v) {
   DenseMap<Value *, unsigned>::iterator iterPos;
   bool inserted = false;
@@ -383,7 +314,8 @@ AffineMap AffineApplyNormalizer::renumber(const AffineApplyNormalizer &other) {
                              other.concatenatedSymbols.end());
   auto map = other.affineMap;
   return map.replaceDimsAndSymbols(dimRemapping, symRemapping,
-                                   dimRemapping.size(), symRemapping.size());
+                                   reorderedDims.size(),
+                                   concatenatedSymbols.size());
 }
 
 // Gather the positions of the operands that are produced by an AffineApplyOp.
@@ -586,6 +518,16 @@ AffineApplyNormalizer::AffineApplyNormalizer(AffineMap map,
   LLVM_DEBUG(dbgs() << "\n");
 }
 
+void AffineApplyNormalizer::normalize(AffineMap *otherMap,
+                                      SmallVectorImpl<Value *> *otherOperands) {
+  AffineApplyNormalizer other(*otherMap, *otherOperands);
+  *otherMap = renumber(other);
+
+  otherOperands->reserve(reorderedDims.size() + concatenatedSymbols.size());
+  otherOperands->assign(reorderedDims.begin(), reorderedDims.end());
+  otherOperands->append(concatenatedSymbols.begin(), concatenatedSymbols.end());
+}
+
 /// Implements `map` and `operands` composition and simplification to support
 /// `makeComposedAffineApply`. This can be called to achieve the same effects
 /// on `map` and `operands` without creating an AffineApplyOp that needs to be
diff --git a/mlir/test/Transforms/unroll.mlir b/mlir/test/Transforms/unroll.mlir
index b92de84073dc..208df58c84e8 100644
--- a/mlir/test/Transforms/unroll.mlir
+++ b/mlir/test/Transforms/unroll.mlir
@@ -509,6 +509,28 @@ func @loop_nest_symbolic_bound(%N : index) {
   return
 }
 
+// UNROLL-BY-4-LABEL: func @loop_nest_symbolic_bound_with_step
+// UNROLL-BY-4-SAME: %[[N:.*]]: index
+func @loop_nest_symbolic_bound_with_step(%N : index) {
+  // UNROLL-BY-4: affine.for %arg1 = 0 to 100 {
+  affine.for %i = 0 to 100 {
+    affine.for %j = 0 to %N step 3 {
+      %x = "foo"() : () -> i32
+    }
+// UNROLL-BY-4: affine.for %{{.*}} = 0 to #map{{[0-9]+}}()[%[[N]]] step 12 {
+// UNROLL-BY-4: "foo"()
+// UNROLL-BY-4-NEXT: "foo"()
+// UNROLL-BY-4-NEXT: "foo"()
+// UNROLL-BY-4-NEXT: "foo"()
+// UNROLL-BY-4-NEXT: }
+// A cleanup loop will be be generated here.
+// UNROLL-BY-4-NEXT: affine.for %{{.*}} = #map{{[0-9]+}}()[%[[N]]] to %[[N]] step 3 {
+// UNROLL-BY-4-NEXT: "foo"()
+// UNROLL-BY-4_NEXT: }
+  }
+  return
+}
+
 // UNROLL-BY-4-LABEL: func @loop_nest_symbolic_and_min_upper_bound
 func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index) {
   affine.for %i = %M to min ()[s0, s1] -> (s0, s1, 1024)()[%N, %K] {
@@ -529,8 +551,8 @@ func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index)
 
 // The trip count here is a multiple of four, but this can be inferred only
 // through composition. Check for no cleanup loop.
-// UNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_unroll_factor
-func @loop_nest_non_trivial_multiple_unroll_factor(%M : index, %N : index) {
+// UNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_upper_bound
+func @loop_nest_non_trivial_multiple_upper_bound(%M : index, %N : index) {
   %T = affine.apply (d0) -> (4*d0 + 1)(%M)
   %K = affine.apply (d0) -> (d0 - 1) (%T)
   affine.for %i = 0 to min (d0, d1) -> (4 * d0, d1, 1024)(%N, %K) {
@@ -542,8 +564,8 @@ func @loop_nest_non_trivial_multiple_upper_bound(%M : index, %N : index) {
 // UNROLL-BY-4-NOT: for
 // UNROLL-BY-4: return
 
-// UNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_unroll_factor_2
-func @loop_nest_non_trivial_multiple_unroll_factor_2(%M : index, %N : index) {
+// UNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_upper_bound_alt
+func @loop_nest_non_trivial_multiple_upper_bound_alt(%M : index, %N : index) {
  %K = affine.apply (d0) -> (4*d0) (%M)
  affine.for %i = 0 to min ()[s0, s1] -> (4 * s0, s1, 1024)()[%N, %K] {
    "foo"() : () -> ()
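
For illustration of what the reworked analysis computes on the newly covered scenario (symbolic upper bound with a non-unit step): the trip count map is now obtained by taking the difference of the upper and lower bound value maps and ceil-dividing each result by the step, with no affine.apply ops materialized. A minimal sketch, not part of the patch; the symbol position assumed for %N is illustrative:

```mlir
// Hypothetical standalone loop mirroring the added test case.
affine.for %j = 0 to %N step 3 {
  "foo"() : () -> ()
}
// buildTripCountMapAndOperands would yield, assuming %N binds to symbol s0:
//   trip count map: ()[s0] -> (s0 ceildiv 3), operands: [%N]
```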