llvm/bolt/ReorderAlgorithm.h

//===- ReorderAlgorithm.h - Interface for basic block reorderng algorithms ===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Interface to different basic block reordering algorithms.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_BOLT_REORDER_ALGORITHM_H
#define LLVM_TOOLS_LLVM_BOLT_REORDER_ALGORITHM_H

#include "BinaryFunction.h"
#include "llvm/Support/ErrorHandling.h"
#include <unordered_map>
#include <memory>
#include <vector>


namespace llvm {

class raw_ostream;


namespace bolt {

class BinaryBasicBlock;
class BinaryFunction;

/// Objects of this class implement various basic block clustering algorithms.
/// Basic block clusters are chains of basic blocks that should be laid out
/// in this order to maximize performace. These algorithms group basic blocks
/// into clusters using execution profile data and various heuristics.
class ClusterAlgorithm {
public:
  using ClusterTy = std::vector<BinaryBasicBlock *>;
  std::vector<ClusterTy> Clusters;
  std::vector<std::unordered_map<uint32_t, uint64_t>> ClusterEdges;
  std::vector<double> AvgFreq;

  /// Group the basic blocks in the given function into clusters stored in the
  /// Clusters vector. Also encode relative weights between two clusters in
  /// the ClusterEdges vector if requested. This vector is indexed by
  /// the clusters indices in the Clusters vector.
  virtual void clusterBasicBlocks(const BinaryFunction &BF,
                                  bool ComputeEdges = false) = 0;

  /// Compute for each cluster its averagae execution frequency, that is
  /// the sum of average frequencies of its blocks (execution count / # instrs).
  /// The average frequencies are stored in the AvgFreq vector, index by the
  /// cluster indices in the Clusters vector.
  void computeClusterAverageFrequency();

  /// Clear clusters and related info.
  virtual void reset();

  void printClusters() const;

  virtual ~ClusterAlgorithm() {}
};


/// Base class for a greedy clustering algorithm that selects edges in order
/// based on some heuristic and uses them to join basic blocks into clusters.
class GreedyClusterAlgorithm : public ClusterAlgorithm {
protected:
  // Represents an edge between two basic blocks, with source, destination, and
  // profile count.
  struct EdgeTy {
    const BinaryBasicBlock *Src;
    const BinaryBasicBlock *Dst;
    uint64_t Count;

    EdgeTy(const BinaryBasicBlock *Src, const BinaryBasicBlock *Dst,
           uint64_t Count) :
      Src(Src), Dst(Dst), Count(Count) {}

    void print(raw_ostream &OS) const;
  };

  struct EdgeHash {
   size_t operator() (const EdgeTy &E) const;
  };

  struct EdgeEqual {
    bool operator() (const EdgeTy &A, const EdgeTy &B) const;
  };

  // Virtual methods that allow custom specialization of the heuristic used by
  // the algorithm to select edges.
  virtual void initQueue(
      std::vector<EdgeTy> &Queue, const BinaryFunction &BF) = 0;
  virtual void adjustQueue(
      std::vector<EdgeTy> &Queue, const BinaryFunction &BF) = 0;
  virtual bool areClustersCompatible(
      const ClusterTy &Front, const ClusterTy &Back, const EdgeTy &E) const = 0;

  // Map from basic block to owning cluster index.
  using BBToClusterMapTy = std::unordered_map<const BinaryBasicBlock *,
                                              unsigned>;
  BBToClusterMapTy BBToClusterMap;

public:
  void clusterBasicBlocks(const BinaryFunction &BF,
                          bool ComputeEdges = false) override;
  void reset() override;
};


/// This clustering algorithm is based on a greedy heuristic suggested by
/// Pettis and Hansen (PLDI '90).
class PHGreedyClusterAlgorithm : public GreedyClusterAlgorithm {
protected:
  void initQueue(
      std::vector<EdgeTy> &Queue, const BinaryFunction &BF)  override;
  void adjustQueue(
      std::vector<EdgeTy> &Queue, const BinaryFunction &BF) override;
  bool areClustersCompatible(
      const ClusterTy &Front, const ClusterTy &Back, const EdgeTy &E) const
  override;
};


/// This clustering algorithm is based on a greedy heuristic that is a
/// modification of the heuristic suggested by Pettis (PLDI '90). It is
/// geared towards minimizing branches.
class MinBranchGreedyClusterAlgorithm : public GreedyClusterAlgorithm {
private:
  // Map from an edge to its weight which is used by the algorithm to sort the
  // edges.
  std::unordered_map<EdgeTy, int64_t, EdgeHash, EdgeEqual> Weight;

  // The weight of an edge is calculated as the win in branches if we choose
  // to layout this edge as a fall-through. For example, consider the edges
  //  A -> B with execution count 500,
  //  A -> C with execution count 100, and
  //  D -> B with execution count 150
  // wher B, C are the only successors of A and A, D are thr only predessecors
  // of B. Then if we choose to layout edge A -> B as a fallthrough, the win in
  // branches would be 500 - 100 - 150 = 250. That is the weight of edge A->B.
  int64_t calculateWeight(const EdgeTy &E, const BinaryFunction &BF) const;

protected:
  void initQueue(
      std::vector<EdgeTy> &Queue, const BinaryFunction &BF)  override;
  void adjustQueue(
      std::vector<EdgeTy> &Queue, const BinaryFunction &BF) override;
  bool areClustersCompatible(
      const ClusterTy &Front, const ClusterTy &Back, const EdgeTy &E) const
  override;

public:
  void reset() override;
};


/// Objects of this class implement various basic block reordering alogrithms.
/// Most of these algorithms depend on a clustering alogrithm.
/// Here we have 3 conflicting goals as to how to layout clusters. If we want
/// to minimize jump offsets, we should put clusters with heavy inter-cluster
/// dependence as close as possible. If we want to maximize the probability
/// that all inter-cluster edges are predicted as not-taken, we should enforce
/// a topological order to make targets appear after sources, creating forward
/// branches. If we want to separate hot from cold blocks to maximize the
/// probability that unfrequently executed code doesn't pollute the cache, we
/// should put clusters in descending order of hotness.
class ReorderAlgorithm {
protected:
  std::unique_ptr<ClusterAlgorithm> CAlgo;

public:
  ReorderAlgorithm() { }
  explicit ReorderAlgorithm(std::unique_ptr<ClusterAlgorithm> CAlgo) :
    CAlgo(std::move(CAlgo)) { }

  using BasicBlockOrder = BinaryFunction::BasicBlockOrderType;

  /// Reorder the basic blocks of the given function and store the new order in
  /// the new Clusters vector.
  virtual void reorderBasicBlocks(
      const BinaryFunction &BF, BasicBlockOrder &Order) const = 0;

  void setClusterAlgorithm(ClusterAlgorithm *CAlgo) {
    this->CAlgo.reset(CAlgo);
  }

  virtual ~ReorderAlgorithm() { }
};


/// Dynamic programming implementation for the TSP, applied to BB layout. Find
/// the optimal way to maximize weight during a path traversing all BBs. In
/// this way, we will convert the hottest branches into fall-throughs.
///
/// Uses exponential amount of memory on the number of basic blocks and should
/// only be used for small functions.
class OptimalReorderAlgorithm : public ReorderAlgorithm {
public:
  void reorderBasicBlocks(
      const BinaryFunction &BF, BasicBlockOrder &Order) const override;
};


/// Simple algorithm that groups basic blocks into clusters and then
/// lays them out cluster after cluster.
class OptimizeReorderAlgorithm : public ReorderAlgorithm {
public:
  explicit OptimizeReorderAlgorithm(std::unique_ptr<ClusterAlgorithm> CAlgo) :
    ReorderAlgorithm(std::move(CAlgo)) { }

  void reorderBasicBlocks(
      const BinaryFunction &BF, BasicBlockOrder &Order) const override;
};


/// This reorder algorithm tries to ensure that all inter-cluster edges are
/// predicted as not-taken, by enforcing a topological order to make
/// targets appear after sources, creating forward branches.
class OptimizeBranchReorderAlgorithm : public ReorderAlgorithm {
public:
  explicit OptimizeBranchReorderAlgorithm(
      std::unique_ptr<ClusterAlgorithm> CAlgo) :
    ReorderAlgorithm(std::move(CAlgo)) { }

  void reorderBasicBlocks(
      const BinaryFunction &BF, BasicBlockOrder &Order) const override;
};


/// This reorder tries to separate hot from cold blocks to maximize the
/// probability that unfrequently executed code doesn't pollute the cache, by
/// putting clusters in descending order of hotness.
class OptimizeCacheReorderAlgorithm : public ReorderAlgorithm {
public:
  explicit OptimizeCacheReorderAlgorithm(
      std::unique_ptr<ClusterAlgorithm> CAlgo) :
    ReorderAlgorithm(std::move(CAlgo)) { }

  void reorderBasicBlocks(
      const BinaryFunction &BF, BasicBlockOrder &Order) const override;
};


/// Toy example that simply reverses the original basic block order.
class ReverseReorderAlgorithm : public ReorderAlgorithm {
public:
  void reorderBasicBlocks(
      const BinaryFunction &BF, BasicBlockOrder &Order) const override;
};


} // namespace bolt
} // namespace llvm

#endif