Maksim Panchenko a10fb73ab3 Compute ClusterEdges only when necessary.
We only need ClusterEdges in reordering algorithm optimized for
branches and the computation is quite resource-hungry, thus it
makes sense to only do it when needed.

Some refactoring too.

(cherry picked from FBD3721107)
2016-08-15 15:37:00 -07:00

261 lines
9 KiB

//===- ReorderAlgorithm.h - Basic block reordering algorithms ---*- C++ -*-===//
// The LLVM Compiler Infrastructure
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
// Interface to different basic block reordering algorithms.
#include "BinaryFunction.h"
#include "llvm/Support/ErrorHandling.h"
#include <unordered_map>
#include <memory>
#include <vector>
namespace llvm {
// In the declarations visible here raw_ostream is only taken by reference
// (EdgeTy::print), so a forward declaration is sufficient.
class raw_ostream;
namespace bolt {
// Forward declarations of the types the interfaces below operate on.
// NOTE(review): BinaryFunction.h is already included above, and
// ReorderAlgorithm names BinaryFunction::BasicBlockOrderType, which requires
// the complete type - so these are presumably kept for readability only.
class BinaryBasicBlock;
class BinaryFunction;
/// Objects of this class implement various basic block clustering algorithms.
/// Basic block clusters are chains of basic blocks that should be laid out
/// in this order to maximize performance. These algorithms group basic blocks
/// into clusters using execution profile data and various heuristics.
///
/// NOTE(review): no access specifiers and no closing brace are visible for
/// this class in this view of the file; some lines appear to be missing.
class ClusterAlgorithm {
/// A cluster: an ordered chain of basic blocks.
using ClusterTy = std::vector<BinaryBasicBlock *>;
/// The clusters produced by clusterBasicBlocks().
std::vector<ClusterTy> Clusters;
/// Relative weights between clusters; the outer vector is indexed by the
/// cluster indices in the Clusters vector. Only filled in when edge
/// computation is requested from clusterBasicBlocks().
/// NOTE(review): the inner map presumably goes from the index of the other
/// cluster to the accumulated weight - confirm against the implementation.
std::vector<std::unordered_map<uint32_t, uint64_t>> ClusterEdges;
/// Average execution frequency of each cluster, indexed by the cluster
/// indices in the Clusters vector. See computeClusterAverageFrequency().
std::vector<double> AvgFreq;
/// Group the basic blocks in the given function into clusters stored in the
/// Clusters vector. Also encode relative weights between two clusters in
/// the ClusterEdges vector if requested. This vector is indexed by
/// the clusters indices in the Clusters vector.
///
/// \param BF            function whose basic blocks are clustered.
/// \param ComputeEdges  when true, ClusterEdges is computed as well.
///
/// Default arguments of virtual functions are taken from the static type of
/// the object expression, so overrides must repeat the same default (false).
virtual void clusterBasicBlocks(const BinaryFunction &BF,
bool ComputeEdges = false) = 0;
/// Compute for each cluster its average execution frequency, that is
/// the sum of average frequencies of its blocks (execution count / # instrs).
/// The average frequencies are stored in the AvgFreq vector, indexed by the
/// cluster indices in the Clusters vector.
void computeClusterAverageFrequency();
/// Clear clusters and related info. Virtual so that derived algorithms can
/// also drop their own per-function state.
virtual void reset();
/// Print the current clusters.
/// NOTE(review): the output destination is not visible from this header.
void printClusters() const;
/// Virtual destructor: instances are owned through a
/// std::unique_ptr<ClusterAlgorithm> (see ReorderAlgorithm::CAlgo) and are
/// therefore destroyed via a base-class pointer.
virtual ~ClusterAlgorithm() {}
/// Base class for a greedy clustering algorithm that selects edges in order
/// based on some heuristic and uses them to join basic blocks into clusters.
///
/// NOTE(review): access specifiers and the closing braces of this class and
/// of its nested structs are not visible in this view of the file.
class GreedyClusterAlgorithm : public ClusterAlgorithm {
// Represents an edge between two basic blocks, with source, destination, and
// profile count.
struct EdgeTy {
// Source block of the edge.
const BinaryBasicBlock *Src;
// Destination block of the edge.
const BinaryBasicBlock *Dst;
// Profile count of the edge.
uint64_t Count;
// Member-wise constructor; the parameters intentionally share the member
// names, which is well-formed inside a mem-initializer list.
EdgeTy(const BinaryBasicBlock *Src, const BinaryBasicBlock *Dst,
uint64_t Count) :
Src(Src), Dst(Dst), Count(Count) {}
// Write a textual form of the edge to OS.
void print(raw_ostream &OS) const;
// Hash functor for EdgeTy, so edges can be used as keys of an unordered
// container (see MinBranchGreedyClusterAlgorithm::Weight).
struct EdgeHash {
size_t operator() (const EdgeTy &E) const;
// Equality functor for EdgeTy; companion of EdgeHash.
// NOTE(review): which fields participate in hashing/equality is defined in
// the implementation file - the two must agree.
struct EdgeEqual {
bool operator() (const EdgeTy &A, const EdgeTy &B) const;
// Virtual methods that allow custom specialization of the heuristic used by
// the algorithm to select edges.
//
// NOTE(review): presumably fills Queue with the candidate edges of BF in the
// order the heuristic wants them processed - confirm in the implementation.
virtual void initQueue(
std::vector<EdgeTy> &Queue, const BinaryFunction &BF) = 0;
// NOTE(review): presumably re-evaluates Queue after clusters have changed -
// confirm in the implementation.
virtual void adjustQueue(
std::vector<EdgeTy> &Queue, const BinaryFunction &BF) = 0;
// NOTE(review): presumably decides whether clusters Front and Back may be
// joined through edge E - confirm in the implementation.
virtual bool areClustersCompatible(
const ClusterTy &Front, const ClusterTy &Back, const EdgeTy &E) const = 0;
// Map from basic block to owning cluster index.
// NOTE(review): the alias below ends in a comma; the line carrying the
// mapped (index) type and the closing '>' is missing from this view.
using BBToClusterMapTy = std::unordered_map<const BinaryBasicBlock *,
BBToClusterMapTy BBToClusterMap;
// Implements the clustering entry point declared in ClusterAlgorithm. The
// default argument repeats the base-class default (false) on purpose.
void clusterBasicBlocks(const BinaryFunction &BF,
bool ComputeEdges = false) override;
// Overrides ClusterAlgorithm::reset().
// NOTE(review): presumably also clears BBToClusterMap - confirm.
void reset() override;
/// This clustering algorithm is based on a greedy heuristic suggested by
/// Pettis and Hansen (PLDI '90).
///
/// It supplies the three heuristic hooks declared by GreedyClusterAlgorithm.
class PHGreedyClusterAlgorithm : public GreedyClusterAlgorithm {
// Heuristic hook: see GreedyClusterAlgorithm::initQueue.
void initQueue(
std::vector<EdgeTy> &Queue, const BinaryFunction &BF) override;
// Heuristic hook: see GreedyClusterAlgorithm::adjustQueue.
void adjustQueue(
std::vector<EdgeTy> &Queue, const BinaryFunction &BF) override;
// Heuristic hook: see GreedyClusterAlgorithm::areClustersCompatible.
// NOTE(review): the declaration below stops after 'const'; its terminating
// line (and the end of the class) is missing from this view.
bool areClustersCompatible(
const ClusterTy &Front, const ClusterTy &Back, const EdgeTy &E) const
/// This clustering algorithm is based on a greedy heuristic that is a
/// modification of the heuristic suggested by Pettis and Hansen (PLDI '90).
/// It is geared towards minimizing branches.
class MinBranchGreedyClusterAlgorithm : public GreedyClusterAlgorithm {
// Map from an edge to its weight which is used by the algorithm to sort the
// edges. Weights are signed because the computed win can be negative.
std::unordered_map<EdgeTy, int64_t, EdgeHash, EdgeEqual> Weight;
// The weight of an edge is calculated as the win in branches if we choose
// to layout this edge as a fall-through. For example, consider the edges
// A -> B with execution count 500,
// A -> C with execution count 100, and
// D -> B with execution count 150
// where B, C are the only successors of A and A, D are the only predecessors
// of B. Then if we choose to layout edge A -> B as a fallthrough, the win in
// branches would be 500 - 100 - 150 = 250. That is the weight of edge A->B.
int64_t calculateWeight(const EdgeTy &E, const BinaryFunction &BF) const;
// Heuristic hook: see GreedyClusterAlgorithm::initQueue.
void initQueue(
std::vector<EdgeTy> &Queue, const BinaryFunction &BF) override;
// Heuristic hook: see GreedyClusterAlgorithm::adjustQueue.
void adjustQueue(
std::vector<EdgeTy> &Queue, const BinaryFunction &BF) override;
// Heuristic hook: see GreedyClusterAlgorithm::areClustersCompatible.
// NOTE(review): the declaration below stops after 'const'; its terminating
// line is missing from this view.
bool areClustersCompatible(
const ClusterTy &Front, const ClusterTy &Back, const EdgeTy &E) const
// Overrides the inherited reset().
// NOTE(review): presumably also clears Weight - confirm in the implementation.
void reset() override;
/// Objects of this class implement various basic block reordering algorithms.
/// Most of these algorithms depend on a clustering algorithm.
/// Here we have 3 conflicting goals as to how to layout clusters. If we want
/// to minimize jump offsets, we should put clusters with heavy inter-cluster
/// dependence as close as possible. If we want to maximize the probability
/// that all inter-cluster edges are predicted as not-taken, we should enforce
/// a topological order to make targets appear after sources, creating forward
/// branches. If we want to separate hot from cold blocks to maximize the
/// probability that infrequently executed code doesn't pollute the cache, we
/// should put clusters in descending order of hotness.
///
/// NOTE(review): access specifiers and the closing brace of this class are
/// not visible in this view of the file.
class ReorderAlgorithm {
/// Clustering algorithm used by the reordering; owned by this object. It is
/// null after default construction.
std::unique_ptr<ClusterAlgorithm> CAlgo;
/// Create a reorder algorithm with no clustering algorithm attached.
ReorderAlgorithm() { }
/// Create a reorder algorithm that takes ownership of \p CAlgo. The parameter
/// shadows the member of the same name; inside the mem-initializer the
/// argument of std::move refers to the parameter.
explicit ReorderAlgorithm(std::unique_ptr<ClusterAlgorithm> CAlgo) :
CAlgo(std::move(CAlgo)) { }
/// Basic block layout container, as defined by BinaryFunction.
using BasicBlockOrder = BinaryFunction::BasicBlockOrderType;
/// Reorder the basic blocks of the given function and return the new order
/// through \p Order.
/// NOTE(review): the previous wording said the order is stored "in the new
/// Clusters vector", which does not match this signature - confirm against
/// the implementations.
virtual void reorderBasicBlocks(
const BinaryFunction &BF, BasicBlockOrder &Order) const = 0;
/// Replace the clustering algorithm.
/// NOTE(review): the body of this function is missing from this view. Since
/// the member is a std::unique_ptr, the raw pointer is presumably adopted
/// (ownership transferred) - confirm, and make sure callers pass a
/// heap-allocated object they do not delete themselves.
void setClusterAlgorithm(ClusterAlgorithm *CAlgo) {
/// Virtual destructor so that derived algorithms can be destroyed through a
/// ReorderAlgorithm pointer.
virtual ~ReorderAlgorithm() { }
/// Dynamic programming implementation for the TSP, applied to BB layout. Find
/// the optimal way to maximize weight during a path traversing all BBs. In
/// this way, we will convert the hottest branches into fall-throughs.
/// Uses exponential amount of memory on the number of basic blocks and should
/// only be used for small functions.
///
/// No constructor taking a ClusterAlgorithm is declared here, so the base
/// class is default-constructed and no clustering algorithm is attached.
class OptimalReorderAlgorithm : public ReorderAlgorithm {
/// Compute the layout for \p BF and return it through \p Order.
void reorderBasicBlocks(
const BinaryFunction &BF, BasicBlockOrder &Order) const override;
/// Simple algorithm that groups basic blocks into clusters and then
/// lays them out cluster after cluster.
class OptimizeReorderAlgorithm : public ReorderAlgorithm {
/// Take ownership of \p CAlgo and forward it to the base class. Explicit to
/// prevent implicit conversion from a unique_ptr.
explicit OptimizeReorderAlgorithm(std::unique_ptr<ClusterAlgorithm> CAlgo) :
ReorderAlgorithm(std::move(CAlgo)) { }
/// Compute the layout for \p BF and return it through \p Order.
void reorderBasicBlocks(
const BinaryFunction &BF, BasicBlockOrder &Order) const override;
/// This reorder algorithm tries to ensure that all inter-cluster edges are
/// predicted as not-taken, by enforcing a topological order to make
/// targets appear after sources, creating forward branches.
class OptimizeBranchReorderAlgorithm : public ReorderAlgorithm {
/// Take ownership of \p CAlgo and forward it to the base class. Explicit to
/// prevent implicit conversion from a unique_ptr.
explicit OptimizeBranchReorderAlgorithm(
std::unique_ptr<ClusterAlgorithm> CAlgo) :
ReorderAlgorithm(std::move(CAlgo)) { }
/// Compute the layout for \p BF and return it through \p Order.
/// NOTE(review): this is presumably the algorithm that needs ClusterEdges
/// (ordering by inter-cluster edges) - confirm in the implementation.
void reorderBasicBlocks(
const BinaryFunction &BF, BasicBlockOrder &Order) const override;
/// This reorder tries to separate hot from cold blocks to maximize the
/// probability that infrequently executed code doesn't pollute the cache, by
/// putting clusters in descending order of hotness.
class OptimizeCacheReorderAlgorithm : public ReorderAlgorithm {
/// Take ownership of \p CAlgo and forward it to the base class. Explicit to
/// prevent implicit conversion from a unique_ptr.
explicit OptimizeCacheReorderAlgorithm(
std::unique_ptr<ClusterAlgorithm> CAlgo) :
ReorderAlgorithm(std::move(CAlgo)) { }
/// Compute the layout for \p BF and return it through \p Order.
/// NOTE(review): "hotness" presumably corresponds to
/// ClusterAlgorithm::AvgFreq - confirm in the implementation.
void reorderBasicBlocks(
const BinaryFunction &BF, BasicBlockOrder &Order) const override;
/// Toy example that simply reverses the original basic block order.
///
/// No constructor taking a ClusterAlgorithm is declared here, so the base
/// class is default-constructed and no clustering algorithm is attached.
class ReverseReorderAlgorithm : public ReorderAlgorithm {
/// Return the reversed layout of \p BF through \p Order.
void reorderBasicBlocks(
const BinaryFunction &BF, BasicBlockOrder &Order) const override;
} // namespace bolt
} // namespace llvm