[regalloc] Remove -consider-local-interval-cost

Discussed extensively on D98232. The functionality introduced in D35816
never worked correctly. In D98232, it was fixed, but, as it was
introducing a large compile-time regression, and the value of the
original patch was called into doubt, we disabled it by default
everywhere. A year later, it appears that caused no grief, so it seems
safe to remove the disabled code.

This should be accompanied by re-opening bug 26810.

Differential Revision: https://reviews.llvm.org/D121128
This commit is contained in:
Mircea Trofin 2022-03-07 08:23:05 -08:00
parent b6a7600491
commit 294eca35a0
9 changed files with 49 additions and 611 deletions

View file

@ -272,11 +272,6 @@ public:
/// a finer grain to tune the register allocator.
virtual bool enableRALocalReassignment(CodeGenOpt::Level OptLevel) const;
/// True if the subtarget should consider the cost of local intervals
/// created by a split candidate when choosing the best split candidate. This
/// heuristic may be compile time intensive.
virtual bool enableAdvancedRASplitCost() const;
/// Enable use of alias analysis during code generation (during MI
/// scheduling, DAGCombine, etc.).
virtual bool useAA() const;

View file

@ -124,12 +124,6 @@ CSRFirstTimeCost("regalloc-csr-first-time-cost",
cl::desc("Cost for first time use of callee-saved register."),
cl::init(0), cl::Hidden);
static cl::opt<bool> ConsiderLocalIntervalCost(
"consider-local-interval-cost", cl::Hidden,
cl::desc("Consider the cost of local intervals created by a split "
"candidate when choosing the best split candidate."),
cl::init(false));
static cl::opt<long> GrowRegionComplexityBudget(
"grow-region-complexity-budget",
cl::desc("growRegion() does not scale with the number of BB edges, so "
@ -996,44 +990,12 @@ bool RAGreedy::splitCanCauseEvictionChain(Register Evictee,
return true;
}
/// Check if splitting VirtRegToSplit will create a local split interval
/// in basic block number BBNumber that may cause a spill.
///
/// \param VirtRegToSplit The register considered to be split.
/// \param Cand The split candidate that determines the physical
/// register we are splitting for and the interferences.
/// \param BBNumber The number of a BB for which the region split process
/// will create a local split interval.
/// \param Order The physical registers that may get evicted by a
/// split artifact of VirtRegToSplit.
/// \return True if splitting VirtRegToSplit may cause a spill, false
/// otherwise.
bool RAGreedy::splitCanCauseLocalSpill(unsigned VirtRegToSplit,
GlobalSplitCandidate &Cand,
unsigned BBNumber,
const AllocationOrder &Order) {
Cand.Intf.moveToBlock(BBNumber);
// Check if the local interval will find a non-interfering assignment
// against any physical register in the allocation order.
for (auto PhysReg : Order.getOrder()) {
if (!Matrix->checkInterference(Cand.Intf.first().getPrevIndex(),
Cand.Intf.last(), PhysReg))
return false;
}
// The local interval is not able to find a non-interfering assignment
// and not able to evict a less worthy interval, therefore, it can cause a
// spill.
return true;
}
/// calcGlobalSplitCost - Return the global split cost of following the split
/// pattern in LiveBundles. This cost should be added to the local cost of the
/// interference pattern in SplitConstraints.
///
BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
const AllocationOrder &Order,
bool *CanCauseEvictionChain) {
const AllocationOrder &Order) {
BlockFrequency GlobalCost = 0;
const BitVector &LiveBundles = Cand.LiveBundles;
Register VirtRegToSplit = SA->getParent().reg();
@ -1046,29 +1008,6 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
unsigned Ins = 0;
Cand.Intf.moveToBlock(BC.Number);
// Check whether a local interval is going to be created during the region
// split. Calculate advanced split cost (cost of local intervals) if option
// is enabled.
if (EnableAdvancedRASplitCost && Cand.Intf.hasInterference() && BI.LiveIn &&
BI.LiveOut && RegIn && RegOut) {
if (CanCauseEvictionChain &&
splitCanCauseEvictionChain(VirtRegToSplit, Cand, BC.Number, Order)) {
// This interference causes our eviction from this assignment, we might
// evict somebody else and eventually someone will spill, add that cost.
// See splitCanCauseEvictionChain for detailed description of scenarios.
GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
*CanCauseEvictionChain = true;
} else if (splitCanCauseLocalSpill(VirtRegToSplit, Cand, BC.Number,
Order)) {
// This interference causes local interval to spill, add that cost.
GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
}
}
if (BI.LiveIn)
Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
@ -1089,20 +1028,6 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
if (Cand.Intf.hasInterference()) {
GlobalCost += SpillPlacer->getBlockFrequency(Number);
GlobalCost += SpillPlacer->getBlockFrequency(Number);
// Check whether a local interval is going to be created during the
// region split.
if (EnableAdvancedRASplitCost && CanCauseEvictionChain &&
splitCanCauseEvictionChain(VirtRegToSplit, Cand, Number, Order)) {
// This interference causes our eviction from this assignment, we might
// evict somebody else, add that cost.
// See splitCanCauseEvictionChain for detailed description of
// scenarios.
GlobalCost += SpillPlacer->getBlockFrequency(Number);
GlobalCost += SpillPlacer->getBlockFrequency(Number);
*CanCauseEvictionChain = true;
}
}
continue;
}
@ -1285,19 +1210,8 @@ MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg,
MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
}
bool CanCauseEvictionChain = false;
unsigned BestCand =
calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands,
false /*IgnoreCSR*/, &CanCauseEvictionChain);
// Split candidates with compact regions can cause a bad eviction sequence.
// See splitCanCauseEvictionChain for detailed description of scenarios.
// To avoid it, we need to compare the cost with the spill cost and not the
// current max frequency.
if (HasCompact && (BestCost > SpillCost) && (BestCand != NoCand) &&
CanCauseEvictionChain) {
return MCRegister::NoRegister;
}
unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
NumCands, false /*IgnoreCSR*/);
// No solutions found, fall back to single block splitting.
if (!HasCompact && BestCand == NoCand)
@ -1309,8 +1223,8 @@ MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg,
unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
AllocationOrder &Order,
BlockFrequency &BestCost,
unsigned &NumCands, bool IgnoreCSR,
bool *CanCauseEvictionChain) {
unsigned &NumCands,
bool IgnoreCSR) {
unsigned BestCand = NoCand;
for (MCPhysReg PhysReg : Order) {
assert(PhysReg);
@ -1373,8 +1287,7 @@ unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
continue;
}
bool HasEvictionChain = false;
Cost += calcGlobalSplitCost(Cand, Order, &HasEvictionChain);
Cost += calcGlobalSplitCost(Cand, Order);
LLVM_DEBUG({
dbgs() << ", total = ";
MBFI->printBlockFreq(dbgs(), Cost) << " with bundles";
@ -1385,24 +1298,10 @@ unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
if (Cost < BestCost) {
BestCand = NumCands;
BestCost = Cost;
// See splitCanCauseEvictionChain for detailed description of bad
// eviction chain scenarios.
if (CanCauseEvictionChain)
*CanCauseEvictionChain = HasEvictionChain;
}
++NumCands;
}
if (CanCauseEvictionChain && BestCand != NoCand) {
// See splitCanCauseEvictionChain for detailed description of bad
// eviction chain scenarios.
LLVM_DEBUG(dbgs() << "Best split candidate of vreg "
<< printReg(VirtReg.reg(), TRI) << " may ");
if (!(*CanCauseEvictionChain))
LLVM_DEBUG(dbgs() << "not ");
LLVM_DEBUG(dbgs() << "cause bad eviction chain\n");
}
return BestCand;
}
@ -2738,11 +2637,6 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
TII = MF->getSubtarget().getInstrInfo();
RCI.runOnMachineFunction(mf);
EnableAdvancedRASplitCost =
ConsiderLocalIntervalCost.getNumOccurrences()
? ConsiderLocalIntervalCost
: MF->getSubtarget().enableAdvancedRASplitCost();
if (VerifyEnabled)
MF->verify(this, "Before greedy register allocator");

View file

@ -320,10 +320,6 @@ private:
/// Callee-save register cost, calculated once per machine function.
BlockFrequency CSRCost;
/// Enable or not the consideration of the cost of local intervals created
/// by a split candidate when choosing the best split candidate.
bool EnableAdvancedRASplitCost;
/// Set of broken hints that may be reconciled later because of eviction.
SmallSetVector<const LiveInterval *, 8> SetOfBrokenHints;
@ -380,12 +376,8 @@ private:
bool splitCanCauseEvictionChain(Register Evictee, GlobalSplitCandidate &Cand,
unsigned BBNumber,
const AllocationOrder &Order);
bool splitCanCauseLocalSpill(unsigned VirtRegToSplit,
GlobalSplitCandidate &Cand, unsigned BBNumber,
const AllocationOrder &Order);
BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate &,
const AllocationOrder &Order,
bool *CanCauseEvictionChain);
const AllocationOrder &Order);
bool calcCompactRegion(GlobalSplitCandidate &);
void splitAroundRegion(LiveRangeEdit &, ArrayRef<unsigned>);
void calcGapWeights(MCRegister, SmallVectorImpl<float> &);
@ -414,8 +406,7 @@ private:
unsigned calculateRegionSplitCost(const LiveInterval &VirtReg,
AllocationOrder &Order,
BlockFrequency &BestCost,
unsigned &NumCands, bool IgnoreCSR,
bool *CanCauseEvictionChain = nullptr);
unsigned &NumCands, bool IgnoreCSR);
/// Perform region splitting.
unsigned doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand,
bool HasCompact, SmallVectorImpl<Register> &NewVRegs);

View file

@ -45,10 +45,6 @@ bool TargetSubtargetInfo::enableRALocalReassignment(
return true;
}
bool TargetSubtargetInfo::enableAdvancedRASplitCost() const {
return false;
}
bool TargetSubtargetInfo::enablePostRAScheduler() const {
return getSchedModel().PostRAScheduler;
}

View file

@ -623,8 +623,6 @@ public:
bool enableEarlyIfConversion() const override;
bool enableAdvancedRASplitCost() const override { return false; }
std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
bool isCallingConvWin64(CallingConv::ID CC) const {

View file

@ -995,8 +995,6 @@ public:
AntiDepBreakMode getAntiDepBreakMode() const override {
return TargetSubtargetInfo::ANTIDEP_CRITICAL;
}
bool enableAdvancedRASplitCost() const override { return false; }
};
} // end namespace llvm

View file

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -consider-local-interval-cost -mtriple=aarch64-arm-none-eabi < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-arm-none-eabi < %s | FileCheck %s
@A = external dso_local local_unnamed_addr global [8 x [8 x i64]], align 8
@B = external dso_local local_unnamed_addr global [8 x [8 x i64]], align 8
@ -22,7 +22,7 @@ define dso_local void @run_test() local_unnamed_addr #0 {
; CHECK-NEXT: .cfi_offset b13, -48
; CHECK-NEXT: .cfi_offset b14, -56
; CHECK-NEXT: .cfi_offset b15, -64
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: movi v14.2d, #0000000000000000
; CHECK-NEXT: adrp x10, B+48
; CHECK-NEXT: adrp x11, A
; CHECK-NEXT: mov x8, xzr
@ -31,6 +31,7 @@ define dso_local void @run_test() local_unnamed_addr #0 {
; CHECK-NEXT: add x11, x11, :lo12:A
; CHECK-NEXT: // implicit-def: $q2
; CHECK-NEXT: // implicit-def: $q3
; CHECK-NEXT: // implicit-def: $q15
; CHECK-NEXT: // implicit-def: $q4
; CHECK-NEXT: // implicit-def: $q5
; CHECK-NEXT: // implicit-def: $q6
@ -57,23 +58,21 @@ define dso_local void @run_test() local_unnamed_addr #0 {
; CHECK-NEXT: // implicit-def: $q11
; CHECK-NEXT: // implicit-def: $q12
; CHECK-NEXT: // implicit-def: $q13
; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: // implicit-def: $q0
; CHECK-NEXT: // kill: killed $q0
; CHECK-NEXT: .LBB0_1: // %for.cond1.preheader
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: mov x12, xzr
; CHECK-NEXT: stp q15, q14, [sp] // 32-byte Folded Spill
; CHECK-NEXT: ldr q14, [x8]
; CHECK-NEXT: ldr q15, [x10], #64
; CHECK-NEXT: add x15, x11, x8
; CHECK-NEXT: add x9, x9, #1
; CHECK-NEXT: ldr q15, [x10], #64
; CHECK-NEXT: ldr q0, [x12]
; CHECK-NEXT: fmov x13, d14
; CHECK-NEXT: add x9, x9, #1
; CHECK-NEXT: ldr x12, [x12]
; CHECK-NEXT: fmov x0, d15
; CHECK-NEXT: fmov x13, d14
; CHECK-NEXT: mov x14, v14.d[1]
; CHECK-NEXT: ldr x15, [x15, #128]
; CHECK-NEXT: fmov x0, d15
; CHECK-NEXT: fmov x16, d0
; CHECK-NEXT: ldr x15, [x15, #128]
; CHECK-NEXT: mul x17, x13, x12
; CHECK-NEXT: mov x18, v0.d[1]
; CHECK-NEXT: mul x4, x0, x12
@ -85,45 +84,51 @@ define dso_local void @run_test() local_unnamed_addr #0 {
; CHECK-NEXT: fmov d15, x4
; CHECK-NEXT: fmov d14, x1
; CHECK-NEXT: mul x1, x18, x12
; CHECK-NEXT: ldr x2, [x8], #8
; CHECK-NEXT: mov v0.d[1], x3
; CHECK-NEXT: mul x3, x16, x15
; CHECK-NEXT: ldr x2, [x8], #8
; CHECK-NEXT: mul x12, x17, x12
; CHECK-NEXT: fmov d1, x5
; CHECK-NEXT: mul x13, x13, x2
; CHECK-NEXT: cmp x8, #64
; CHECK-NEXT: mov v14.d[1], x1
; CHECK-NEXT: mul x1, x14, x15
; CHECK-NEXT: add v12.2d, v12.2d, v0.2d
; CHECK-NEXT: mul x14, x14, x2
; CHECK-NEXT: mul x13, x13, x2
; CHECK-NEXT: fmov d0, x3
; CHECK-NEXT: mul x3, x0, x15
; CHECK-NEXT: mov v15.d[1], x12
; CHECK-NEXT: mul x12, x18, x2
; CHECK-NEXT: mul x18, x18, x15
; CHECK-NEXT: fmov d0, x3
; CHECK-NEXT: mov v1.d[1], x1
; CHECK-NEXT: mul x18, x18, x15
; CHECK-NEXT: mul x16, x16, x2
; CHECK-NEXT: mul x3, x0, x15
; CHECK-NEXT: cmp x8, #64
; CHECK-NEXT: mul x15, x17, x15
; CHECK-NEXT: add v13.2d, v13.2d, v14.2d
; CHECK-NEXT: mul x14, x14, x2
; CHECK-NEXT: add v11.2d, v11.2d, v14.2d
; CHECK-NEXT: fmov d14, x3
; CHECK-NEXT: add v10.2d, v10.2d, v15.2d
; CHECK-NEXT: fmov d15, x13
; CHECK-NEXT: mov v0.d[1], x18
; CHECK-NEXT: mul x13, x0, x2
; CHECK-NEXT: add v29.2d, v29.2d, v1.2d
; CHECK-NEXT: mul x15, x17, x15
; CHECK-NEXT: mov v15.d[1], x14
; CHECK-NEXT: fmov d1, x16
; CHECK-NEXT: add v28.2d, v28.2d, v0.2d
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: add v13.2d, v13.2d, v14.2d
; CHECK-NEXT: mov v14.d[1], x15
; CHECK-NEXT: mov v15.d[1], x14
; CHECK-NEXT: mov v1.d[1], x12
; CHECK-NEXT: mul x12, x17, x2
; CHECK-NEXT: add v0.2d, v0.2d, v15.2d
; CHECK-NEXT: add v11.2d, v11.2d, v14.2d
; CHECK-NEXT: fmov d14, x3
; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: add v28.2d, v28.2d, v0.2d
; CHECK-NEXT: fmov d0, x13
; CHECK-NEXT: add v9.2d, v9.2d, v1.2d
; CHECK-NEXT: mov v14.d[1], x15
; CHECK-NEXT: add v27.2d, v27.2d, v14.2d
; CHECK-NEXT: ldr q14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: add v8.2d, v8.2d, v15.2d
; CHECK-NEXT: mov v0.d[1], x12
; CHECK-NEXT: add v25.2d, v25.2d, v15.2d
; CHECK-NEXT: add v22.2d, v22.2d, v15.2d
; CHECK-NEXT: add v18.2d, v18.2d, v15.2d
; CHECK-NEXT: add v6.2d, v6.2d, v15.2d
; CHECK-NEXT: add v14.2d, v14.2d, v15.2d
; CHECK-NEXT: ldr q15, [sp] // 16-byte Folded Reload
; CHECK-NEXT: add v9.2d, v9.2d, v1.2d
; CHECK-NEXT: add v31.2d, v31.2d, v1.2d
; CHECK-NEXT: add v26.2d, v26.2d, v1.2d
; CHECK-NEXT: add v23.2d, v23.2d, v1.2d
@ -132,39 +137,30 @@ define dso_local void @run_test() local_unnamed_addr #0 {
; CHECK-NEXT: add v17.2d, v17.2d, v1.2d
; CHECK-NEXT: add v7.2d, v7.2d, v1.2d
; CHECK-NEXT: add v5.2d, v5.2d, v1.2d
; CHECK-NEXT: add v15.2d, v15.2d, v1.2d
; CHECK-NEXT: add v3.2d, v3.2d, v1.2d
; CHECK-NEXT: add v2.2d, v2.2d, v1.2d
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: add v27.2d, v27.2d, v14.2d
; CHECK-NEXT: add v8.2d, v8.2d, v15.2d
; CHECK-NEXT: add v25.2d, v25.2d, v15.2d
; CHECK-NEXT: add v22.2d, v22.2d, v15.2d
; CHECK-NEXT: add v18.2d, v18.2d, v15.2d
; CHECK-NEXT: add v6.2d, v6.2d, v15.2d
; CHECK-NEXT: add v30.2d, v30.2d, v0.2d
; CHECK-NEXT: add v24.2d, v24.2d, v0.2d
; CHECK-NEXT: add v20.2d, v20.2d, v0.2d
; CHECK-NEXT: add v16.2d, v16.2d, v0.2d
; CHECK-NEXT: add v4.2d, v4.2d, v0.2d
; CHECK-NEXT: add v1.2d, v1.2d, v0.2d
; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
; CHECK-NEXT: add v2.2d, v2.2d, v0.2d
; CHECK-NEXT: b.ne .LBB0_1
; CHECK-NEXT: // %bb.2: // %for.cond.cleanup
; CHECK-NEXT: adrp x8, C
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: add x8, x8, :lo12:C
; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: stp q13, q12, [x8]
; CHECK-NEXT: stp q11, q10, [x8, #32]
; CHECK-NEXT: stp q9, q8, [x8, #64]
; CHECK-NEXT: stp q4, q15, [x8, #432]
; CHECK-NEXT: stp q14, q3, [x8, #464]
; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: stp q0, q2, [x8, #464]
; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: stp q31, q30, [x8, #96]
; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: stp q29, q28, [x8, #144]
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: stp q27, q26, [x8, #176]
; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: str q25, [x8, #208]
; CHECK-NEXT: stp q24, q23, [x8, #240]
; CHECK-NEXT: stp q22, q21, [x8, #272]
@ -172,8 +168,7 @@ define dso_local void @run_test() local_unnamed_addr #0 {
; CHECK-NEXT: stp q18, q17, [x8, #336]
; CHECK-NEXT: stp q16, q7, [x8, #368]
; CHECK-NEXT: stp q6, q5, [x8, #400]
; CHECK-NEXT: stp q4, q3, [x8, #432]
; CHECK-NEXT: str q0, [x8, #496]
; CHECK-NEXT: str q2, [x8, #496]
; CHECK-NEXT: add sp, sp, #96
; CHECK-NEXT: ret
entry:

View file

@ -1,313 +0,0 @@
; RUN: llc -consider-local-interval-cost < %s -march=x86 -regalloc=greedy -stop-after=greedy | FileCheck %s
; Make sure bad eviction sequence doesn't occur
; Fix for bugzilla 26810.
; This test is meant to make sure bad eviction sequence like the one described
; below does not occur
;
; movapd %xmm7, 160(%esp) # 16-byte Spill
; movapd %xmm5, %xmm7
; movapd %xmm4, %xmm5
; movapd %xmm3, %xmm4
; movapd %xmm2, %xmm3
; some_inst
; movapd %xmm3, %xmm2
; movapd %xmm4, %xmm3
; movapd %xmm5, %xmm4
; movapd %xmm7, %xmm5
; movapd 160(%esp), %xmm7 # 16-byte Reload
; Make sure we have no redundant copies in the problematic code section
; CHECK-LABEL: name: loop
; CHECK: bb.2.for.body:
; CHECK: SUBPDrr
; CHECK-NEXT: MOVAPSmr
; CHECK-NEXT: MULPDrm
; CHECK-NEXT: MOVAPSrm
; CHECK-NEXT: ADDPDrr
; CHECK-NEXT: MOVAPSmr
; CHECK-NEXT: ADD32ri8
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
target triple = "i386-pc-linux-gnu"
%struct._iobuf = type { i8* }
$"\01??_C@_01NOFIACDB@w?$AA@" = comdat any
$"\01??_C@_09LAIDGMDM@?1dev?1null?$AA@" = comdat any
@"\01?v@@3PAU__m128d@@A" = global [8 x <2 x double>] zeroinitializer, align 16
@"\01?m1@@3PAU__m128d@@A" = local_unnamed_addr global [76800000 x <2 x double>] zeroinitializer, align 16
@"\01?m2@@3PAU__m128d@@A" = local_unnamed_addr global [8 x <2 x double>] zeroinitializer, align 16
@"\01??_C@_01NOFIACDB@w?$AA@" = linkonce_odr unnamed_addr constant [2 x i8] c"w\00", comdat, align 1
@"\01??_C@_09LAIDGMDM@?1dev?1null?$AA@" = linkonce_odr unnamed_addr constant [10 x i8] c"/dev/null\00", comdat, align 1
; Function Attrs: norecurse
define i32 @main() local_unnamed_addr #0 {
entry:
tail call void @init()
%0 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8
%1 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8
%2 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8
%3 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8
%4 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8
%5 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8
%6 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8
%7 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8
%.promoted.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8
%.promoted51.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8
%.promoted53.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8
%.promoted55.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8
%.promoted57.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8
%.promoted59.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8
%.promoted61.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8
%.promoted63.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8
br label %for.body.i
for.body.i: ; preds = %for.body.i, %entry
%add.i64.i = phi <2 x double> [ %.promoted63.i, %entry ], [ %add.i.i, %for.body.i ]
%add.i3662.i = phi <2 x double> [ %.promoted61.i, %entry ], [ %add.i36.i, %for.body.i ]
%add.i3860.i = phi <2 x double> [ %.promoted59.i, %entry ], [ %add.i38.i, %for.body.i ]
%add.i4058.i = phi <2 x double> [ %.promoted57.i, %entry ], [ %add.i40.i, %for.body.i ]
%add.i4256.i = phi <2 x double> [ %.promoted55.i, %entry ], [ %add.i42.i, %for.body.i ]
%add.i4454.i = phi <2 x double> [ %.promoted53.i, %entry ], [ %add.i44.i, %for.body.i ]
%add.i4652.i = phi <2 x double> [ %.promoted51.i, %entry ], [ %add.i46.i, %for.body.i ]
%add.i4850.i = phi <2 x double> [ %.promoted.i, %entry ], [ %add.i48.i, %for.body.i ]
%i.049.i = phi i32 [ 0, %entry ], [ %inc.i, %for.body.i ]
%arrayidx.i = getelementptr inbounds [76800000 x <2 x double>], [76800000 x <2 x double>]* @"\01?m1@@3PAU__m128d@@A", i32 0, i32 %i.049.i
%8 = load <2 x double>, <2 x double>* %arrayidx.i, align 16, !tbaa !8
%mul.i.i = fmul <2 x double> %0, %8
%add.i48.i = fadd <2 x double> %add.i4850.i, %mul.i.i
%mul.i47.i = fmul <2 x double> %1, %8
%add.i46.i = fadd <2 x double> %add.i4652.i, %mul.i47.i
%mul.i45.i = fmul <2 x double> %2, %8
%add.i44.i = fadd <2 x double> %add.i4454.i, %mul.i45.i
%mul.i43.i = fmul <2 x double> %3, %8
%add.i42.i = fadd <2 x double> %add.i4256.i, %mul.i43.i
%mul.i41.i = fmul <2 x double> %4, %8
%add.i40.i = fadd <2 x double> %add.i4058.i, %mul.i41.i
%mul.i39.i = fmul <2 x double> %5, %8
%add.i38.i = fadd <2 x double> %add.i3860.i, %mul.i39.i
%mul.i37.i = fmul <2 x double> %6, %8
%add.i36.i = fsub <2 x double> %add.i3662.i, %mul.i37.i
%mul.i35.i = fmul <2 x double> %7, %8
%add.i.i = fadd <2 x double> %add.i64.i, %mul.i35.i
%inc.i = add nuw nsw i32 %i.049.i, 1
%exitcond.i = icmp eq i32 %inc.i, 76800000
br i1 %exitcond.i, label %loop.exit, label %for.body.i
loop.exit: ; preds = %for.body.i
store <2 x double> %add.i48.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8
store <2 x double> %add.i46.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8
store <2 x double> %add.i46.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8
store <2 x double> %add.i44.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8
store <2 x double> %add.i42.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8
store <2 x double> %add.i40.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8
store <2 x double> %add.i38.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8
store <2 x double> %add.i36.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8
store <2 x double> %add.i.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8
%call.i = tail call %struct._iobuf* @fopen(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @"\01??_C@_09LAIDGMDM@?1dev?1null?$AA@", i32 0, i32 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @"\01??_C@_01NOFIACDB@w?$AA@", i32 0, i32 0)) #7
%call1.i = tail call i32 @fwrite(i8* bitcast ([8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A" to i8*), i32 16, i32 8, %struct._iobuf* %call.i) #7
%call2.i = tail call i32 @fclose(%struct._iobuf* %call.i) #7
ret i32 0
}
define void @init() local_unnamed_addr #1 {
entry:
call void @llvm.memset.p0i8.i32(i8* align 16 bitcast ([8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A" to i8*), i8 0, i32 128, i1 false)
%call.i = tail call i64 @_time64(i64* null)
%conv = trunc i64 %call.i to i32
tail call void @srand(i32 %conv)
br label %for.body6
for.body6: ; preds = %for.body6, %entry
%i2.051 = phi i32 [ 0, %entry ], [ %inc14, %for.body6 ]
%call7 = tail call i32 @rand()
%conv8 = sitofp i32 %call7 to double
%tmp.sroa.0.0.vec.insert = insertelement <2 x double> undef, double %conv8, i32 0
%call9 = tail call i32 @rand()
%conv10 = sitofp i32 %call9 to double
%tmp.sroa.0.8.vec.insert = insertelement <2 x double> %tmp.sroa.0.0.vec.insert, double %conv10, i32 1
%arrayidx12 = getelementptr inbounds [76800000 x <2 x double>], [76800000 x <2 x double>]* @"\01?m1@@3PAU__m128d@@A", i32 0, i32 %i2.051
store <2 x double> %tmp.sroa.0.8.vec.insert, <2 x double>* %arrayidx12, align 16, !tbaa !8
%inc14 = add nuw nsw i32 %i2.051, 1
%exitcond = icmp eq i32 %inc14, 76800000
br i1 %exitcond, label %for.body21.preheader, label %for.body6
for.body21.preheader: ; preds = %for.body6
%call25 = tail call i32 @rand()
%conv26 = sitofp i32 %call25 to double
%tmp23.sroa.0.0.vec.insert = insertelement <2 x double> undef, double %conv26, i32 0
%call28 = tail call i32 @rand()
%conv29 = sitofp i32 %call28 to double
%tmp23.sroa.0.8.vec.insert = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert, double %conv29, i32 1
store <2 x double> %tmp23.sroa.0.8.vec.insert, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8
%call25.1 = tail call i32 @rand()
%conv26.1 = sitofp i32 %call25.1 to double
%tmp23.sroa.0.0.vec.insert.1 = insertelement <2 x double> undef, double %conv26.1, i32 0
%call28.1 = tail call i32 @rand()
%conv29.1 = sitofp i32 %call28.1 to double
%tmp23.sroa.0.8.vec.insert.1 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.1, double %conv29.1, i32 1
store <2 x double> %tmp23.sroa.0.8.vec.insert.1, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8
%call25.2 = tail call i32 @rand()
%conv26.2 = sitofp i32 %call25.2 to double
%tmp23.sroa.0.0.vec.insert.2 = insertelement <2 x double> undef, double %conv26.2, i32 0
%call28.2 = tail call i32 @rand()
%conv29.2 = sitofp i32 %call28.2 to double
%tmp23.sroa.0.8.vec.insert.2 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.2, double %conv29.2, i32 1
store <2 x double> %tmp23.sroa.0.8.vec.insert.2, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8
%call25.3 = tail call i32 @rand()
%conv26.3 = sitofp i32 %call25.3 to double
%tmp23.sroa.0.0.vec.insert.3 = insertelement <2 x double> undef, double %conv26.3, i32 0
%call28.3 = tail call i32 @rand()
%conv29.3 = sitofp i32 %call28.3 to double
%tmp23.sroa.0.8.vec.insert.3 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.3, double %conv29.3, i32 1
store <2 x double> %tmp23.sroa.0.8.vec.insert.3, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8
%call25.4 = tail call i32 @rand()
%conv26.4 = sitofp i32 %call25.4 to double
%tmp23.sroa.0.0.vec.insert.4 = insertelement <2 x double> undef, double %conv26.4, i32 0
%call28.4 = tail call i32 @rand()
%conv29.4 = sitofp i32 %call28.4 to double
%tmp23.sroa.0.8.vec.insert.4 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.4, double %conv29.4, i32 1
store <2 x double> %tmp23.sroa.0.8.vec.insert.4, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8
%call25.5 = tail call i32 @rand()
%conv26.5 = sitofp i32 %call25.5 to double
%tmp23.sroa.0.0.vec.insert.5 = insertelement <2 x double> undef, double %conv26.5, i32 0
%call28.5 = tail call i32 @rand()
%conv29.5 = sitofp i32 %call28.5 to double
%tmp23.sroa.0.8.vec.insert.5 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.5, double %conv29.5, i32 1
store <2 x double> %tmp23.sroa.0.8.vec.insert.5, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8
%call25.6 = tail call i32 @rand()
%conv26.6 = sitofp i32 %call25.6 to double
%tmp23.sroa.0.0.vec.insert.6 = insertelement <2 x double> undef, double %conv26.6, i32 0
%call28.6 = tail call i32 @rand()
%conv29.6 = sitofp i32 %call28.6 to double
%tmp23.sroa.0.8.vec.insert.6 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.6, double %conv29.6, i32 1
store <2 x double> %tmp23.sroa.0.8.vec.insert.6, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8
%call25.7 = tail call i32 @rand()
%conv26.7 = sitofp i32 %call25.7 to double
%tmp23.sroa.0.0.vec.insert.7 = insertelement <2 x double> undef, double %conv26.7, i32 0
%call28.7 = tail call i32 @rand()
%conv29.7 = sitofp i32 %call28.7 to double
%tmp23.sroa.0.8.vec.insert.7 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.7, double %conv29.7, i32 1
store <2 x double> %tmp23.sroa.0.8.vec.insert.7, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8
ret void
}
; Function Attrs: norecurse nounwind
; loop: multiply-accumulate kernel. Each of the 76,800,000 iterations
; loads one <2 x double> element of m1 and accumulates its product with
; each of the eight loop-invariant m2 vectors into eight accumulators,
; which are written back to v[0..7] after the loop. Keeping 8 m2 values,
; 8 accumulators and the current m1 element live simultaneously creates
; heavy XMM register pressure on x86 — the scenario this regalloc test
; exercises.
define void @loop() local_unnamed_addr #2 {
entry:
; The eight m2 column vectors, hoisted out of the loop (loop-invariant).
%0 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8
%1 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8
%2 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8
%3 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8
%4 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8
%5 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8
%6 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8
%7 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8
; Initial accumulator values, promoted out of memory from v[0..7]
; (load-store promotion: stores happen once, in for.cond.cleanup).
%.promoted = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8
%.promoted51 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8
%.promoted53 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8
%.promoted55 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8
%.promoted57 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8
%.promoted59 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8
%.promoted61 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8
%.promoted63 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8
br label %for.body
for.cond.cleanup: ; preds = %for.body
; Store the final accumulator values back into v[0..7].
store <2 x double> %add.i48, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8
store <2 x double> %add.i46, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8
store <2 x double> %add.i44, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8
store <2 x double> %add.i42, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8
store <2 x double> %add.i40, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8
store <2 x double> %add.i38, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8
store <2 x double> %add.i36, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8
store <2 x double> %add.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8
ret void
for.body: ; preds = %for.body, %entry
; One rolling-accumulator PHI per v[j] slot, plus the induction
; variable %i.049.
%add.i64 = phi <2 x double> [ %.promoted63, %entry ], [ %add.i, %for.body ]
%add.i3662 = phi <2 x double> [ %.promoted61, %entry ], [ %add.i36, %for.body ]
%add.i3860 = phi <2 x double> [ %.promoted59, %entry ], [ %add.i38, %for.body ]
%add.i4058 = phi <2 x double> [ %.promoted57, %entry ], [ %add.i40, %for.body ]
%add.i4256 = phi <2 x double> [ %.promoted55, %entry ], [ %add.i42, %for.body ]
%add.i4454 = phi <2 x double> [ %.promoted53, %entry ], [ %add.i44, %for.body ]
%add.i4652 = phi <2 x double> [ %.promoted51, %entry ], [ %add.i46, %for.body ]
%add.i4850 = phi <2 x double> [ %.promoted, %entry ], [ %add.i48, %for.body ]
%i.049 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
; Current element: m1[i].
%arrayidx = getelementptr inbounds [76800000 x <2 x double>], [76800000 x <2 x double>]* @"\01?m1@@3PAU__m128d@@A", i32 0, i32 %i.049
%8 = load <2 x double>, <2 x double>* %arrayidx, align 16, !tbaa !8
; Accumulate m1[i] * m2[j] into slot j, for j = 0..7. Note slot 6
; (%add.i36) uses fsub rather than fadd.
%mul.i = fmul <2 x double> %8, %0
%add.i48 = fadd <2 x double> %add.i4850, %mul.i
%mul.i47 = fmul <2 x double> %8, %1
%add.i46 = fadd <2 x double> %add.i4652, %mul.i47
%mul.i45 = fmul <2 x double> %8, %2
%add.i44 = fadd <2 x double> %add.i4454, %mul.i45
%mul.i43 = fmul <2 x double> %8, %3
%add.i42 = fadd <2 x double> %add.i4256, %mul.i43
%mul.i41 = fmul <2 x double> %8, %4
%add.i40 = fadd <2 x double> %add.i4058, %mul.i41
%mul.i39 = fmul <2 x double> %8, %5
%add.i38 = fadd <2 x double> %add.i3860, %mul.i39
%mul.i37 = fmul <2 x double> %8, %6
%add.i36 = fsub <2 x double> %add.i3662, %mul.i37
%mul.i35 = fmul <2 x double> %8, %7
%add.i = fadd <2 x double> %add.i64, %mul.i35
%inc = add nuw nsw i32 %i.049, 1
%exitcond = icmp eq i32 %inc, 76800000
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
; Function Attrs: nounwind
; dump (MSVC-mangled "?dump@@YAXXZ"): opens a file for writing ("w"; the
; mangled string constant appears to encode the path "/dev/null" --
; TODO confirm), writes the v array as 8 records of 16 bytes each, and
; closes the stream. Exists so the accumulators in @loop are observable
; and cannot be optimized away.
define void @"\01?dump@@YAXXZ"() local_unnamed_addr #3 {
entry:
%call = tail call %struct._iobuf* @fopen(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @"\01??_C@_09LAIDGMDM@?1dev?1null?$AA@", i32 0, i32 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @"\01??_C@_01NOFIACDB@w?$AA@", i32 0, i32 0))
%call1 = tail call i32 @fwrite(i8* bitcast ([8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A" to i8*), i32 16, i32 8, %struct._iobuf* %call)
%call2 = tail call i32 @fclose(%struct._iobuf* %call)
ret void
}
; External declarations: C runtime RNG (srand/rand), stdio
; (fopen/fwrite/fclose on the MSVC %struct._iobuf FILE type),
; MSVC _time64, and the llvm.memset intrinsic.
declare void @srand(i32) local_unnamed_addr #4
declare i32 @rand() local_unnamed_addr #4
; Function Attrs: nounwind
declare noalias %struct._iobuf* @fopen(i8* nocapture readonly, i8* nocapture readonly) local_unnamed_addr #5
; Function Attrs: nounwind
declare i32 @fwrite(i8* nocapture, i32, i32, %struct._iobuf* nocapture) local_unnamed_addr #5
; Function Attrs: nounwind
declare i32 @fclose(%struct._iobuf* nocapture) local_unnamed_addr #5
declare i64 @_time64(i64*) local_unnamed_addr #4
; Function Attrs: argmemonly nounwind
declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1) #6
attributes #0 = { norecurse "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #4 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #5 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #6 = { argmemonly nounwind }
attributes #7 = { nounwind }
!llvm.linker.options = !{!0, !1, !2, !3, !4}
!llvm.module.flags = !{!5, !6}
!llvm.ident = !{!7}
!0 = !{!"/FAILIFMISMATCH:\22_MSC_VER=1900\22"}
!1 = !{!"/FAILIFMISMATCH:\22_ITERATOR_DEBUG_LEVEL=0\22"}
!2 = !{!"/FAILIFMISMATCH:\22RuntimeLibrary=MT_StaticRelease\22"}
!3 = !{!"/DEFAULTLIB:libcpmt.lib"}
!4 = !{!"/FAILIFMISMATCH:\22_CRT_STDIO_ISO_WIDE_SPECIFIERS=0\22"}
!5 = !{i32 1, !"NumRegisterParameters", i32 0}
!6 = !{i32 1, !"wchar_size", i32 2}
!7 = !{!"clang version 5.0.0 (cfe/trunk 305640)"}
!8 = !{!9, !9, i64 0}
!9 = !{!"omnipotent char", !10, i64 0}
!10 = !{!"Simple C++ TBAA"}

View file

@ -1,116 +0,0 @@
; RUN: llc -consider-local-interval-cost < %s -march=x86 -regalloc=greedy -stop-after=greedy | FileCheck %s
; Make sure bad eviction sequence doesn't occur
; Part of the fix for bugzilla 26810.
; This test is meant to make sure bad eviction sequence like the one described
; below does not occur
;
; movl %ebp, 8($esp) # 4-byte Spill
; movl %ecx, %ebp
; movl %ebx, %ecx
; movl $edi, %ebx
; movl $edx, $edi
; cltd
; idivl %esi
; movl $edi, $edx
; movl %ebx, $edi
; movl %ecx, %ebx
; movl %ebp, %ecx
; movl 16($esp), %ebp # 4 - byte Reload
; Make sure we have no redundant copies in the problematic code section
; CHECK-LABEL: name: bar
; CHECK: bb.3.for.body:
; CHECK: $eax = COPY
; CHECK-NEXT: CDQ
; CHECK-NEXT: IDIV32r
; CHECK-NEXT: ADD32rr
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
target triple = "i386-pc-linux-gnu"
; Function Attrs: norecurse nounwind readonly
; bar(size, arr, tmp): seeds nine rolling values x0..x8 from arr[0..8];
; when size > 1, iterates size-1 times updating them through a chain of
; add/sub/mul/sdiv operations on neighboring values; finally returns the
; product of all nine. The sdiv in the loop body requires i386 EAX/EDX
; (cdq + idiv -- see the CHECK lines above), while the nine live values
; exceed the i386 GPR set, making this a register-eviction stress test.
; %tmp is unused (readnone).
define i32 @bar(i32 %size, i32* nocapture readonly %arr, i32* nocapture readnone %tmp) local_unnamed_addr #1 {
entry:
; Load arr[0..8] as the initial values of the nine accumulators.
%0 = load i32, i32* %arr, align 4, !tbaa !3
%arrayidx3 = getelementptr inbounds i32, i32* %arr, i32 1
%1 = load i32, i32* %arrayidx3, align 4, !tbaa !3
%arrayidx5 = getelementptr inbounds i32, i32* %arr, i32 2
%2 = load i32, i32* %arrayidx5, align 4, !tbaa !3
%arrayidx7 = getelementptr inbounds i32, i32* %arr, i32 3
%3 = load i32, i32* %arrayidx7, align 4, !tbaa !3
%arrayidx9 = getelementptr inbounds i32, i32* %arr, i32 4
%4 = load i32, i32* %arrayidx9, align 4, !tbaa !3
%arrayidx11 = getelementptr inbounds i32, i32* %arr, i32 5
%5 = load i32, i32* %arrayidx11, align 4, !tbaa !3
%arrayidx13 = getelementptr inbounds i32, i32* %arr, i32 6
%6 = load i32, i32* %arrayidx13, align 4, !tbaa !3
%arrayidx15 = getelementptr inbounds i32, i32* %arr, i32 7
%7 = load i32, i32* %arrayidx15, align 4, !tbaa !3
%arrayidx17 = getelementptr inbounds i32, i32* %arr, i32 8
%8 = load i32, i32* %arrayidx17, align 4, !tbaa !3
; Only run the loop when size > 1; otherwise fall through with the
; freshly loaded values.
%cmp69 = icmp sgt i32 %size, 1
br i1 %cmp69, label %for.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.body, %entry
; Merge entry values with the loop's final values, then fold the nine
; results into a single product for the return value.
%x0.0.lcssa = phi i32 [ %0, %entry ], [ %add, %for.body ]
%x1.0.lcssa = phi i32 [ %1, %entry ], [ %sub, %for.body ]
%x2.0.lcssa = phi i32 [ %2, %entry ], [ %mul, %for.body ]
%x3.0.lcssa = phi i32 [ %3, %entry ], [ %div, %for.body ]
%x4.0.lcssa = phi i32 [ %4, %entry ], [ %add19, %for.body ]
%x5.0.lcssa = phi i32 [ %5, %entry ], [ %sub20, %for.body ]
%x6.0.lcssa = phi i32 [ %6, %entry ], [ %add21, %for.body ]
%x7.0.lcssa = phi i32 [ %7, %entry ], [ %mul22, %for.body ]
%x8.0.lcssa = phi i32 [ %8, %entry ], [ %sub23, %for.body ]
%mul24 = mul nsw i32 %x1.0.lcssa, %x0.0.lcssa
%mul25 = mul nsw i32 %mul24, %x2.0.lcssa
%mul26 = mul nsw i32 %mul25, %x3.0.lcssa
%mul27 = mul nsw i32 %mul26, %x4.0.lcssa
%mul28 = mul nsw i32 %mul27, %x5.0.lcssa
%mul29 = mul nsw i32 %mul28, %x6.0.lcssa
%mul30 = mul nsw i32 %mul29, %x7.0.lcssa
%mul31 = mul nsw i32 %mul30, %x8.0.lcssa
ret i32 %mul31
for.body: ; preds = %entry, %for.body
; Nine rolling accumulators plus the induction variable %i.079
; (starts at 1, runs to size-1 inclusive).
%i.079 = phi i32 [ %inc, %for.body ], [ 1, %entry ]
%x8.078 = phi i32 [ %sub23, %for.body ], [ %8, %entry ]
%x7.077 = phi i32 [ %mul22, %for.body ], [ %7, %entry ]
%x6.076 = phi i32 [ %add21, %for.body ], [ %6, %entry ]
%x5.075 = phi i32 [ %sub20, %for.body ], [ %5, %entry ]
%x4.074 = phi i32 [ %add19, %for.body ], [ %4, %entry ]
%x3.073 = phi i32 [ %div, %for.body ], [ %3, %entry ]
%x2.072 = phi i32 [ %mul, %for.body ], [ %2, %entry ]
%x1.071 = phi i32 [ %sub, %for.body ], [ %1, %entry ]
%x0.070 = phi i32 [ %add, %for.body ], [ %0, %entry ]
; Update chain over neighboring values; %div is a signed divide, which
; on i386 occupies the fixed EAX/EDX register pair.
%add = add nsw i32 %x1.071, %x0.070
%sub = sub nsw i32 %x1.071, %x2.072
%mul = mul nsw i32 %x3.073, %x2.072
%div = sdiv i32 %x3.073, %x4.074
%add19 = add nsw i32 %x5.075, %x4.074
%sub20 = sub nsw i32 %x5.075, %x6.076
%add21 = add nsw i32 %x7.077, %x6.076
%mul22 = mul nsw i32 %x8.078, %x7.077
%sub23 = sub nsw i32 %x8.078, %add
%inc = add nuw nsw i32 %i.079, 1
%exitcond = icmp eq i32 %inc, %size
br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !7
}
attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}
!0 = !{i32 1, !"NumRegisterParameters", i32 0}
!1 = !{i32 1, !"wchar_size", i32 2}
!2 = !{!"clang version 5.0.0 (cfe/trunk 305640)"}
!3 = !{!4, !4, i64 0}
!4 = !{!"int", !5, i64 0}
!5 = !{!"omnipotent char", !6, i64 0}
!6 = !{!"Simple C/C++ TBAA"}
!7 = distinct !{!7, !8}
!8 = !{!"llvm.loop.unroll.disable"}