
[MBP] Move a latch block with conditional exit and multi predecessors to top of loop

The current findBestLoopTop can find and move only one kind of block to the top of a loop: a latch block with a single successor. Another common case is:

    * a latch block
    * it has two successors: one is the loop header, the other is an exit
    * it has more than one predecessor

If such a block is laid out below one of its predecessors P, only P can fall through to it; every other predecessor needs a jump to it, followed by another conditional jump to the loop header. If it is instead moved before the loop header, all its predecessors can jump to it and then fall through to the loop header, so every predecessor except P saves one taken branch. A hypothetical example is sketched below.
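
For illustration only, here is a hypothetical C++ source (not part of this patch) whose loop takes this shape once the if/else arms merge into the block that tests the exit condition and carries the backedge:

    // Hypothetical example: the merge block holding `++I` and the
    // `S > Limit` test is a latch with a conditional exit (the break), a
    // backedge to the loop header, and two predecessors (the two arms of
    // the if/else). The exact machine CFG depends on earlier passes.
    int sumWithLimit(const int *A, int N, int Limit) {
      int S = 0, I = 0;
      while (I < N) {        // loop header
        if (A[I] % 2)        // the body splits into two predecessor blocks
          S += A[I];
        else
          S -= A[I];
        ++I;
        if (S > Limit)       // latch: conditional exit ...
          break;             // ... otherwise take the backedge to the header
      }
      return S;
    }

Placed before the loop header, the latch is reached from both arms by a single jump that then falls through into the header, instead of a jump plus a second taken branch.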

Differential Revision: https://reviews.llvm.org/D43256




git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363471 91177308-0d34-0410-b5e6-96231b3b80d8
Guozhi Wei, 6 years ago
parent commit 7eae8125c6
70 files changed with 1357 additions and 816 deletions
  1. 233 50
      lib/CodeGen/MachineBlockPlacement.cpp
  2. 1 1
      test/CodeGen/AArch64/cmpxchg-idioms.ll
  3. 1 1
      test/CodeGen/AArch64/neg-imm.ll
  4. 2 3
      test/CodeGen/AArch64/tailmerging_in_mbp.ll
  5. 5 4
      test/CodeGen/AMDGPU/collapse-endcf.ll
  6. 21 20
      test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
  7. 11 10
      test/CodeGen/AMDGPU/global_smrd_cfg.ll
  8. 1 1
      test/CodeGen/AMDGPU/hoist-cond.ll
  9. 6 6
      test/CodeGen/AMDGPU/i1-copy-from-loop.ll
  10. 6 6
      test/CodeGen/AMDGPU/indirect-addressing-si.ll
  11. 1 1
      test/CodeGen/AMDGPU/loop_break.ll
  12. 2 2
      test/CodeGen/AMDGPU/loop_exit_with_xor.ll
  13. 1 1
      test/CodeGen/AMDGPU/madmk.ll
  14. 28 28
      test/CodeGen/AMDGPU/multilevel-break.ll
  15. 4 4
      test/CodeGen/AMDGPU/optimize-negated-cond.ll
  16. 6 6
      test/CodeGen/AMDGPU/si-annotate-cf.ll
  17. 1 1
      test/CodeGen/AMDGPU/valu-i1.ll
  18. 7 4
      test/CodeGen/AMDGPU/wqm.ll
  19. 1 1
      test/CodeGen/ARM/2011-03-23-PeepholeBug.ll
  20. 2 3
      test/CodeGen/ARM/arm-and-tst-peephole.ll
  21. 1 1
      test/CodeGen/ARM/atomic-cmp.ll
  22. 13 13
      test/CodeGen/ARM/atomic-cmpxchg.ll
  23. 2 2
      test/CodeGen/ARM/code-placement.ll
  24. 1 1
      test/CodeGen/ARM/pr32578.ll
  25. 1 1
      test/CodeGen/ARM/swifterror.ll
  26. 1 1
      test/CodeGen/Hexagon/bug6757-endloop.ll
  27. 3 1
      test/CodeGen/Hexagon/early-if-merge-loop.ll
  28. 1 1
      test/CodeGen/Hexagon/prof-early-if.ll
  29. 1 1
      test/CodeGen/Hexagon/redundant-branching2.ll
  30. 144 168
      test/CodeGen/PowerPC/atomics-regression.ll
  31. 6 5
      test/CodeGen/PowerPC/cmp_elimination.ll
  32. 2 1
      test/CodeGen/PowerPC/ctrloop-shortLoops.ll
  33. 5 5
      test/CodeGen/PowerPC/expand-foldable-isel.ll
  34. 1 1
      test/CodeGen/PowerPC/knowCRBitSpill.ll
  35. 1 2
      test/CodeGen/PowerPC/licm-remat.ll
  36. 3 3
      test/CodeGen/SystemZ/atomicrmw-minmax-01.ll
  37. 3 3
      test/CodeGen/SystemZ/atomicrmw-minmax-02.ll
  38. 2 2
      test/CodeGen/SystemZ/loop-01.ll
  39. 1 1
      test/CodeGen/SystemZ/loop-02.ll
  40. 2 2
      test/CodeGen/SystemZ/swifterror.ll
  41. 6 6
      test/CodeGen/Thumb/consthoist-physical-addr.ll
  42. 9 10
      test/CodeGen/X86/block-placement.ll
  43. 5 2
      test/CodeGen/X86/code_placement.ll
  44. 1 1
      test/CodeGen/X86/code_placement_cold_loop_blocks.ll
  45. 3 4
      test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll
  46. 7 7
      test/CodeGen/X86/code_placement_loop_rotation2.ll
  47. 1 1
      test/CodeGen/X86/code_placement_no_header_change.ll
  48. 89 89
      test/CodeGen/X86/conditional-tailcall.ll
  49. 22 16
      test/CodeGen/X86/loop-blocks.ll
  50. 120 0
      test/CodeGen/X86/loop-rotate.ll
  51. 33 33
      test/CodeGen/X86/lsr-loop-exit-cond.ll
  52. 239 0
      test/CodeGen/X86/move_latch_to_loop_top.ll
  53. 8 8
      test/CodeGen/X86/pr38185.ll
  54. 63 60
      test/CodeGen/X86/ragreedy-hoist-spill.ll
  55. 18 17
      test/CodeGen/X86/reverse_branches.ll
  56. 30 27
      test/CodeGen/X86/speculative-load-hardening.ll
  57. 3 3
      test/CodeGen/X86/swifterror.ll
  58. 35 32
      test/CodeGen/X86/tail-dup-merge-loop-headers.ll
  59. 10 15
      test/CodeGen/X86/tail-dup-repeat.ll
  60. 35 32
      test/CodeGen/X86/vector-shift-by-select-loop.ll
  61. 8 8
      test/CodeGen/X86/widen_arith-1.ll
  62. 8 8
      test/CodeGen/X86/widen_arith-2.ll
  63. 8 8
      test/CodeGen/X86/widen_arith-3.ll
  64. 16 16
      test/CodeGen/X86/widen_arith-4.ll
  65. 8 8
      test/CodeGen/X86/widen_arith-5.ll
  66. 8 8
      test/CodeGen/X86/widen_arith-6.ll
  67. 16 16
      test/CodeGen/X86/widen_cast-4.ll
  68. 1 1
      test/CodeGen/X86/x86-cmov-converter.ll
  69. 6 6
      test/DebugInfo/X86/PR37234.ll
  70. 7 6
      test/DebugInfo/X86/dbg-value-transfer-order.ll

+ 233 - 50
lib/CodeGen/MachineBlockPlacement.cpp

@@ -455,15 +455,24 @@ class MachineBlockPlacement : public MachineFunctionPass {
                                const MachineBasicBlock *OldTop);
   bool hasViableTopFallthrough(const MachineBasicBlock *Top,
                                const BlockFilterSet &LoopBlockSet);
+  BlockFrequency TopFallThroughFreq(const MachineBasicBlock *Top,
+                                    const BlockFilterSet &LoopBlockSet);
+  BlockFrequency FallThroughGains(const MachineBasicBlock *NewTop,
+                                  const MachineBasicBlock *OldTop,
+                                  const MachineBasicBlock *ExitBB,
+                                  const BlockFilterSet &LoopBlockSet);
+  MachineBasicBlock *findBestLoopTopHelper(MachineBasicBlock *OldTop,
+      const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
   MachineBasicBlock *findBestLoopTop(
       const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
   MachineBasicBlock *findBestLoopExit(
-      const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
+      const MachineLoop &L, const BlockFilterSet &LoopBlockSet,
+      BlockFrequency &ExitFreq);
   BlockFilterSet collectLoopBlockSet(const MachineLoop &L);
   void buildLoopChains(const MachineLoop &L);
   void rotateLoop(
       BlockChain &LoopChain, const MachineBasicBlock *ExitingBB,
-      const BlockFilterSet &LoopBlockSet);
+      BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet);
   void rotateLoopWithProfile(
       BlockChain &LoopChain, const MachineLoop &L,
       const BlockFilterSet &LoopBlockSet);
@@ -1790,66 +1799,205 @@ MachineBlockPlacement::canMoveBottomBlockToTop(
   return true;
 }
 
-/// Find the best loop top block for layout.
+// Find out the possible fall through frequency to the top of a loop.
+BlockFrequency
+MachineBlockPlacement::TopFallThroughFreq(
+    const MachineBasicBlock *Top,
+    const BlockFilterSet &LoopBlockSet) {
+  BlockFrequency MaxFreq = 0;
+  for (MachineBasicBlock *Pred : Top->predecessors()) {
+    BlockChain *PredChain = BlockToChain[Pred];
+    if (!LoopBlockSet.count(Pred) &&
+        (!PredChain || Pred == *std::prev(PredChain->end()))) {
+      // Found a Pred block can be placed before Top.
+      // Check if Top is the best successor of Pred.
+      auto TopProb = MBPI->getEdgeProbability(Pred, Top);
+      bool TopOK = true;
+      for (MachineBasicBlock *Succ : Pred->successors()) {
+        auto SuccProb = MBPI->getEdgeProbability(Pred, Succ);
+        BlockChain *SuccChain = BlockToChain[Succ];
+        // Check if Succ can be placed after Pred.
+        // Succ should not be in any chain, or it is the head of some chain.
+        if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) &&
+            (!SuccChain || Succ == *SuccChain->begin())) {
+          TopOK = false;
+          break;
+        }
+      }
+      if (TopOK) {
+        BlockFrequency EdgeFreq = MBFI->getBlockFreq(Pred) *
+                                  MBPI->getEdgeProbability(Pred, Top);
+        if (EdgeFreq > MaxFreq)
+          MaxFreq = EdgeFreq;
+      }
+    }
+  }
+  return MaxFreq;
+}
+
+// Compute the fall through gains when moving NewTop before OldTop.
+//
+// In the following diagram, edges marked "-" are reduced fallthrough and edges
+// marked "+" are increased fallthrough; this function computes
+//
+//      SUM(increased fallthrough) - SUM(decreased fallthrough)
+//
+//              |
+//              | -
+//              V
+//        --->OldTop
+//        |     .
+//        |     .
+//       +|     .    +
+//        |   Pred --->
+//        |     |-
+//        |     V
+//        --- NewTop <---
+//              |-
+//              V
+//
+BlockFrequency
+MachineBlockPlacement::FallThroughGains(
+    const MachineBasicBlock *NewTop,
+    const MachineBasicBlock *OldTop,
+    const MachineBasicBlock *ExitBB,
+    const BlockFilterSet &LoopBlockSet) {
+  BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);
+  BlockFrequency FallThrough2Exit = 0;
+  if (ExitBB)
+    FallThrough2Exit = MBFI->getBlockFreq(NewTop) *
+        MBPI->getEdgeProbability(NewTop, ExitBB);
+  BlockFrequency BackEdgeFreq = MBFI->getBlockFreq(NewTop) *
+      MBPI->getEdgeProbability(NewTop, OldTop);
+
+  // Find the best Pred of NewTop.
+   MachineBasicBlock *BestPred = nullptr;
+   BlockFrequency FallThroughFromPred = 0;
+   for (MachineBasicBlock *Pred : NewTop->predecessors()) {
+     if (!LoopBlockSet.count(Pred))
+       continue;
+     BlockChain *PredChain = BlockToChain[Pred];
+     if (!PredChain || Pred == *std::prev(PredChain->end())) {
+       BlockFrequency EdgeFreq = MBFI->getBlockFreq(Pred) *
+           MBPI->getEdgeProbability(Pred, NewTop);
+       if (EdgeFreq > FallThroughFromPred) {
+         FallThroughFromPred = EdgeFreq;
+         BestPred = Pred;
+       }
+     }
+   }
+
+   // If NewTop is not placed after Pred, another successor can be placed
+   // after Pred.
+   BlockFrequency NewFreq = 0;
+   if (BestPred) {
+     for (MachineBasicBlock *Succ : BestPred->successors()) {
+       if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
+         continue;
+       if (ComputedEdges.find(Succ) != ComputedEdges.end())
+         continue;
+       BlockChain *SuccChain = BlockToChain[Succ];
+       if ((SuccChain && (Succ != *SuccChain->begin())) ||
+           (SuccChain == BlockToChain[BestPred]))
+         continue;
+       BlockFrequency EdgeFreq = MBFI->getBlockFreq(BestPred) *
+           MBPI->getEdgeProbability(BestPred, Succ);
+       if (EdgeFreq > NewFreq)
+         NewFreq = EdgeFreq;
+     }
+     BlockFrequency OrigEdgeFreq = MBFI->getBlockFreq(BestPred) *
+         MBPI->getEdgeProbability(BestPred, NewTop);
+     if (NewFreq > OrigEdgeFreq) {
+       // If NewTop is not the best successor of Pred, then Pred doesn't
+       // fall through to NewTop, so there is no FallThroughFromPred or
+       // NewFreq.
+       NewFreq = 0;
+       FallThroughFromPred = 0;
+     }
+   }
+
+   BlockFrequency Result = 0;
+   BlockFrequency Gains = BackEdgeFreq + NewFreq;
+   BlockFrequency Lost = FallThrough2Top + FallThrough2Exit +
+       FallThroughFromPred;
+   if (Gains > Lost)
+     Result = Gains - Lost;
+   return Result;
+}
+
+/// Helper function of findBestLoopTop. Find the best loop top block
+/// from the predecessors of the old top.
 ///
-/// Look for a block which is strictly better than the loop header for laying
-/// out at the top of the loop. This looks for one and only one pattern:
-/// a latch block with no conditional exit. This block will cause a conditional
-/// jump around it or will be the bottom of the loop if we lay it out in place,
-/// but if it it doesn't end up at the bottom of the loop for any reason,
-/// rotation alone won't fix it. Because such a block will always result in an
-/// unconditional jump (for the backedge) rotating it in front of the loop
-/// header is always profitable.
+/// Look for a block which is strictly better than the old top for laying
+/// out at the top of the loop. This looks for only two patterns:
+///
+///     1. a block has only one successor, the old loop top
+///
+///        Because such a block will always result in an unconditional jump,
+///        rotating it in front of the old top is always profitable.
+///
+///     2. a block with two successors, one the old top and the other an exit,
+///        and with more than one predecessor
+///
+///        If it is laid out below one of its predecessors P, only P can fall
+///        through to it; all other predecessors need a jump to it, plus a
+///        conditional jump to the loop header. If it is moved before the loop
+///        header, all its predecessors can jump to it and then fall through
+///        to the loop header, so every predecessor except P saves one taken
+///        branch. At the same time, moving it before the old top adds a taken
+///        branch to the loop exit block, so the saved taken branches are
+///        weighed against the new taken branch to the loop exit block.
 MachineBasicBlock *
-MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
-                                       const BlockFilterSet &LoopBlockSet) {
-  // Placing the latch block before the header may introduce an extra branch
-  // that skips this block the first time the loop is executed, which we want
-  // to avoid when optimising for size.
-  // FIXME: in theory there is a case that does not introduce a new branch,
-  // i.e. when the layout predecessor does not fallthrough to the loop header.
-  // In practice this never happens though: there always seems to be a preheader
-  // that can fallthrough and that is also placed before the header.
-  if (F->getFunction().hasOptSize())
-    return L.getHeader();
-
+MachineBlockPlacement::findBestLoopTopHelper(
+    MachineBasicBlock *OldTop,
+    const MachineLoop &L,
+    const BlockFilterSet &LoopBlockSet) {
   // Check that the header hasn't been fused with a preheader block due to
   // crazy branches. If it has, we need to start with the header at the top to
   // prevent pulling the preheader into the loop body.
-  BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
+  BlockChain &HeaderChain = *BlockToChain[OldTop];
   if (!LoopBlockSet.count(*HeaderChain.begin()))
-    return L.getHeader();
+    return OldTop;
 
-  LLVM_DEBUG(dbgs() << "Finding best loop top for: "
-                    << getBlockName(L.getHeader()) << "\n");
+  LLVM_DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(OldTop)
+                    << "\n");
 
-  BlockFrequency BestPredFreq;
+  BlockFrequency BestGains = 0;
   MachineBasicBlock *BestPred = nullptr;
-  for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) {
+  for (MachineBasicBlock *Pred : OldTop->predecessors()) {
     if (!LoopBlockSet.count(Pred))
       continue;
-    LLVM_DEBUG(dbgs() << "    header pred: " << getBlockName(Pred) << ", has "
+    if (Pred == L.getHeader())
+      continue;
+    LLVM_DEBUG(dbgs() << "   old top pred: " << getBlockName(Pred) << ", has "
                       << Pred->succ_size() << " successors, ";
                MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
-    if (Pred->succ_size() > 1)
+    if (Pred->succ_size() > 2)
       continue;
 
-    if (!canMoveBottomBlockToTop(Pred, L.getHeader()))
+    MachineBasicBlock *OtherBB = nullptr;
+    if (Pred->succ_size() == 2) {
+      OtherBB = *Pred->succ_begin();
+      if (OtherBB == OldTop)
+        OtherBB = *Pred->succ_rbegin();
+    }
+
+    if (!canMoveBottomBlockToTop(Pred, OldTop))
       continue;
 
-    BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
-    if (!BestPred || PredFreq > BestPredFreq ||
-        (!(PredFreq < BestPredFreq) &&
-         Pred->isLayoutSuccessor(L.getHeader()))) {
+    BlockFrequency Gains = FallThroughGains(Pred, OldTop, OtherBB,
+                                            LoopBlockSet);
+    if ((Gains > 0) && (Gains > BestGains ||
+        ((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) {
       BestPred = Pred;
-      BestPredFreq = PredFreq;
+      BestGains = Gains;
     }
   }
 
   // If no direct predecessor is fine, just use the loop header.
   if (!BestPred) {
     LLVM_DEBUG(dbgs() << "    final top unchanged\n");
-    return L.getHeader();
+    return OldTop;
   }
 
   // Walk backwards through any straight line of predecessors.
@@ -1862,6 +2010,34 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
   return BestPred;
 }
 
+/// Find the best loop top block for layout.
+///
+/// This function iteratively calls findBestLoopTopHelper until no better
+/// BB can be found.
+MachineBasicBlock *
+MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
+                                       const BlockFilterSet &LoopBlockSet) {
+  // Placing the latch block before the header may introduce an extra branch
+  // that skips this block the first time the loop is executed, which we want
+  // to avoid when optimising for size.
+  // FIXME: in theory there is a case that does not introduce a new branch,
+  // i.e. when the layout predecessor does not fallthrough to the loop header.
+  // In practice this never happens though: there always seems to be a preheader
+  // that can fallthrough and that is also placed before the header.
+  if (F->getFunction().hasOptSize())
+    return L.getHeader();
+
+  MachineBasicBlock *OldTop = nullptr;
+  MachineBasicBlock *NewTop = L.getHeader();
+  while (NewTop != OldTop) {
+    OldTop = NewTop;
+    NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet);
+    if (NewTop != OldTop)
+      ComputedEdges[NewTop] = { OldTop, false };
+  }
+  return NewTop;
+}
+
 /// Find the best loop exiting block for layout.
 ///
 /// This routine implements the logic to analyze the loop looking for the best
@@ -1869,7 +2045,8 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
 /// fallthrough opportunities.
 MachineBasicBlock *
 MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
-                                        const BlockFilterSet &LoopBlockSet) {
+                                        const BlockFilterSet &LoopBlockSet,
+                                        BlockFrequency &ExitFreq) {
   // We don't want to layout the loop linearly in all cases. If the loop header
   // is just a normal basic block in the loop, we want to look for what block
   // within the loop is the best one to layout at the top. However, if the loop
@@ -1980,6 +2157,7 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
 
   LLVM_DEBUG(dbgs() << "  Best exiting block: " << getBlockName(ExitingBB)
                     << "\n");
+  ExitFreq = BestExitEdgeFreq;
   return ExitingBB;
 }
 
@@ -2024,6 +2202,7 @@ MachineBlockPlacement::hasViableTopFallthrough(
 /// of its bottom already, don't rotate it.
 void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
                                        const MachineBasicBlock *ExitingBB,
+                                       BlockFrequency ExitFreq,
                                        const BlockFilterSet &LoopBlockSet) {
   if (!ExitingBB)
     return;
@@ -2047,6 +2226,12 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
           (!SuccChain || Succ == *SuccChain->begin()))
         return;
     }
+
+    // Rotation will destroy the top fallthrough, so we need to ensure the new
+    // exit frequency is larger than the top fallthrough frequency.
+    BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
+    if (FallThrough2Top >= ExitFreq)
+      return;
   }
 
   BlockChain::iterator ExitIt = llvm::find(LoopChain, ExitingBB);
@@ -2102,8 +2287,6 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
 void MachineBlockPlacement::rotateLoopWithProfile(
     BlockChain &LoopChain, const MachineLoop &L,
     const BlockFilterSet &LoopBlockSet) {
-  auto HeaderBB = L.getHeader();
-  auto HeaderIter = llvm::find(LoopChain, HeaderBB);
   auto RotationPos = LoopChain.end();
 
   BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency();
@@ -2123,12 +2306,13 @@ void MachineBlockPlacement::rotateLoopWithProfile(
   // chain head is not the loop header. As we only consider natural loops with
   // single header, this computation can be done only once.
   BlockFrequency HeaderFallThroughCost(0);
-  for (auto *Pred : HeaderBB->predecessors()) {
+  MachineBasicBlock *ChainHeaderBB = *LoopChain.begin();
+  for (auto *Pred : ChainHeaderBB->predecessors()) {
     BlockChain *PredChain = BlockToChain[Pred];
     if (!LoopBlockSet.count(Pred) &&
         (!PredChain || Pred == *std::prev(PredChain->end()))) {
-      auto EdgeFreq =
-          MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, HeaderBB);
+      auto EdgeFreq = MBFI->getBlockFreq(Pred) *
+          MBPI->getEdgeProbability(Pred, ChainHeaderBB);
       auto FallThruCost = ScaleBlockFrequency(EdgeFreq, MisfetchCost);
       // If the predecessor has only an unconditional jump to the header, we
       // need to consider the cost of this jump.
@@ -2178,7 +2362,7 @@ void MachineBlockPlacement::rotateLoopWithProfile(
     // If the current BB is the loop header, we need to take into account the
     // cost of the missed fall through edge from outside of the loop to the
     // header.
-    if (Iter != HeaderIter)
+    if (Iter != LoopChain.begin())
       Cost += HeaderFallThroughCost;
 
     // Collect the loop exit cost by summing up frequencies of all exit edges
@@ -2299,9 +2483,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
   // loop. This will default to the header, but may end up as one of the
   // predecessors to the header if there is one which will result in strictly
   // fewer branches in the loop body.
-  // When we use profile data to rotate the loop, this is unnecessary.
-  MachineBasicBlock *LoopTop =
-      RotateLoopWithProfile ? L.getHeader() : findBestLoopTop(L, LoopBlockSet);
+  MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
 
   // If we selected just the header for the loop top, look for a potentially
   // profitable exit block in the event that rotating the loop can eliminate
@@ -2310,8 +2492,9 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
  // Loops are processed innermost to outermost, make sure we clear
   // PreferredLoopExit before processing a new loop.
   PreferredLoopExit = nullptr;
+  BlockFrequency ExitFreq;
   if (!RotateLoopWithProfile && LoopTop == L.getHeader())
-    PreferredLoopExit = findBestLoopExit(L, LoopBlockSet);
+    PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
 
   BlockChain &LoopChain = *BlockToChain[LoopTop];
 
@@ -2331,7 +2514,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
   if (RotateLoopWithProfile)
     rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
   else
-    rotateLoop(LoopChain, PreferredLoopExit, LoopBlockSet);
+    rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
 
   LLVM_DEBUG({
     // Crash at the end so we get all of the debugging output first.

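To make the profitability test in FallThroughGains concrete, here is a worked example with invented frequencies (a sketch only; none of these numbers come from the patch or from real profile data):

    Gains = BackEdgeFreq + NewFreq
          = 90 + 40                                          = 130
    Lost  = FallThrough2Top + FallThrough2Exit + FallThroughFromPred
          = 20 + 10 + 60                                     =  90
    Gains > Lost, so the move is profitable and the candidate is recorded
    with a gain of 130 - 90 = 40.

BackEdgeFreq is the fallthrough gained along the backedge into the old top, NewFreq is the fallthrough another successor can gain behind BestPred, and the three Lost terms are the fallthroughs destroyed by the move.
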
+ 1 - 1
test/CodeGen/AArch64/cmpxchg-idioms.ll

@@ -111,7 +111,7 @@ define i1 @test_conditional2(i32 %a, i32 %b, i32* %c) {
 ; CHECK: mov w22, #2
 ; CHECK-NOT: mov w22, #4
 ; CHECK-NOT: cmn w22, #4
-; CHECK: b [[LOOP2:LBB[0-9]+_[0-9]+]]
+; CHECK: [[LOOP2:LBB[0-9]+_[0-9]+]]: ; %for.cond
 ; CHECK-NOT: b.ne [[LOOP2]]
 ; CHECK-NOT: b {{LBB[0-9]+_[0-9]+}}
 ; CHECK: bl _foo

+ 1 - 1
test/CodeGen/AArch64/neg-imm.ll

@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -disable-block-placement -o - %s | FileCheck %s
 ; LSR used to pick a sub-optimal solution due to the target responding
 ; conservatively to isLegalAddImmediate for negative values.
 

+ 2 - 3
test/CodeGen/AArch64/tailmerging_in_mbp.ll

@@ -1,9 +1,8 @@
 ; RUN: llc <%s -mtriple=aarch64-eabi -verify-machine-dom-info | FileCheck %s
 
 ; CHECK-LABEL: test:
-; CHECK:       LBB0_7:
-; CHECK:         b.hi	
-; CHECK-NEXT:    b	
+; CHECK-LABEL: %cond.false12.i
+; CHECK:         b.gt	
 ; CHECK-NEXT:  LBB0_8:
 ; CHECK-NEXT:    mov	 x8, x9
 ; CHECK-NEXT:  LBB0_9:

+ 5 - 4
test/CodeGen/AMDGPU/collapse-endcf.ll

@@ -230,6 +230,11 @@ bb.end:                                           ; preds = %bb.then, %bb
 ; Make sure scc liveness is updated if sor_b64 is removed
 ; ALL-LABEL: {{^}}scc_liveness:
 
+; GCN: %bb10
+; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
+; GCN: s_andn2_b64
+; GCN-NEXT: s_cbranch_execz
+
 ; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]:
 ; GCN: s_andn2_b64 exec, exec,
 ; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]]
@@ -239,10 +244,6 @@ bb.end:                                           ; preds = %bb.then, %bb
 
 ; GCN-NOT: s_or_b64 exec, exec
 
-; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
-; GCN: s_andn2_b64
-; GCN-NEXT: s_cbranch_execnz
-
 ; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
 ; GCN: buffer_store_dword
 ; GCN: buffer_store_dword

+ 21 - 20
test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll

@@ -19,38 +19,39 @@ define amdgpu_ps void @main(i32, float) {
 ; CHECK-NEXT:    v_mov_b32_e32 v1, 0
 ; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
 ; CHECK-NEXT:    ; implicit-def: $sgpr6_sgpr7
-; CHECK-NEXT:  BB0_1: ; %loop
+; CHECK-NEXT:    s_branch BB0_3
+; CHECK-NEXT:  BB0_1: ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    ; implicit-def: $vgpr1
+; CHECK-NEXT:  BB0_2: ; %Flow
+; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    s_and_b64 s[8:9], exec, s[6:7]
+; CHECK-NEXT:    s_or_b64 s[8:9], s[8:9], s[4:5]
+; CHECK-NEXT:    s_mov_b64 s[4:5], s[8:9]
+; CHECK-NEXT:    s_andn2_b64 exec, exec, s[8:9]
+; CHECK-NEXT:    s_cbranch_execz BB0_7
+; CHECK-NEXT:  BB0_3: ; %loop
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, 32, v1
 ; CHECK-NEXT:    s_and_b64 vcc, exec, vcc
 ; CHECK-NEXT:    s_or_b64 s[6:7], s[6:7], exec
 ; CHECK-NEXT:    s_or_b64 s[2:3], s[2:3], exec
-; CHECK-NEXT:    s_cbranch_vccz BB0_5
-; CHECK-NEXT:  ; %bb.2: ; %endif1
-; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    s_cbranch_vccz BB0_1
+; CHECK-NEXT:  ; %bb.4: ; %endif1
+; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
 ; CHECK-NEXT:    s_mov_b64 s[6:7], -1
 ; CHECK-NEXT:    s_and_saveexec_b64 s[8:9], s[0:1]
 ; CHECK-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; CHECK-NEXT:    ; mask branch BB0_4
-; CHECK-NEXT:  BB0_3: ; %endif2
-; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    ; mask branch BB0_6
+; CHECK-NEXT:  BB0_5: ; %endif2
+; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
 ; CHECK-NEXT:    v_add_u32_e32 v1, 1, v1
 ; CHECK-NEXT:    s_xor_b64 s[6:7], exec, -1
-; CHECK-NEXT:  BB0_4: ; %Flow1
-; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:  BB0_6: ; %Flow1
+; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; CHECK-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
-; CHECK-NEXT:    s_branch BB0_6
-; CHECK-NEXT:  BB0_5: ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    ; implicit-def: $vgpr1
-; CHECK-NEXT:  BB0_6: ; %Flow
-; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    s_and_b64 s[8:9], exec, s[6:7]
-; CHECK-NEXT:    s_or_b64 s[8:9], s[8:9], s[4:5]
-; CHECK-NEXT:    s_mov_b64 s[4:5], s[8:9]
-; CHECK-NEXT:    s_andn2_b64 exec, exec, s[8:9]
-; CHECK-NEXT:    s_cbranch_execnz BB0_1
-; CHECK-NEXT:  ; %bb.7: ; %Flow2
+; CHECK-NEXT:    s_branch BB0_2
+; CHECK-NEXT:  BB0_7: ; %Flow2
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; CHECK-NEXT:    v_mov_b32_e32 v1, 0
 ; this is the divergent branch with the condition not marked as divergent

+ 11 - 10
test/CodeGen/AMDGPU/global_smrd_cfg.ll

@@ -1,27 +1,28 @@
 ; RUN: llc -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads=true -verify-machineinstrs  < %s | FileCheck %s
 
-; CHECK-LABEL: %bb11
+; CHECK-LABEL: %bb22
 
-; Load from %arg in a Loop body has alias store
+; Load from %arg has alias store in Loop
 
 ; CHECK: flat_load_dword
 
-; CHECK-LABEL: %bb20
-; CHECK: flat_store_dword
+; #####################################################################
+
+; Load from %arg1 has no-alias store in Loop - arg1[i+1] never alias arg1[i]
+
+; CHECK: s_load_dword
 
 ; #####################################################################
 
-; CHECK-LABEL: %bb22
+; CHECK-LABEL: %bb11
 
-; Load from %arg has alias store in Loop
+; Load from %arg in a Loop body has alias store
 
 ; CHECK: flat_load_dword
 
-; #####################################################################
-
-; Load from %arg1 has no-alias store in Loop - arg1[i+1] never alias arg1[i]
+; CHECK-LABEL: %bb20
 
-; CHECK: s_load_dword
+; CHECK: flat_store_dword
 
 define amdgpu_kernel void @cfg(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) #0 {
 bb:

+ 1 - 1
test/CodeGen/AMDGPU/hoist-cond.ll

@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck %s
 
 ; Check that invariant compare is hoisted out of the loop.
 ; At the same time condition shall not be serialized into a VGPR and deserialized later

+ 6 - 6
test/CodeGen/AMDGPU/i1-copy-from-loop.ll

@@ -3,20 +3,20 @@
 
 ; SI-LABEL: {{^}}i1_copy_from_loop:
 ;
+; SI: ; %Flow
+; SI-DAG:  s_andn2_b64       [[LCSSA_ACCUM:s\[[0-9]+:[0-9]+\]]], [[LCSSA_ACCUM]], exec
+; SI-DAG:  s_and_b64         [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM:s\[[0-9]+:[0-9]+\]]], exec
+; SI:      s_or_b64          [[LCSSA_ACCUM]], [[LCSSA_ACCUM]], [[CC_MASK2]]
+
 ; SI: ; %for.body
 ; SI:      v_cmp_gt_u32_e64  [[CC_SREG:s\[[0-9]+:[0-9]+\]]], 4,
-; SI-DAG:  s_andn2_b64       [[CC_ACCUM:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec
+; SI-DAG:  s_andn2_b64       [[CC_ACCUM]], [[CC_ACCUM]], exec
 ; SI-DAG:  s_and_b64         [[CC_MASK:s\[[0-9]+:[0-9]+\]]], [[CC_SREG]], exec
 ; SI:      s_or_b64          [[CC_ACCUM]], [[CC_ACCUM]], [[CC_MASK]]
 
 ; SI: ; %Flow1
 ; SI:      s_or_b64          [[CC_ACCUM]], [[CC_ACCUM]], exec
 
-; SI: ; %Flow
-; SI-DAG:  s_andn2_b64       [[LCSSA_ACCUM:s\[[0-9]+:[0-9]+\]]], [[LCSSA_ACCUM]], exec
-; SI-DAG:  s_and_b64         [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec
-; SI:      s_or_b64          [[LCSSA_ACCUM]], [[LCSSA_ACCUM]], [[CC_MASK2]]
-
 ; SI: ; %for.end
 ; SI:      s_and_saveexec_b64 {{s\[[0-9]+:[0-9]+\]}}, [[LCSSA_ACCUM]]
 

+ 6 - 6
test/CodeGen/AMDGPU/indirect-addressing-si.ll

@@ -630,12 +630,7 @@ define amdgpu_kernel void @insertelement_v16f32_or_index(<16 x float> addrspace(
 ; GCN-LABEL: {{^}}broken_phi_bb:
 ; GCN: v_mov_b32_e32 [[PHIREG:v[0-9]+]], 8
 
-; GCN: s_branch [[BB2:BB[0-9]+_[0-9]+]]
-
-; GCN: {{^BB[0-9]+_[0-9]+}}:
-; GCN: s_mov_b64 exec,
-
-; GCN: [[BB2]]:
+; GCN: [[BB2:BB[0-9]+_[0-9]+]]:
 ; GCN: v_cmp_le_i32_e32 vcc, s{{[0-9]+}}, [[PHIREG]]
 ; GCN: buffer_load_dword
 
@@ -647,6 +642,11 @@ define amdgpu_kernel void @insertelement_v16f32_or_index(<16 x float> addrspace(
 ; IDXMODE: s_set_gpr_idx_off
 
 ; GCN: s_cbranch_execnz [[REGLOOP]]
+
+; GCN: {{^; %bb.[0-9]}}:
+; GCN: s_mov_b64 exec,
+; GCN: s_branch [[BB2]]
+
 define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) #0 {
 bb:
   br label %bb2

+ 1 - 1
test/CodeGen/AMDGPU/loop_break.ll

@@ -1,5 +1,5 @@
 ; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s
 
 ; Uses llvm.amdgcn.break
 

+ 2 - 2
test/CodeGen/AMDGPU/loop_exit_with_xor.ll

@@ -61,9 +61,9 @@ loopexit:
 
 ; GCN-LABEL: {{^}}break_cond_is_arg:
 ; GCN: s_xor_b64 [[REG1:[^ ,]*]], {{[^ ,]*, -1$}}
+; GCN: s_andn2_b64 exec, exec, [[REG3:[^ ,]*]]
 ; GCN: s_and_b64 [[REG2:[^ ,]*]], exec, [[REG1]]
-; GCN: s_or_b64 [[REG3:[^ ,]*]], [[REG2]],
-; GCN: s_andn2_b64 exec, exec, [[REG3]]
+; GCN: s_or_b64 [[REG3]], [[REG2]],
 
 define void @break_cond_is_arg(i32 %arg, i1 %breakcond) {
 entry:

+ 1 - 1
test/CodeGen/AMDGPU/madmk.ll

@@ -188,9 +188,9 @@ define amdgpu_kernel void @madmk_add_inline_imm_f32(float addrspace(1)* noalias
 }
 
 ; SI-LABEL: {{^}}kill_madmk_verifier_error:
+; SI: s_or_b64
 ; SI: s_xor_b64
 ; SI: v_mac_f32_e32 {{v[0-9]+}}, 0x472aee8c, {{v[0-9]+}}
-; SI: s_or_b64
 define amdgpu_kernel void @kill_madmk_verifier_error() nounwind {
 bb:
   br label %bb2

+ 28 - 28
test/CodeGen/AMDGPU/multilevel-break.ll

@@ -24,13 +24,29 @@
 ; GCN: ; %main_body
 ; GCN:      s_mov_b64           [[LEFT_OUTER:s\[[0-9]+:[0-9]+\]]], 0{{$}}
 
+; GCN: [[FLOW2:BB[0-9]+_[0-9]+]]: ; %Flow2
+; GCN:      s_or_b64            exec, exec, [[TMP0:s\[[0-9]+:[0-9]+\]]]
+; GCN:      s_and_b64           [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_OUTER:s\[[0-9]+:[0-9]+\]]]
+; GCN:      s_or_b64            [[TMP1]], [[TMP1]], [[LEFT_OUTER]]
+; GCN:      s_mov_b64           [[LEFT_OUTER]], [[TMP1]]
+; GCN:      s_andn2_b64         exec, exec, [[TMP1]]
+; GCN:      s_cbranch_execz    [[IF_BLOCK:BB[0-9]+_[0-9]+]]
+
 ; GCN: [[OUTER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP.outer{{$}}
 ; GCN:      s_mov_b64           [[LEFT_INNER:s\[[0-9]+:[0-9]+\]]], 0{{$}}
 
+; GCN: ; %Flow
+; GCN:      s_or_b64            exec, exec, [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]]
+; GCN:      s_and_b64           [[TMP0]], exec, [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]]
+; GCN:      s_or_b64            [[TMP0]], [[TMP0]], [[LEFT_INNER]]
+; GCN:      s_mov_b64           [[LEFT_INNER]], [[TMP0]]
+; GCN:      s_andn2_b64         exec, exec, [[TMP0]]
+; GCN:      s_cbranch_execz    [[FLOW2]]
+
 ; GCN: [[INNER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP{{$}}
-; GCN:      s_or_b64            [[BREAK_OUTER:s\[[0-9]+:[0-9]+\]]], [[BREAK_OUTER]], exec
-; GCN:      s_or_b64            [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]], [[BREAK_INNER]], exec
-; GCN:      s_and_saveexec_b64  [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc
+; GCN:      s_or_b64            [[BREAK_OUTER]], [[BREAK_OUTER]], exec
+; GCN:      s_or_b64            [[BREAK_INNER]], [[BREAK_INNER]], exec
+; GCN:      s_and_saveexec_b64  [[SAVE_EXEC]], vcc
 
 ; FIXME: duplicate comparison
 ; GCN: ; %ENDIF
@@ -43,23 +59,7 @@
 ; GCN-DAG:  s_or_b64            [[BREAK_OUTER]], [[BREAK_OUTER]], [[TMP_EQ]]
 ; GCN-DAG:  s_or_b64            [[BREAK_INNER]], [[BREAK_INNER]], [[TMP_NE]]
 
-; GCN: ; %Flow
-; GCN:      s_or_b64            exec, exec, [[SAVE_EXEC]]
-; GCN:      s_and_b64           [[TMP0:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_INNER]]
-; GCN:      s_or_b64            [[TMP0]], [[TMP0]], [[LEFT_INNER]]
-; GCN:      s_mov_b64           [[LEFT_INNER]], [[TMP0]]
-; GCN:      s_andn2_b64         exec, exec, [[TMP0]]
-; GCN:      s_cbranch_execnz    [[INNER_LOOP]]
-
-; GCN: ; %Flow2
-; GCN:      s_or_b64            exec, exec, [[TMP0]]
-; GCN:      s_and_b64           [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_OUTER]]
-; GCN:      s_or_b64            [[TMP1]], [[TMP1]], [[LEFT_OUTER]]
-; GCN:      s_mov_b64           [[LEFT_OUTER]], [[TMP1]]
-; GCN:      s_andn2_b64         exec, exec, [[TMP1]]
-; GCN:      s_cbranch_execnz    [[OUTER_LOOP]]
-
-; GCN: ; %IF
+; GCN: [[IF_BLOCK]]: ; %IF
 ; GCN-NEXT: s_endpgm
 define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) {
 main_body:
@@ -92,12 +92,18 @@ ENDIF:                                            ; preds = %LOOP
 ; GCN-LABEL: {{^}}multi_if_break_loop:
 ; GCN:      s_mov_b64          [[LEFT:s\[[0-9]+:[0-9]+\]]], 0{{$}}
 
+; GCN: ; %Flow4
+; GCN:      s_and_b64          [[BREAK:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK]]
+; GCN:      s_or_b64           [[LEFT]], [[BREAK]], [[OLD_LEFT:s\[[0-9]+:[0-9]+\]]]
+; GCN:      s_andn2_b64        exec, exec, [[LEFT]]
+; GCN-NEXT: s_cbranch_execz
+
 ; GCN: [[LOOP:BB[0-9]+_[0-9]+]]: ; %bb1{{$}}
-; GCN:      s_mov_b64          [[OLD_LEFT:s\[[0-9]+:[0-9]+\]]], [[LEFT]]
+; GCN:      s_mov_b64          [[OLD_LEFT]], [[LEFT]]
 
 ; GCN: ; %LeafBlock1
 ; GCN:      s_mov_b64
-; GCN:      s_mov_b64          [[BREAK:s\[[0-9]+:[0-9]+\]]], -1{{$}}
+; GCN:      s_mov_b64          [[BREAK]], -1{{$}}
 
 ; GCN: ; %case1
 ; GCN:      buffer_load_dword  [[LOAD2:v[0-9]+]],
@@ -118,12 +124,6 @@ ENDIF:                                            ; preds = %LOOP
 ; GCN-DAG:  s_and_b64          [[TMP:s\[[0-9]+:[0-9]+\]]], vcc, exec
 ; GCN:      s_or_b64           [[BREAK]], [[BREAK]], [[TMP]]
 
-; GCN: ; %Flow4
-; GCN:      s_and_b64          [[BREAK]], exec, [[BREAK]]
-; GCN:      s_or_b64           [[LEFT]], [[BREAK]], [[OLD_LEFT]]
-; GCN:      s_andn2_b64        exec, exec, [[LEFT]]
-; GCN-NEXT: s_cbranch_execnz
-
 define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
 bb:
   %id = call i32 @llvm.amdgcn.workitem.id.x()

+ 4 - 4
test/CodeGen/AMDGPU/optimize-negated-cond.ll

@@ -3,11 +3,11 @@
 ; GCN-LABEL: {{^}}negated_cond:
 ; GCN: BB0_1:
 ; GCN:   v_cmp_eq_u32_e64 [[CC:[^,]+]],
-; GCN: BB0_2:
+; GCN: BB0_3:
 ; GCN-NOT: v_cndmask_b32
 ; GCN-NOT: v_cmp
 ; GCN:   s_andn2_b64 vcc, exec, [[CC]]
-; GCN:   s_cbranch_vccnz BB0_4
+; GCN:   s_cbranch_vccnz BB0_2
 define amdgpu_kernel void @negated_cond(i32 addrspace(1)* %arg1) {
 bb:
   br label %bb1
@@ -36,11 +36,11 @@ bb4:
 
 ; GCN-LABEL: {{^}}negated_cond_dominated_blocks:
 ; GCN:   v_cmp_eq_u32_e64 [[CC:[^,]+]],
-; GCN: BB1_1:
+; GCN: %bb4
 ; GCN-NOT: v_cndmask_b32
 ; GCN-NOT: v_cmp
 ; GCN:   s_andn2_b64 vcc, exec, [[CC]]
-; GCN:   s_cbranch_vccz BB1_3
+; GCN:   s_cbranch_vccnz BB1_1
 define amdgpu_kernel void @negated_cond_dominated_blocks(i32 addrspace(1)* %arg1) {
 bb:
   br label %bb2

+ 6 - 6
test/CodeGen/AMDGPU/si-annotate-cf.ll

@@ -96,20 +96,20 @@ declare float @llvm.fabs.f32(float) nounwind readnone
 ; FUNC-LABEL: {{^}}loop_land_info_assert:
 ; SI:      v_cmp_lt_i32_e64 [[CMP4:s\[[0-9:]+\]]], s{{[0-9]+}}, 4{{$}}
 ; SI:      s_and_b64        [[CMP4M:s\[[0-9]+:[0-9]+\]]], exec, [[CMP4]]
-; SI:      s_branch         [[INFLOOP:BB[0-9]+_[0-9]+]]
+
+; SI: [[WHILELOOP:BB[0-9]+_[0-9]+]]: ; %while.cond
+; SI:      s_cbranch_vccz [[FOR_COND_PH:BB[0-9]+_[0-9]+]]
 
 ; SI:      [[CONVEX_EXIT:BB[0-9_]+]]
 ; SI:      s_mov_b64        vcc,
 ; SI-NEXT: s_cbranch_vccnz  [[ENDPGM:BB[0-9]+_[0-9]+]]
-; SI:      s_cbranch_vccnz  [[INFLOOP]]
+
+; SI:      s_cbranch_vccnz  [[WHILELOOP]]
 
 ; SI: ; %if.else
 ; SI:      buffer_store_dword
 
-; SI:      [[INFLOOP]]:
-; SI:      s_cbranch_vccnz [[CONVEX_EXIT]]
-
-; SI: ; %for.cond.preheader
+; SI: [[FOR_COND_PH]]: ; %for.cond.preheader
 ; SI:      s_cbranch_vccz [[ENDPGM]]
 
 ; SI:      [[ENDPGM]]:

+ 1 - 1
test/CodeGen/AMDGPU/valu-i1.ll

@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs -enable-misched -asm-verbose < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs -enable-misched -asm-verbose -disable-block-placement < %s | FileCheck -check-prefix=SI %s
 
 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 

+ 7 - 4
test/CodeGen/AMDGPU/wqm.ll

@@ -650,12 +650,15 @@ main_body:
 ; CHECK-DAG: v_mov_b32_e32 [[CTR:v[0-9]+]], 0
 ; CHECK-DAG: s_mov_b32 [[SEVEN:s[0-9]+]], 0x40e00000
 
-; CHECK: [[LOOPHDR:BB[0-9]+_[0-9]+]]: ; %body
-; CHECK: v_add_f32_e32 [[CTR]], 2.0, [[CTR]]
+; CHECK: [[LOOPHDR:BB[0-9]+_[0-9]+]]: ; %loop
 ; CHECK: v_cmp_lt_f32_e32 vcc, [[SEVEN]], [[CTR]]
-; CHECK: s_cbranch_vccz [[LOOPHDR]]
-; CHECK: ; %break
+; CHECK: s_cbranch_vccnz
 
+; CHECK: ; %body
+; CHECK: v_add_f32_e32 [[CTR]], 2.0, [[CTR]]
+; CHECK: s_branch [[LOOPHDR]]
+
+; CHECK: ; %break
 ; CHECK: ; return
 define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind {
 entry:

+ 1 - 1
test/CodeGen/ARM/2011-03-23-PeepholeBug.ll

@@ -26,7 +26,7 @@ bb1:                                              ; preds = %bb
 
 bb2:                                              ; preds = %bb1, %entry
 ; CHECK: cmp [[REG]], #0
-; CHECK: ble
+; CHECK: bgt
   %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
   %tries.0 = sub i32 2147483647, %indvar
   %tmp1 = icmp sgt i32 %tries.0, 0

+ 2 - 3
test/CodeGen/ARM/arm-and-tst-peephole.ll

@@ -47,9 +47,8 @@ tailrecurse.switch:                               ; preds = %tailrecurse
 ; V8-NEXT: beq
 ; V8-NEXT: %tailrecurse.switch
 ; V8: cmp
-; V8-NEXT: beq
-; V8-NEXT: %sw.epilog
-; V8-NEXT: bx lr
+; V8-NEXT: bne
+; V8-NEXT: %sw.bb
   switch i32 %and, label %sw.epilog [
     i32 1, label %sw.bb
     i32 3, label %sw.bb6

+ 1 - 1
test/CodeGen/ARM/atomic-cmp.ll

@@ -9,8 +9,8 @@ define i8 @t(i8* %a, i8 %b, i8 %c) nounwind {
 ; ARM: clrex
 
 ; T2-LABEL: t:
-; T2: strexb
 ; T2: ldrexb
+; T2: strexb
 ; T2: clrex
   %tmp0 = cmpxchg i8* %a, i8 %b, i8 %c monotonic monotonic
   %tmp1 = extractvalue { i8, i1 } %tmp0, 0

+ 13 - 13
test/CodeGen/ARM/atomic-cmpxchg.ll

@@ -52,16 +52,16 @@ entry:
 ; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8:
 ; CHECK-ARMV7-NEXT: .fnstart
 ; CHECK-ARMV7-NEXT: uxtb [[DESIRED:r[0-9]+]], r1
-; CHECK-ARMV7-NEXT: b [[TRY:.LBB[0-9_]+]]
-; CHECK-ARMV7-NEXT: [[HEAD:.LBB[0-9_]+]]:
-; CHECK-ARMV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0]
+; CHECK-ARMV7-NEXT: [[TRY:.LBB[0-9_]+]]:
+; CHECK-ARMV7-NEXT: ldrexb [[SUCCESS:r[0-9]+]], [r0]
+; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], r1
+; CHECK-ARMV7-NEXT: bne [[EXIT:.LBB[0-9_]+]]
+; CHECK-ARMV7-NEXT: strexb [[SUCCESS]], r2, [r0]
 ; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], #0
 ; CHECK-ARMV7-NEXT: moveq r0, #1
 ; CHECK-ARMV7-NEXT: bxeq lr
-; CHECK-ARMV7-NEXT: [[TRY]]:
-; CHECK-ARMV7-NEXT: ldrexb [[SUCCESS]], [r0]
-; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], r1
-; CHECK-ARMV7-NEXT: beq [[HEAD]]
+; CHECK-ARMV7-NEXT: b [[TRY]]
+; CHECK-ARMV7-NEXT: [[EXIT]]:
 ; CHECK-ARMV7-NEXT: mov r0, #0
 ; CHECK-ARMV7-NEXT: clrex
 ; CHECK-ARMV7-NEXT: bx lr
@@ -69,17 +69,17 @@ entry:
 ; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8:
 ; CHECK-THUMBV7-NEXT: .fnstart
 ; CHECK-THUMBV7-NEXT: uxtb [[DESIRED:r[0-9]+]], r1
-; CHECK-THUMBV7-NEXT: b [[TRYLD:.LBB[0-9_]+]]
-; CHECK-THUMBV7-NEXT: [[TRYST:.LBB[0-9_]+]]:
+; CHECK-THUMBV7-NEXT: [[TRYLD:.LBB[0-9_]+]]
+; CHECK-THUMBV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
+; CHECK-THUMBV7-NEXT: cmp [[LD]], [[DESIRED]]
+; CHECK-THUMBV7-NEXT: bne [[EXIT:.LBB[0-9_]+]]
 ; CHECK-THUMBV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0]
 ; CHECK-THUMBV7-NEXT: cmp [[SUCCESS]], #0
 ; CHECK-THUMBV7-NEXT: itt eq
 ; CHECK-THUMBV7-NEXT: moveq r0, #1
 ; CHECK-THUMBV7-NEXT: bxeq lr
-; CHECK-THUMBV7-NEXT: [[TRYLD]]:
-; CHECK-THUMBV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
-; CHECK-THUMBV7-NEXT: cmp [[LD]], [[DESIRED]]
-; CHECK-THUMBV7-NEXT: beq [[TRYST:.LBB[0-9_]+]]
+; CHECK-THUMBV7-NEXT: b [[TRYLD]]
+; CHECK-THUMBV7-NEXT: [[EXIT]]:
 ; CHECK-THUMBV7-NEXT: movs r0, #0
 ; CHECK-THUMBV7-NEXT: clrex
 ; CHECK-THUMBV7-NEXT: bx lr

+ 2 - 2
test/CodeGen/ARM/code-placement.ll

@@ -38,8 +38,9 @@ entry:
   br i1 %0, label %bb5, label %bb.nph15
 
 bb1:                                              ; preds = %bb2.preheader, %bb1
+; CHECK: LBB1_[[BB3:.]]: @ %bb3
 ; CHECK: LBB1_[[PREHDR:.]]: @ %bb2.preheader
-; CHECK: blt LBB1_[[BB3:.]]
+; CHECK: blt LBB1_[[BB3]]
   %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %bb2.preheader ] ; <i32> [#uses=2]
   %sum.08 = phi i32 [ %2, %bb1 ], [ %sum.110, %bb2.preheader ] ; <i32> [#uses=1]
   %tmp17 = sub i32 %i.07, %indvar                 ; <i32> [#uses=1]
@@ -53,7 +54,6 @@ bb1:                                              ; preds = %bb2.preheader, %bb1
 bb3:                                              ; preds = %bb1, %bb2.preheader
 ; CHECK: LBB1_[[BB1:.]]: @ %bb1
 ; CHECK: bne LBB1_[[BB1]]
-; CHECK: LBB1_[[BB3]]: @ %bb3
   %sum.0.lcssa = phi i32 [ %sum.110, %bb2.preheader ], [ %2, %bb1 ] ; <i32> [#uses=2]
   %3 = add i32 %pass.011, 1                       ; <i32> [#uses=2]
   %exitcond18 = icmp eq i32 %3, %passes           ; <i1> [#uses=1]

+ 1 - 1
test/CodeGen/ARM/pr32578.ll

@@ -4,7 +4,7 @@ target triple = "armv7"
 ; CHECK-LABEL: func:
 ; CHECK: push {r11, lr}
 ; CHECK: vpush {d8}
-; CHECK: b .LBB0_2
+; CHECK: .LBB0_1: @ %tailrecurse
 define arm_aapcscc double @func() {
   br label %tailrecurse
 

+ 1 - 1
test/CodeGen/ARM/swifterror.ll

@@ -182,7 +182,7 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float
 ; CHECK-APPLE: mov r0, #16
 ; CHECK-APPLE: malloc
 ; CHECK-APPLE: strb r{{.*}}, [r0, #8]
-; CHECK-APPLE: ble
+; CHECK-APPLE: b
 
 ; CHECK-O0-LABEL: foo_loop:
 ; CHECK-O0: cmp r{{.*}}, #0

+ 1 - 1
test/CodeGen/Hexagon/bug6757-endloop.ll

@@ -4,10 +4,10 @@
 ; This situation can arise due to tail duplication.
 
 ; CHECK: loop1([[LP:.LBB0_[0-9]+]]
+; CHECK: endloop1
 ; CHECK: [[LP]]:
 ; CHECK-NOT: loop1(
 ; CHECK: endloop1
-; CHECK: endloop1
 
 %s.0 = type { i32, i8* }
 %s.1 = type { i32, i32, i32, i32 }

+ 3 - 1
test/CodeGen/Hexagon/early-if-merge-loop.ll

@@ -2,9 +2,11 @@
 ; Make sure that the loop in the end has only one basic block.
 
 ; CHECK-LABEL: fred
+; CHECK: %b2
 ; Rely on the comments, make sure the one for the loop header is present.
 ; CHECK: %loop
-; CHECK-NOT: %should_merge
+; CHECK: %should_merge
+; CHECK: %exit
 
 target triple = "hexagon"
 

+ 1 - 1
test/CodeGen/Hexagon/prof-early-if.ll

@@ -1,8 +1,8 @@
 ; RUN: llc -O2 -march=hexagon < %s | FileCheck %s
 ; Rely on the comments generated by llc. Check that "if.then" was not predicated.
+; CHECK: b5
 ; CHECK: b2
 ; CHECK-NOT: if{{.*}}memd
-; CHECK: b5
 
 %s.0 = type { [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [3 x i32], [24 x i32], [8 x %s.1], [5 x i32] }
 %s.1 = type { i32, i32 }

+ 1 - 1
test/CodeGen/Hexagon/redundant-branching2.ll

@@ -3,9 +3,9 @@
 
 ; CHECK: memub
 ; CHECK: memub
+; CHECK: cmp.eq
 ; CHECK: memub
 ; CHECK-NOT: if{{.*}}jump .LBB
-; CHECK: cmp.eq
 
 target triple = "hexagon-unknown--elf"
 

+ 144 - 168
test/CodeGen/PowerPC/atomics-regression.ll

@@ -401,16 +401,15 @@ define void @test40(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test40:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
-; PPC64LE-NEXT:    b .LBB40_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB40_1:
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB40_2:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB40_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB40_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB40_1
+; PPC64LE-NEXT:  .LBB40_3:
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val monotonic monotonic
@@ -466,16 +465,15 @@ define void @test43(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB43_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB43_1:
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB43_2:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB43_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB43_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB43_1
+; PPC64LE-NEXT:  .LBB43_3:
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val release monotonic
@@ -487,16 +485,15 @@ define void @test44(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB44_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB44_1:
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB44_2:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB44_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB44_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB44_1
+; PPC64LE-NEXT:  .LBB44_3:
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val release acquire
@@ -622,16 +619,15 @@ define void @test50(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test50:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
-; PPC64LE-NEXT:    b .LBB50_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB50_1:
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB50_2:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB50_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB50_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB50_1
+; PPC64LE-NEXT:  .LBB50_3:
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val monotonic monotonic
@@ -687,16 +683,15 @@ define void @test53(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB53_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB53_1:
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB53_2:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB53_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB53_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB53_1
+; PPC64LE-NEXT:  .LBB53_3:
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val release monotonic
@@ -708,16 +703,15 @@ define void @test54(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB54_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB54_1:
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB54_2:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB54_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB54_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB54_1
+; PPC64LE-NEXT:  .LBB54_3:
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val release acquire
@@ -842,16 +836,15 @@ define void @test59(i16* %ptr, i16 %cmp, i16 %val) {
 define void @test60(i32* %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-LABEL: test60:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    b .LBB60_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB60_1:
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB60_2:
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB60_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB60_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB60_1
+; PPC64LE-NEXT:  .LBB60_3:
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val monotonic monotonic
@@ -904,16 +897,15 @@ define void @test63(i32* %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-LABEL: test63:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB63_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB63_1:
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB63_2:
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB63_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB63_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB63_1
+; PPC64LE-NEXT:  .LBB63_3:
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val release monotonic
@@ -924,16 +916,15 @@ define void @test64(i32* %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-LABEL: test64:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB64_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB64_1:
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB64_2:
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB64_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB64_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB64_1
+; PPC64LE-NEXT:  .LBB64_3:
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val release acquire
@@ -1053,16 +1044,15 @@ define void @test69(i32* %ptr, i32 %cmp, i32 %val) {
 define void @test70(i64* %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-LABEL: test70:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    b .LBB70_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB70_1:
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB70_2:
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpd 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB70_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB70_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB70_1
+; PPC64LE-NEXT:  .LBB70_3:
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val monotonic monotonic
@@ -1115,16 +1105,15 @@ define void @test73(i64* %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-LABEL: test73:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB73_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB73_1:
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB73_2:
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpd 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB73_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB73_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB73_1
+; PPC64LE-NEXT:  .LBB73_3:
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val release monotonic
@@ -1135,16 +1124,15 @@ define void @test74(i64* %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-LABEL: test74:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB74_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB74_1:
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB74_2:
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpd 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB74_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB74_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB74_1
+; PPC64LE-NEXT:  .LBB74_3:
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val release acquire
@@ -1265,16 +1253,15 @@ define void @test80(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test80:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
-; PPC64LE-NEXT:    b .LBB80_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB80_1:
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB80_2:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB80_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB80_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB80_1
+; PPC64LE-NEXT:  .LBB80_3:
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val syncscope("singlethread") monotonic monotonic
@@ -1330,16 +1317,15 @@ define void @test83(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB83_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB83_1:
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB83_2:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB83_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB83_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB83_1
+; PPC64LE-NEXT:  .LBB83_3:
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val syncscope("singlethread") release monotonic
@@ -1351,16 +1337,15 @@ define void @test84(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB84_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB84_1:
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB84_2:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB84_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB84_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB84_1
+; PPC64LE-NEXT:  .LBB84_3:
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val syncscope("singlethread") release acquire
@@ -1486,16 +1471,15 @@ define void @test90(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test90:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
-; PPC64LE-NEXT:    b .LBB90_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB90_1:
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB90_2:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB90_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB90_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB90_1
+; PPC64LE-NEXT:  .LBB90_3:
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val syncscope("singlethread") monotonic monotonic
@@ -1551,16 +1535,15 @@ define void @test93(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB93_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB93_1:
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB93_2:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB93_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB93_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB93_1
+; PPC64LE-NEXT:  .LBB93_3:
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val syncscope("singlethread") release monotonic
@@ -1572,16 +1555,15 @@ define void @test94(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB94_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB94_1:
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB94_2:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB94_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB94_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB94_1
+; PPC64LE-NEXT:  .LBB94_3:
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val syncscope("singlethread") release acquire
@@ -1706,16 +1688,15 @@ define void @test99(i16* %ptr, i16 %cmp, i16 %val) {
 define void @test100(i32* %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-LABEL: test100:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    b .LBB100_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB100_1:
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB100_2:
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB100_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB100_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB100_1
+; PPC64LE-NEXT:  .LBB100_3:
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val syncscope("singlethread") monotonic monotonic
@@ -1768,16 +1749,15 @@ define void @test103(i32* %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-LABEL: test103:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB103_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB103_1:
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB103_2:
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB103_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB103_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB103_1
+; PPC64LE-NEXT:  .LBB103_3:
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val syncscope("singlethread") release monotonic
@@ -1788,16 +1768,15 @@ define void @test104(i32* %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-LABEL: test104:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB104_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB104_1:
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB104_2:
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB104_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB104_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB104_1
+; PPC64LE-NEXT:  .LBB104_3:
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val syncscope("singlethread") release acquire
@@ -1917,16 +1896,15 @@ define void @test109(i32* %ptr, i32 %cmp, i32 %val) {
 define void @test110(i64* %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-LABEL: test110:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    b .LBB110_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB110_1:
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB110_2:
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpd 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB110_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB110_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB110_1
+; PPC64LE-NEXT:  .LBB110_3:
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val syncscope("singlethread") monotonic monotonic
@@ -1979,16 +1957,15 @@ define void @test113(i64* %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-LABEL: test113:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB113_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB113_1:
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB113_2:
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpd 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB113_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB113_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB113_1
+; PPC64LE-NEXT:  .LBB113_3:
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val syncscope("singlethread") release monotonic
@@ -1999,16 +1976,15 @@ define void @test114(i64* %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-LABEL: test114:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB114_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB114_1:
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB114_2:
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpd 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB114_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB114_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB114_1
+; PPC64LE-NEXT:  .LBB114_3:
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val syncscope("singlethread") release acquire
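
All of the atomics-regression.ll hunks above show the same reshaping: the load-and-compare block, a latch with a conditional exit and two predecessors (the entry block and the store-conditional block), is now placed first at .LBB*_1. Entry falls through into the loop instead of jumping into its middle, and the retry path becomes an explicit backedge (b .LBB*_1). A minimal sketch that reproduces this ll/sc loop shape, with a hypothetical file and function name (the cmpxchg line follows the tests above):

; Compile with: llc < sketch.ll -mtriple=powerpc64le-unknown-linux-gnu
define void @sketch(i64* %ptr, i64 %cmp, i64 %val) {
  ; Expands to the ldarx/cmpd/stdcx. retry loop shown in the hunks above.
  %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val monotonic monotonic
  ret void
}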

+ 6 - 5
test/CodeGen/PowerPC/cmp_elimination.ll

@@ -718,13 +718,14 @@ if.end:
 define void @func28(i32 signext %a) {
 ; CHECK-LABEL: @func28
 ; CHECK: cmplwi	 [[REG1:[0-9]+]], [[REG2:[0-9]+]]
-; CHECK: .[[LABEL1:[A-Z0-9_]+]]:
+; CHECK: .[[LABEL2:[A-Z0-9_]+]]:
+; CHECK: cmpwi   [[REG1]], [[REG2]]
+; CHECK: ble     0, .[[LABEL1:[A-Z0-9_]+]]
 ; CHECK-NOT: cmp
-; CHECK: bne	 0, .[[LABEL2:[A-Z0-9_]+]]
+; CHECK: bne     0, .[[LABEL2]]
 ; CHECK: bl dummy1
-; CHECK: .[[LABEL2]]:
-; CHECK: cmpwi	 [[REG1]], [[REG2]]
-; CHECK: bgt	 0, .[[LABEL1]]
+; CHECK: b .[[LABEL2]]
+; CHECK: .[[LABEL1]]:
 ; CHECK: blr
 entry:
   br label %do.body

+ 2 - 1
test/CodeGen/PowerPC/ctrloop-shortLoops.ll

@@ -88,7 +88,8 @@ for.body:                                         ; preds = %entry, %for.body
 ; Function Attrs: norecurse nounwind
 define signext i32 @testTripCount2NonSmallLoop() {
 ; CHECK-LABEL: testTripCount2NonSmallLoop:
-; CHECK: bge
+; CHECK: blt
+; CHECK: beq
 ; CHECK: blr
 
 entry:

+ 5 - 5
test/CodeGen/PowerPC/expand-foldable-isel.ll

@@ -29,13 +29,13 @@ define void @_ZN3pov6ot_insEPPNS_14ot_node_structEPNS_15ot_block_structEPNS_12ot
 ;
 ; CHECK-LABEL: _ZN3pov6ot_insEPPNS_14ot_node_structEPNS_15ot_block_structEPNS_12ot_id_structE:
 ; CHECK:    mr r4, r3
-; CHECK:    bc 12, 4*cr5+lt, .LBB0_3
-; CHECK:   # %bb.2:
+; CHECK:    bc 12, 4*cr5+lt, [[CASE1:.LBB[0-9_]+]]
+; CHECK:   # %bb.
 ; CHECK:    ori r29, r6, 0
-; CHECK:    b .LBB0_4
-; CHECK:  .LBB0_3:
+; CHECK:    b [[MERGE:.LBB[0-9_]+]]
+; CHECK:  [[CASE1]]:
 ; CHECK:    addi r29, r5, 0
-; CHECK:  .LBB0_4:
+; CHECK:  [[MERGE]]:
 ; CHECK:    blr
 entry:
   br label %while.cond11

+ 1 - 1
test/CodeGen/PowerPC/knowCRBitSpill.ll

@@ -86,7 +86,7 @@ define dso_local signext i32 @spillCRUNSET(%struct.p5rx* readonly %p1, i32 signe
 ; CHECK-NOT:    mfocrf [[REG2:.*]], [[CREG]]
 ; CHECK-NOT:    rlwinm [[REG2]], [[REG2]]
 ; CHECK:        stw [[REG1]]
-; CHECK:        .LBB1_1: # %redo_first_pass
+; CHECK:        .LBB1_1:
 entry:
   %and = and i32 %p3, 128
   %tobool = icmp eq i32 %and, 0

+ 1 - 2
test/CodeGen/PowerPC/licm-remat.ll

@@ -24,8 +24,7 @@ define linkonce_odr void @ZN6snappyDecompressor_(%"class.snappy::SnappyDecompres
 ; CHECK-DAG:   addi 25, 3, _ZN6snappy8internalL8wordmaskE@toc@l
 ; CHECK-DAG:   addis 5, 2, _ZN6snappy8internalL10char_tableE@toc@ha
 ; CHECK-DAG:   addi 24, 5, _ZN6snappy8internalL10char_tableE@toc@l
-; CHECK:       b .[[LABEL1:[A-Z0-9_]+]]
-; CHECK:       .[[LABEL1]]: # %for.cond
+; CHECK:       .LBB0_2: # %for.cond
 ; CHECK-NOT:   addis {{[0-9]+}}, 2, _ZN6snappy8internalL8wordmaskE@toc@ha
 ; CHECK-NOT:   addis {{[0-9]+}}, 2, _ZN6snappy8internalL10char_tableE@toc@ha
 ; CHECK:       bctrl

+ 3 - 3
test/CodeGen/SystemZ/atomicrmw-minmax-01.ll

@@ -1,8 +1,8 @@
 ; Test 8-bit atomic min/max operations.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2
+; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s -check-prefix=CHECK-SHIFT1
+; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s -check-prefix=CHECK-SHIFT2
 
 ; Check signed minimum.
 ; - CHECK is for the main loop.

+ 3 - 3
test/CodeGen/SystemZ/atomicrmw-minmax-02.ll

@@ -1,8 +1,8 @@
 ; Test 8-bit atomic min/max operations.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2
+; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s -check-prefix=CHECK-SHIFT1
+; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s -check-prefix=CHECK-SHIFT2
 
 ; Check signed minimum.
 ; - CHECK is for the main loop.

+ 2 - 2
test/CodeGen/SystemZ/loop-01.ll

@@ -1,7 +1,7 @@
 ; Test loop tuning.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-block-placement | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -disable-block-placement \
 ; RUN:  | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-Z13
 
 ; Test that strength reduction is applied to addresses with a scale factor,

+ 1 - 1
test/CodeGen/SystemZ/loop-02.ll

@@ -1,7 +1,7 @@
 ; Test BRCTH.
 
 ; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z196 \
-; RUN:   -no-integrated-as | FileCheck %s
+; RUN:   -no-integrated-as -disable-block-placement | FileCheck %s
 
 ; Test a loop that should be converted into dbr form and then use BRCTH.
 define void @f2(i32 *%src, i32 *%dest) {

+ 2 - 2
test/CodeGen/SystemZ/swifterror.ll

@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=s390x-linux-gnu| FileCheck %s
-; RUN: llc < %s -O0 -mtriple=s390x-linux-gnu | FileCheck --check-prefix=CHECK-O0 %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s
+; RUN: llc < %s -O0 -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck --check-prefix=CHECK-O0 %s
 
 declare i8* @malloc(i64)
 declare void @free(i8*)

+ 6 - 6
test/CodeGen/Thumb/consthoist-physical-addr.ll

@@ -10,8 +10,9 @@ define i32 @C(i32 %x, i32* nocapture %y) #0 {
 ; CHECK-NEXT:    push {r4, r5, r7, lr}
 ; CHECK-NEXT:    movs r2, #0
 ; CHECK-NEXT:    ldr r3, .LCPI0_0
-; CHECK-NEXT:    b .LBB0_4
 ; CHECK-NEXT:  .LBB0_1:
+; CHECK-NEXT:    cmp r2, #128
+; CHECK-NEXT:    beq .LBB0_5
 ; CHECK-NEXT:    movs r4, #0
 ; CHECK-NEXT:    str r4, [r3, #8]
 ; CHECK-NEXT:    lsls r4, r2, #2
@@ -20,16 +21,15 @@ define i32 @C(i32 %x, i32* nocapture %y) #0 {
 ; CHECK-NEXT:    movs r5, #1
 ; CHECK-NEXT:    str r5, [r3, #12]
 ; CHECK-NEXT:    isb sy
-; CHECK-NEXT:  .LBB0_2:
+; CHECK-NEXT:  .LBB0_3:
 ; CHECK-NEXT:    ldr r5, [r3, #12]
 ; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    bne .LBB0_2
+; CHECK-NEXT:    bne .LBB0_3
 ; CHECK-NEXT:    ldr r5, [r3, #4]
 ; CHECK-NEXT:    str r5, [r1, r4]
 ; CHECK-NEXT:    adds r2, r2, #1
-; CHECK-NEXT:  .LBB0_4:
-; CHECK-NEXT:    cmp r2, #128
-; CHECK-NEXT:    bne .LBB0_1
+; CHECK-NEXT:    b .LBB0_1
+; CHECK-NEXT:  .LBB0_5:
 ; CHECK-NEXT:    movs r0, #0
 ; CHECK-NEXT:    pop {r4, r5, r7, pc}
 ; CHECK-NEXT:    .p2align 2
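
The rotated Thumb layout above puts the trip-count check (cmp r2, #128 / beq .LBB0_5) in the top block .LBB0_1, reached by fall through from entry and by the b .LBB0_1 backedge. A hypothetical loop with this shape, not the test's actual IR (names and triple are invented for illustration):

; Compile with: llc < sketch.ll -mtriple=thumbv6m-none-eabi
define void @sketch(i32* %y) {
entry:
  br label %header
header:                               ; the cmp/beq block placed at .LBB0_1
  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
  %done = icmp eq i32 %i, 128
  br i1 %done, label %exit, label %body
body:                                 ; reached by fall through from the check
  %p = getelementptr i32, i32* %y, i32 %i
  store i32 0, i32* %p
  %i.next = add i32 %i, 1
  br label %header                    ; emitted as the b .LBB0_1 backedge
exit:
  ret void
}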

+ 9 - 10
test/CodeGen/X86/block-placement.ll

@@ -82,14 +82,14 @@ define i32 @test_loop_cold_blocks(i32 %i, i32* %a) {
 ; Check that we sink cold loop blocks after the hot loop body.
 ; CHECK-LABEL: test_loop_cold_blocks:
 ; CHECK: %entry
-; CHECK-NOT: .p2align
-; CHECK: %unlikely1
-; CHECK-NOT: .p2align
-; CHECK: %unlikely2
 ; CHECK: .p2align
 ; CHECK: %body1
 ; CHECK: %body2
 ; CHECK: %body3
+; CHECK-NOT: .p2align
+; CHECK: %unlikely1
+; CHECK-NOT: .p2align
+; CHECK: %unlikely2
 ; CHECK: %exit
 
 entry:
@@ -125,7 +125,7 @@ exit:
   ret i32 %sum
 }
 
-!0 = !{!"branch_weights", i32 4, i32 64}
+!0 = !{!"branch_weights", i32 1, i32 64}
 
 define i32 @test_loop_early_exits(i32 %i, i32* %a) {
 ; Check that we sink early exit blocks out of loop bodies.
@@ -189,8 +189,8 @@ define i32 @test_loop_rotate(i32 %i, i32* %a) {
 ; loop, eliminating unconditional branches to the top.
 ; CHECK-LABEL: test_loop_rotate:
 ; CHECK: %entry
-; CHECK: %body1
 ; CHECK: %body0
+; CHECK: %body1
 ; CHECK: %exit
 
 entry:
@@ -957,16 +957,15 @@ define void @benchmark_heapsort(i32 %n, double* nocapture %ra) {
 ; CHECK: %if.else
 ; CHECK: %if.end10
 ; Second rotated loop top
-; CHECK: .p2align
-; CHECK: %if.then24
 ; CHECK: %while.cond.outer
 ; Third rotated loop top
 ; CHECK: .p2align
+; CHECK: %if.end20
 ; CHECK: %while.cond
 ; CHECK: %while.body
 ; CHECK: %land.lhs.true
 ; CHECK: %if.then19
-; CHECK: %if.end20
+; CHECK: %if.then24
 ; CHECK: %if.then8
 ; CHECK: ret
 
@@ -1546,8 +1545,8 @@ define i32 @not_rotate_if_extra_branch_regression(i32 %count, i32 %init) {
 ; CHECK-LABEL: not_rotate_if_extra_branch_regression
 ; CHECK: %.entry
 ; CHECK: %.first_backedge
-; CHECK: %.slow
 ; CHECK: %.second_header
+; CHECK: %.slow
 .entry:
   %sum.0 = shl nsw i32 %count, 1
   br label %.first_header

+ 5 - 2
test/CodeGen/X86/code_placement.ll

@@ -4,6 +4,11 @@
 @Te1 = external global [256 x i32]		; <[256 x i32]*> [#uses=4]
 @Te3 = external global [256 x i32]		; <[256 x i32]*> [#uses=2]
 
+; CHECK: %entry
+; CHECK: %bb
+; CHECK: %bb1
+; CHECK: %bb2
+
 define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r) nounwind ssp {
 entry:
 	%0 = load i32, i32* %rk, align 4		; <i32> [#uses=1]
@@ -12,8 +17,6 @@ entry:
 	%tmp15 = add i32 %r, -1		; <i32> [#uses=1]
 	%tmp.16 = zext i32 %tmp15 to i64		; <i64> [#uses=2]
 	br label %bb
-; CHECK: jmp
-; CHECK-NEXT: align
 
 bb:		; preds = %bb1, %entry
 	%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %bb1 ]		; <i64> [#uses=3]

+ 1 - 1
test/CodeGen/X86/code_placement_cold_loop_blocks.ll

@@ -44,8 +44,8 @@ define void @nested_loop_0(i1 %flag) !prof !1 {
 ; CHECK-LABEL: nested_loop_0:
 ; CHECK: callq c
 ; CHECK: callq d
-; CHECK: callq e
 ; CHECK: callq b
+; CHECK: callq e
 ; CHECK: callq f
 
 entry:

+ 3 - 4
test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll

@@ -1,13 +1,12 @@
 ; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s
 
 define void @foo() {
-; Test that when determining the edge probability from a node in an inner loop
-; to a node in an outer loop, the weights on edges in the inner loop should be
-; ignored if we are building the chain for the outer loop.
+; After moving the latch to the top of the loop, there is no fall through from
+; the latch to the outer loop.
 ;
 ; CHECK-LABEL: foo:
-; CHECK: callq c
 ; CHECK: callq b
+; CHECK: callq c
 
 entry:
   %call = call zeroext i1 @a()
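
One possible CFG with the shape this test exercises, sketched with invented block and callee names (not the test's actual IR): once the inner latch sits at the top of the inner loop, its conditional exit to the outer-loop block calling c() can no longer be a fall through, so the layout leads with the inner body calling b().

define void @sketch() {
entry:
  br label %outer.header
outer.header:
  br label %inner.header
inner.header:
  call void @b()                      ; hot inner-loop body
  br label %inner.latch
inner.latch:                          ; latch with a conditional exit
  %t = call i1 @cond()
  br i1 %t, label %inner.header, label %outer.latch
outer.latch:
  call void @c()                      ; outer-loop block, no longer a fall-through target
  %u = call i1 @cond()
  br i1 %u, label %outer.header, label %exit
exit:
  ret void
}

declare void @b()
declare void @c()
declare i1 @cond()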

+ 7 - 7
test/CodeGen/X86/code_placement_loop_rotation2.ll

@@ -5,13 +5,13 @@ define void @foo() {
 ; Test a nested loop case when profile data is not available.
 ;
 ; CHECK-LABEL: foo:
+; CHECK: callq g
+; CHECK: callq h
 ; CHECK: callq b
-; CHECK: callq c
-; CHECK: callq d
 ; CHECK: callq e
 ; CHECK: callq f
-; CHECK: callq g
-; CHECK: callq h
+; CHECK: callq c
+; CHECK: callq d
 
 entry:
   br label %header
@@ -59,13 +59,13 @@ define void @bar() !prof !1 {
 ; Test a nested loop case when profile data is available.
 ;
 ; CHECK-PROFILE-LABEL: bar:
+; CHECK-PROFILE: callq h
+; CHECK-PROFILE: callq b
+; CHECK-PROFILE: callq g
 ; CHECK-PROFILE: callq e
 ; CHECK-PROFILE: callq f
 ; CHECK-PROFILE: callq c
 ; CHECK-PROFILE: callq d
-; CHECK-PROFILE: callq h
-; CHECK-PROFILE: callq b
-; CHECK-PROFILE: callq g
 
 entry:
   br label %header

+ 1 - 1
test/CodeGen/X86/code_placement_no_header_change.ll

@@ -7,9 +7,9 @@ define i32 @bar(i32 %count) {
 ; Later backedge1 and backedge2 are rotated before the loop header.
 ; CHECK-LABEL: bar
 ; CHECK: %.entry
+; CHECK: %.header
 ; CHECK: %.backedge1
 ; CHECK: %.backedge2
-; CHECK: %.header
 ; CHECK: %.exit
 .entry:
   %c = shl nsw i32 %count, 2

+ 89 - 89
test/CodeGen/X86/conditional-tailcall.ll

@@ -258,9 +258,12 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe
 ; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
 ; CHECK32-NEXT:    xorl %edi, %edi # encoding: [0x31,0xff]
 ; CHECK32-NEXT:    incl %edi # encoding: [0x47]
-; CHECK32-NEXT:    jmp .LBB3_1 # encoding: [0xeb,A]
-; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1
-; CHECK32-NEXT:  .LBB3_2: # %for.body
+; CHECK32-NEXT:  .LBB3_1: # %for.cond
+; CHECK32-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK32-NEXT:    testl %edx, %edx # encoding: [0x85,0xd2]
+; CHECK32-NEXT:    je .LBB3_13 # encoding: [0x74,A]
+; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_13-1, kind: FK_PCRel_1
+; CHECK32-NEXT:  # %bb.2: # %for.body
 ; CHECK32-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; CHECK32-NEXT:    cmpl $2, %ebx # encoding: [0x83,0xfb,0x02]
 ; CHECK32-NEXT:    je .LBB3_11 # encoding: [0x74,A]
@@ -314,12 +317,9 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe
 ; CHECK32-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; CHECK32-NEXT:    incl %eax # encoding: [0x40]
 ; CHECK32-NEXT:    decl %edx # encoding: [0x4a]
-; CHECK32-NEXT:  .LBB3_1: # %for.cond
-; CHECK32-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK32-NEXT:    testl %edx, %edx # encoding: [0x85,0xd2]
-; CHECK32-NEXT:    jne .LBB3_2 # encoding: [0x75,A]
-; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_2-1, kind: FK_PCRel_1
-; CHECK32-NEXT:  # %bb.13:
+; CHECK32-NEXT:    jmp .LBB3_1 # encoding: [0xeb,A]
+; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1
+; CHECK32-NEXT:  .LBB3_13:
 ; CHECK32-NEXT:    cmpl $2, %ebx # encoding: [0x83,0xfb,0x02]
 ; CHECK32-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
 ; CHECK32-NEXT:    jmp .LBB3_14 # encoding: [0xeb,A]
@@ -369,56 +369,59 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe
 ; CHECK64-NEXT:    .cfi_adjust_cfa_offset 8
 ; CHECK64-NEXT:    popq %r8 # encoding: [0x41,0x58]
 ; CHECK64-NEXT:    .cfi_adjust_cfa_offset -8
-; CHECK64-NEXT:    jmp .LBB3_11 # encoding: [0xeb,A]
-; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
-; CHECK64-NEXT:  .LBB3_1: # %for.body
-; CHECK64-NEXT:    # in Loop: Header=BB3_11 Depth=1
-; CHECK64-NEXT:    cmpl $2, %ecx # encoding: [0x83,0xf9,0x02]
-; CHECK64-NEXT:    je .LBB3_9 # encoding: [0x74,A]
-; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
+; CHECK64-NEXT:  .LBB3_1: # %for.cond
+; CHECK64-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK64-NEXT:    testq %rax, %rax # encoding: [0x48,0x85,0xc0]
+; CHECK64-NEXT:    je .LBB3_12 # encoding: [0x74,A]
+; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1
 ; CHECK64-NEXT:  # %bb.2: # %for.body
-; CHECK64-NEXT:    # in Loop: Header=BB3_11 Depth=1
-; CHECK64-NEXT:    cmpl $1, %ecx # encoding: [0x83,0xf9,0x01]
-; CHECK64-NEXT:    je .LBB3_7 # encoding: [0x74,A]
-; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_7-1, kind: FK_PCRel_1
+; CHECK64-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; CHECK64-NEXT:    cmpl $2, %ecx # encoding: [0x83,0xf9,0x02]
+; CHECK64-NEXT:    je .LBB3_10 # encoding: [0x74,A]
+; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
 ; CHECK64-NEXT:  # %bb.3: # %for.body
-; CHECK64-NEXT:    # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; CHECK64-NEXT:    cmpl $1, %ecx # encoding: [0x83,0xf9,0x01]
+; CHECK64-NEXT:    je .LBB3_8 # encoding: [0x74,A]
+; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1
+; CHECK64-NEXT:  # %bb.4: # %for.body
+; CHECK64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; CHECK64-NEXT:    testl %ecx, %ecx # encoding: [0x85,0xc9]
-; CHECK64-NEXT:    jne .LBB3_10 # encoding: [0x75,A]
-; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
-; CHECK64-NEXT:  # %bb.4: # %sw.bb
-; CHECK64-NEXT:    # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT:    jne .LBB3_11 # encoding: [0x75,A]
+; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
+; CHECK64-NEXT:  # %bb.5: # %sw.bb
+; CHECK64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; CHECK64-NEXT:    movzbl (%rdi), %edx # encoding: [0x0f,0xb6,0x17]
 ; CHECK64-NEXT:    cmpl $43, %edx # encoding: [0x83,0xfa,0x2b]
 ; CHECK64-NEXT:    movl %r8d, %ecx # encoding: [0x44,0x89,0xc1]
-; CHECK64-NEXT:    je .LBB3_10 # encoding: [0x74,A]
-; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
-; CHECK64-NEXT:  # %bb.5: # %sw.bb
-; CHECK64-NEXT:    # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT:    je .LBB3_11 # encoding: [0x74,A]
+; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
+; CHECK64-NEXT:  # %bb.6: # %sw.bb
+; CHECK64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; CHECK64-NEXT:    cmpb $45, %dl # encoding: [0x80,0xfa,0x2d]
 ; CHECK64-NEXT:    movl %r8d, %ecx # encoding: [0x44,0x89,0xc1]
-; CHECK64-NEXT:    je .LBB3_10 # encoding: [0x74,A]
-; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
-; CHECK64-NEXT:  # %bb.6: # %if.else
-; CHECK64-NEXT:    # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT:    je .LBB3_11 # encoding: [0x74,A]
+; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
+; CHECK64-NEXT:  # %bb.7: # %if.else
+; CHECK64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; CHECK64-NEXT:    addl $-48, %edx # encoding: [0x83,0xc2,0xd0]
 ; CHECK64-NEXT:    cmpl $10, %edx # encoding: [0x83,0xfa,0x0a]
-; CHECK64-NEXT:    jmp .LBB3_8 # encoding: [0xeb,A]
-; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1
-; CHECK64-NEXT:  .LBB3_7: # %sw.bb14
-; CHECK64-NEXT:    # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT:    jmp .LBB3_9 # encoding: [0xeb,A]
+; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
+; CHECK64-NEXT:  .LBB3_8: # %sw.bb14
+; CHECK64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; CHECK64-NEXT:    movzbl (%rdi), %ecx # encoding: [0x0f,0xb6,0x0f]
 ; CHECK64-NEXT:    addl $-48, %ecx # encoding: [0x83,0xc1,0xd0]
 ; CHECK64-NEXT:    cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a]
-; CHECK64-NEXT:  .LBB3_8: # %if.else
-; CHECK64-NEXT:    # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT:  .LBB3_9: # %if.else
+; CHECK64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; CHECK64-NEXT:    movl %r9d, %ecx # encoding: [0x44,0x89,0xc9]
-; CHECK64-NEXT:    jb .LBB3_10 # encoding: [0x72,A]
-; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
+; CHECK64-NEXT:    jb .LBB3_11 # encoding: [0x72,A]
+; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
 ; CHECK64-NEXT:    jmp .LBB3_13 # encoding: [0xeb,A]
 ; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_13-1, kind: FK_PCRel_1
-; CHECK64-NEXT:  .LBB3_9: # %sw.bb22
-; CHECK64-NEXT:    # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT:  .LBB3_10: # %sw.bb22
+; CHECK64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; CHECK64-NEXT:    movzbl (%rdi), %ecx # encoding: [0x0f,0xb6,0x0f]
 ; CHECK64-NEXT:    addl $-48, %ecx # encoding: [0x83,0xc1,0xd0]
 ; CHECK64-NEXT:    cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a]
@@ -426,16 +429,13 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe
 ; CHECK64-NEXT:    jae _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_ # TAILCALL
 ; CHECK64-NEXT:    # encoding: [0x73,A]
 ; CHECK64-NEXT:    # fixup A - offset: 1, value: _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_-1, kind: FK_PCRel_1
-; CHECK64-NEXT:  .LBB3_10: # %for.inc
-; CHECK64-NEXT:    # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT:  .LBB3_11: # %for.inc
+; CHECK64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; CHECK64-NEXT:    incq %rdi # encoding: [0x48,0xff,0xc7]
 ; CHECK64-NEXT:    decq %rax # encoding: [0x48,0xff,0xc8]
-; CHECK64-NEXT:  .LBB3_11: # %for.cond
-; CHECK64-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK64-NEXT:    testq %rax, %rax # encoding: [0x48,0x85,0xc0]
-; CHECK64-NEXT:    jne .LBB3_1 # encoding: [0x75,A]
+; CHECK64-NEXT:    jmp .LBB3_1 # encoding: [0xeb,A]
 ; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1
-; CHECK64-NEXT:  # %bb.12:
+; CHECK64-NEXT:  .LBB3_12:
 ; CHECK64-NEXT:    cmpl $2, %ecx # encoding: [0x83,0xf9,0x02]
 ; CHECK64-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
 ; CHECK64-NEXT:    # kill: def $al killed $al killed $eax
@@ -451,51 +451,54 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe
 ; WIN64-NEXT:    movq -24(%rcx), %r8 # encoding: [0x4c,0x8b,0x41,0xe8]
 ; WIN64-NEXT:    leaq (%rcx,%r8), %rdx # encoding: [0x4a,0x8d,0x14,0x01]
 ; WIN64-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
-; WIN64-NEXT:    jmp .LBB3_10 # encoding: [0xeb,A]
-; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
-; WIN64-NEXT:  .LBB3_1: # %for.body
-; WIN64-NEXT:    # in Loop: Header=BB3_10 Depth=1
-; WIN64-NEXT:    cmpl $2, %eax # encoding: [0x83,0xf8,0x02]
-; WIN64-NEXT:    je .LBB3_8 # encoding: [0x74,A]
-; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1
+; WIN64-NEXT:  .LBB3_1: # %for.cond
+; WIN64-NEXT:    # =>This Inner Loop Header: Depth=1
+; WIN64-NEXT:    testq %r8, %r8 # encoding: [0x4d,0x85,0xc0]
+; WIN64-NEXT:    je .LBB3_11 # encoding: [0x74,A]
+; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
 ; WIN64-NEXT:  # %bb.2: # %for.body
-; WIN64-NEXT:    # in Loop: Header=BB3_10 Depth=1
-; WIN64-NEXT:    cmpl $1, %eax # encoding: [0x83,0xf8,0x01]
-; WIN64-NEXT:    je .LBB3_6 # encoding: [0x74,A]
-; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_6-1, kind: FK_PCRel_1
+; WIN64-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; WIN64-NEXT:    cmpl $2, %eax # encoding: [0x83,0xf8,0x02]
+; WIN64-NEXT:    je .LBB3_9 # encoding: [0x74,A]
+; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
 ; WIN64-NEXT:  # %bb.3: # %for.body
-; WIN64-NEXT:    # in Loop: Header=BB3_10 Depth=1
+; WIN64-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; WIN64-NEXT:    cmpl $1, %eax # encoding: [0x83,0xf8,0x01]
+; WIN64-NEXT:    je .LBB3_7 # encoding: [0x74,A]
+; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_7-1, kind: FK_PCRel_1
+; WIN64-NEXT:  # %bb.4: # %for.body
+; WIN64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; WIN64-NEXT:    testl %eax, %eax # encoding: [0x85,0xc0]
-; WIN64-NEXT:    jne .LBB3_9 # encoding: [0x75,A]
-; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
-; WIN64-NEXT:  # %bb.4: # %sw.bb
-; WIN64-NEXT:    # in Loop: Header=BB3_10 Depth=1
+; WIN64-NEXT:    jne .LBB3_10 # encoding: [0x75,A]
+; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
+; WIN64-NEXT:  # %bb.5: # %sw.bb
+; WIN64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; WIN64-NEXT:    movzbl (%rcx), %r9d # encoding: [0x44,0x0f,0xb6,0x09]
 ; WIN64-NEXT:    cmpl $43, %r9d # encoding: [0x41,0x83,0xf9,0x2b]
 ; WIN64-NEXT:    movl $1, %eax # encoding: [0xb8,0x01,0x00,0x00,0x00]
-; WIN64-NEXT:    je .LBB3_9 # encoding: [0x74,A]
-; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
-; WIN64-NEXT:  # %bb.5: # %sw.bb
-; WIN64-NEXT:    # in Loop: Header=BB3_10 Depth=1
+; WIN64-NEXT:    je .LBB3_10 # encoding: [0x74,A]
+; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
+; WIN64-NEXT:  # %bb.6: # %sw.bb
+; WIN64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; WIN64-NEXT:    cmpb $45, %r9b # encoding: [0x41,0x80,0xf9,0x2d]
-; WIN64-NEXT:    je .LBB3_9 # encoding: [0x74,A]
-; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
-; WIN64-NEXT:    jmp .LBB3_7 # encoding: [0xeb,A]
-; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_7-1, kind: FK_PCRel_1
-; WIN64-NEXT:  .LBB3_6: # %sw.bb14
-; WIN64-NEXT:    # in Loop: Header=BB3_10 Depth=1
+; WIN64-NEXT:    je .LBB3_10 # encoding: [0x74,A]
+; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
+; WIN64-NEXT:    jmp .LBB3_8 # encoding: [0xeb,A]
+; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1
+; WIN64-NEXT:  .LBB3_7: # %sw.bb14
+; WIN64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; WIN64-NEXT:    movzbl (%rcx), %r9d # encoding: [0x44,0x0f,0xb6,0x09]
-; WIN64-NEXT:  .LBB3_7: # %if.else
-; WIN64-NEXT:    # in Loop: Header=BB3_10 Depth=1
+; WIN64-NEXT:  .LBB3_8: # %if.else
+; WIN64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; WIN64-NEXT:    addl $-48, %r9d # encoding: [0x41,0x83,0xc1,0xd0]
 ; WIN64-NEXT:    movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00]
 ; WIN64-NEXT:    cmpl $10, %r9d # encoding: [0x41,0x83,0xf9,0x0a]
-; WIN64-NEXT:    jb .LBB3_9 # encoding: [0x72,A]
-; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
+; WIN64-NEXT:    jb .LBB3_10 # encoding: [0x72,A]
+; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
 ; WIN64-NEXT:    jmp .LBB3_12 # encoding: [0xeb,A]
 ; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1
-; WIN64-NEXT:  .LBB3_8: # %sw.bb22
-; WIN64-NEXT:    # in Loop: Header=BB3_10 Depth=1
+; WIN64-NEXT:  .LBB3_9: # %sw.bb22
+; WIN64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; WIN64-NEXT:    movzbl (%rcx), %r9d # encoding: [0x44,0x0f,0xb6,0x09]
 ; WIN64-NEXT:    addl $-48, %r9d # encoding: [0x41,0x83,0xc1,0xd0]
 ; WIN64-NEXT:    movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00]
@@ -503,16 +506,13 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe
 ; WIN64-NEXT:    jae _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_ # TAILCALL
 ; WIN64-NEXT:    # encoding: [0x73,A]
 ; WIN64-NEXT:    # fixup A - offset: 1, value: _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_-1, kind: FK_PCRel_1
-; WIN64-NEXT:  .LBB3_9: # %for.inc
-; WIN64-NEXT:    # in Loop: Header=BB3_10 Depth=1
+; WIN64-NEXT:  .LBB3_10: # %for.inc
+; WIN64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; WIN64-NEXT:    incq %rcx # encoding: [0x48,0xff,0xc1]
 ; WIN64-NEXT:    decq %r8 # encoding: [0x49,0xff,0xc8]
-; WIN64-NEXT:  .LBB3_10: # %for.cond
-; WIN64-NEXT:    # =>This Inner Loop Header: Depth=1
-; WIN64-NEXT:    testq %r8, %r8 # encoding: [0x4d,0x85,0xc0]
-; WIN64-NEXT:    jne .LBB3_1 # encoding: [0x75,A]
+; WIN64-NEXT:    jmp .LBB3_1 # encoding: [0xeb,A]
 ; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1
-; WIN64-NEXT:  # %bb.11:
+; WIN64-NEXT:  .LBB3_11:
 ; WIN64-NEXT:    cmpl $2, %eax # encoding: [0x83,0xf8,0x02]
 ; WIN64-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
 ; WIN64-NEXT:    # kill: def $al killed $al killed $eax

+ 22 - 16
test/CodeGen/X86/loop-blocks.ll

@@ -7,12 +7,14 @@
 ; order to avoid a branch within the loop.
 
 ; CHECK-LABEL: simple:
-;      CHECK:   jmp   .LBB0_1
-; CHECK-NEXT:   align
-; CHECK-NEXT: .LBB0_2:
-; CHECK-NEXT:   callq loop_latch
+;      CHECK:   align
 ; CHECK-NEXT: .LBB0_1:
 ; CHECK-NEXT:   callq loop_header
+;      CHECK:   js .LBB0_3
+; CHECK-NEXT:   callq loop_latch
+; CHECK-NEXT:   jmp .LBB0_1
+; CHECK-NEXT: .LBB0_3:
+; CHECK-NEXT:   callq exit
 
 define void @simple() nounwind {
 entry:
@@ -75,17 +77,21 @@ exit:
 ; CHECK-LABEL: yet_more_involved:
 ;      CHECK:   jmp .LBB2_1
 ; CHECK-NEXT:   align
-; CHECK-NEXT: .LBB2_5:
-; CHECK-NEXT:   callq block_a_true_func
-; CHECK-NEXT:   callq block_a_merge_func
-; CHECK-NEXT: .LBB2_1:
+
+;      CHECK: .LBB2_1:
 ; CHECK-NEXT:   callq body
-;
-; LBB2_4
-;      CHECK:   callq bar99
+; CHECK-NEXT:   callq get
+; CHECK-NEXT:   cmpl $2, %eax
+; CHECK-NEXT:   jge .LBB2_2
+; CHECK-NEXT:   callq bar99
 ; CHECK-NEXT:   callq get
 ; CHECK-NEXT:   cmpl $2999, %eax
-; CHECK-NEXT:   jle .LBB2_5
+; CHECK-NEXT:   jg .LBB2_6
+; CHECK-NEXT:   callq block_a_true_func
+; CHECK-NEXT:   callq block_a_merge_func
+; CHECK-NEXT:   jmp .LBB2_1
+; CHECK-NEXT:   align
+; CHECK-NEXT: .LBB2_6:
 ; CHECK-NEXT:   callq block_a_false_func
 ; CHECK-NEXT:   callq block_a_merge_func
 ; CHECK-NEXT:   jmp .LBB2_1
@@ -201,12 +207,12 @@ block102:
 }
 
 ; CHECK-LABEL: check_minsize:
-;      CHECK:   jmp   .LBB4_1
 ; CHECK-NOT:   align
-; CHECK-NEXT: .LBB4_2:
-; CHECK-NEXT:   callq loop_latch
-; CHECK-NEXT: .LBB4_1:
+; CHECK:      .LBB4_1:
 ; CHECK-NEXT:   callq loop_header
+; CHECK:        callq loop_latch
+; CHECK:      .LBB4_3:
+; CHECK:        callq exit
 
 
 define void @check_minsize() minsize nounwind {

+ 120 - 0
test/CodeGen/X86/loop-rotate.ll

@@ -0,0 +1,120 @@
+; RUN: llc -mtriple=i686-linux < %s | FileCheck %s
+
+; Don't rotate the loop if the number of fall throughs to the exit is not
+; larger than the number of fall throughs to the header.
+define void @no_rotate() {
+; CHECK-LABEL: no_rotate
+; CHECK: %entry
+; CHECK: %header
+; CHECK: %middle
+; CHECK: %latch1
+; CHECK: %latch2
+; CHECK: %end
+entry:
+  br label %header
+
+header:
+  %val1 = call i1 @foo()
+  br i1 %val1, label %middle, label %end
+
+middle:
+  %val2 = call i1 @foo()
+  br i1 %val2, label %latch1, label %end
+
+latch1:
+  %val3 = call i1 @foo()
+  br i1 %val3, label %latch2, label %header
+
+latch2:
+  %val4 = call i1 @foo()
+  br label %header
+
+end:
+  ret void
+}
+
+define void @do_rotate() {
+; CHECK-LABEL: do_rotate
+; CHECK: %entry
+; CHECK: %then
+; CHECK: %else
+; CHECK: %latch1
+; CHECK: %latch2
+; CHECK: %header
+; CHECK: %end
+entry:
+  %val0 = call i1 @foo()
+  br i1 %val0, label %then, label %else
+
+then:
+  call void @a()
+  br label %header
+
+else:
+  call void @b()
+  br label %header
+
+header:
+  %val1 = call i1 @foo()
+  br i1 %val1, label %latch1, label %end
+
+latch1:
+  %val3 = call i1 @foo()
+  br i1 %val3, label %latch2, label %header
+
+latch2:
+  %val4 = call i1 @foo()
+  br label %header
+
+end:
+  ret void
+}
+
+; The loop structure is the same as in @no_rotate, but the loop header's
+; predecessor doesn't fall through to it, so the loop should be rotated to get
+; an exit fall through.
+define void @do_rotate2() {
+; CHECK-LABEL: do_rotate2
+; CHECK: %entry
+; CHECK: %then
+; CHECK: %middle
+; CHECK: %latch1
+; CHECK: %latch2
+; CHECK: %header
+; CHECK: %exit
+entry:
+  %val0 = call i1 @foo()
+  br i1 %val0, label %then, label %header, !prof !1
+
+then:
+  call void @a()
+  br label %end
+
+header:
+  %val1 = call i1 @foo()
+  br i1 %val1, label %middle, label %exit
+
+middle:
+  %val2 = call i1 @foo()
+  br i1 %val2, label %latch1, label %exit
+
+latch1:
+  %val3 = call i1 @foo()
+  br i1 %val3, label %latch2, label %header
+
+latch2:
+  %val4 = call i1 @foo()
+  br label %header
+
+exit:
+  call void @b()
+  br label %end
+
+end:
+  ret void
+}
+
+declare i1 @foo()
+declare void @a()
+declare void @b()
+
+!1 = !{!"branch_weights", i32 10, i32 1}
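
For reference, branch_weights encodes relative successor probabilities: the entry branch of @do_rotate2 carries !prof !1, so P(then) = 10 / (10 + 1), about 0.91, and P(header) = 1 / 11, about 0.09. That is what makes the header's predecessor unlikely to fall through to it and lets the rotation pay off.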

+ 33 - 33
test/CodeGen/X86/lsr-loop-exit-cond.ll

@@ -21,22 +21,7 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r
 ; GENERIC-NEXT:    movq _Te1@{{.*}}(%rip), %r8
 ; GENERIC-NEXT:    movq _Te3@{{.*}}(%rip), %r10
 ; GENERIC-NEXT:    movq %rcx, %r11
-; GENERIC-NEXT:    jmp LBB0_1
 ; GENERIC-NEXT:    .p2align 4, 0x90
-; GENERIC-NEXT:  LBB0_2: ## %bb1
-; GENERIC-NEXT:    ## in Loop: Header=BB0_1 Depth=1
-; GENERIC-NEXT:    movl %edi, %ebx
-; GENERIC-NEXT:    shrl $16, %ebx
-; GENERIC-NEXT:    movzbl %bl, %ebx
-; GENERIC-NEXT:    xorl (%r8,%rbx,4), %eax
-; GENERIC-NEXT:    xorl -4(%r14), %eax
-; GENERIC-NEXT:    shrl $24, %edi
-; GENERIC-NEXT:    movzbl %bpl, %ebx
-; GENERIC-NEXT:    movl (%r10,%rbx,4), %ebx
-; GENERIC-NEXT:    xorl (%r9,%rdi,4), %ebx
-; GENERIC-NEXT:    xorl (%r14), %ebx
-; GENERIC-NEXT:    decq %r11
-; GENERIC-NEXT:    addq $16, %r14
 ; GENERIC-NEXT:  LBB0_1: ## %bb
 ; GENERIC-NEXT:    ## =>This Inner Loop Header: Depth=1
 ; GENERIC-NEXT:    movzbl %al, %edi
@@ -56,8 +41,23 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r
 ; GENERIC-NEXT:    shrl $24, %eax
 ; GENERIC-NEXT:    movl (%r9,%rax,4), %eax
 ; GENERIC-NEXT:    testq %r11, %r11
-; GENERIC-NEXT:    jne LBB0_2
-; GENERIC-NEXT:  ## %bb.3: ## %bb2
+; GENERIC-NEXT:    je LBB0_3
+; GENERIC-NEXT:  ## %bb.2: ## %bb1
+; GENERIC-NEXT:    ## in Loop: Header=BB0_1 Depth=1
+; GENERIC-NEXT:    movl %edi, %ebx
+; GENERIC-NEXT:    shrl $16, %ebx
+; GENERIC-NEXT:    movzbl %bl, %ebx
+; GENERIC-NEXT:    xorl (%r8,%rbx,4), %eax
+; GENERIC-NEXT:    xorl -4(%r14), %eax
+; GENERIC-NEXT:    shrl $24, %edi
+; GENERIC-NEXT:    movzbl %bpl, %ebx
+; GENERIC-NEXT:    movl (%r10,%rbx,4), %ebx
+; GENERIC-NEXT:    xorl (%r9,%rdi,4), %ebx
+; GENERIC-NEXT:    xorl (%r14), %ebx
+; GENERIC-NEXT:    decq %r11
+; GENERIC-NEXT:    addq $16, %r14
+; GENERIC-NEXT:    jmp LBB0_1
+; GENERIC-NEXT:  LBB0_3: ## %bb2
 ; GENERIC-NEXT:    shlq $4, %rcx
 ; GENERIC-NEXT:    andl $-16777216, %eax ## imm = 0xFF000000
 ; GENERIC-NEXT:    movl %edi, %ebx
@@ -105,21 +105,7 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r
 ; ATOM-NEXT:    movq _Te3@{{.*}}(%rip), %r10
 ; ATOM-NEXT:    decl %ecx
 ; ATOM-NEXT:    movq %rcx, %r11
-; ATOM-NEXT:    jmp LBB0_1
 ; ATOM-NEXT:    .p2align 4, 0x90
-; ATOM-NEXT:  LBB0_2: ## %bb1
-; ATOM-NEXT:    ## in Loop: Header=BB0_1 Depth=1
-; ATOM-NEXT:    shrl $16, %eax
-; ATOM-NEXT:    shrl $24, %edi
-; ATOM-NEXT:    decq %r11
-; ATOM-NEXT:    movzbl %al, %ebp
-; ATOM-NEXT:    movzbl %bl, %eax
-; ATOM-NEXT:    movl (%r10,%rax,4), %eax
-; ATOM-NEXT:    xorl (%r8,%rbp,4), %r15d
-; ATOM-NEXT:    xorl (%r9,%rdi,4), %eax
-; ATOM-NEXT:    xorl -4(%r14), %r15d
-; ATOM-NEXT:    xorl (%r14), %eax
-; ATOM-NEXT:    addq $16, %r14
 ; ATOM-NEXT:  LBB0_1: ## %bb
 ; ATOM-NEXT:    ## =>This Inner Loop Header: Depth=1
 ; ATOM-NEXT:    movl %eax, %edi
@@ -140,8 +126,22 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r
 ; ATOM-NEXT:    movl (%r9,%rax,4), %r15d
 ; ATOM-NEXT:    testq %r11, %r11
 ; ATOM-NEXT:    movl %edi, %eax
-; ATOM-NEXT:    jne LBB0_2
-; ATOM-NEXT:  ## %bb.3: ## %bb2
+; ATOM-NEXT:    je LBB0_3
+; ATOM-NEXT:  ## %bb.2: ## %bb1
+; ATOM-NEXT:    ## in Loop: Header=BB0_1 Depth=1
+; ATOM-NEXT:    shrl $16, %eax
+; ATOM-NEXT:    shrl $24, %edi
+; ATOM-NEXT:    decq %r11
+; ATOM-NEXT:    movzbl %al, %ebp
+; ATOM-NEXT:    movzbl %bl, %eax
+; ATOM-NEXT:    movl (%r10,%rax,4), %eax
+; ATOM-NEXT:    xorl (%r8,%rbp,4), %r15d
+; ATOM-NEXT:    xorl (%r9,%rdi,4), %eax
+; ATOM-NEXT:    xorl -4(%r14), %r15d
+; ATOM-NEXT:    xorl (%r14), %eax
+; ATOM-NEXT:    addq $16, %r14
+; ATOM-NEXT:    jmp LBB0_1
+; ATOM-NEXT:  LBB0_3: ## %bb2
 ; ATOM-NEXT:    shrl $16, %eax
 ; ATOM-NEXT:    shrl $8, %edi
 ; ATOM-NEXT:    movzbl %bl, %ebp

+ 239 - 0
test/CodeGen/X86/move_latch_to_loop_top.ll

@@ -0,0 +1,239 @@
+; RUN: llc  -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s
+
+; The latch block should be moved before the header.
+;CHECK-LABEL: test1:
+;CHECK:       %latch
+;CHECK:       %header
+;CHECK:       %false
+define i32 @test1(i32* %p) {
+entry:
+  br label %header
+
+header:
+  %x1 = phi i64 [0, %entry], [%x2, %latch]
+  %count1 = phi i32 [0, %entry], [%count4, %latch]
+  %0 = ptrtoint i32* %p to i64
+  %1 = add i64 %0, %x1
+  %2 = inttoptr i64 %1 to i32*
+  %data = load i32, i32* %2
+  %3 = icmp eq i32 %data, 0
+  br i1 %3, label %latch, label %false
+
+false:
+  %count2 = add i32 %count1, 1
+  br label %latch
+
+latch:
+  %count4 = phi i32 [%count2, %false], [%count1, %header]
+  %x2 = add i64 %x1, 1
+  %4 = icmp eq i64 %x2, 100
+  br i1 %4, label %exit, label %header
+
+exit:
+  ret i32 %count4
+}
+
+; The latch block and one of false/true should be moved before the header.
+;CHECK-LABEL: test2:
+;CHECK:       %true
+;CHECK:       %latch
+;CHECK:       %header
+;CHECK:       %false
+define i32 @test2(i32* %p) {
+entry:
+  br label %header
+
+header:
+  %x1 = phi i64 [0, %entry], [%x2, %latch]
+  %count1 = phi i32 [0, %entry], [%count4, %latch]
+  %0 = ptrtoint i32* %p to i64
+  %1 = add i64 %0, %x1
+  %2 = inttoptr i64 %1 to i32*
+  %data = load i32, i32* %2
+  %3 = icmp eq i32 %data, 0
+  br i1 %3, label %true, label %false
+
+false:
+  %count2 = add i32 %count1, 1
+  br label %latch
+
+true:
+  %count3 = add i32 %count1, 2
+  br label %latch
+
+latch:
+  %count4 = phi i32 [%count2, %false], [%count3, %true]
+  %x2 = add i64 %x1, 1
+  %4 = icmp eq i64 %x2, 100
+  br i1 %4, label %exit, label %header
+
+exit:
+  ret i32 %count4
+}
+
+; More blocks can be moved before the header.
+;            header <------------
+;              /\               |
+;             /  \              |
+;            /    \             |
+;           /      \            |
+;          /        \           |
+;        true      false        |
+;         /\         /\         |
+;        /  \       /  \        |
+;       /    \     /    \       |
+;    true3 false3 /      \      |
+;      \    /   true2  false2   |
+;       \  /      \      /      |
+;        \/        \    /       |
+;      endif3       \  /        |
+;         \          \/         |
+;          \       endif2       |
+;           \        /          |
+;            \      /           |
+;             \    /            |
+;              \  /             |
+;               \/              |
+;              latch-------------
+;                |
+;                |
+;              exit
+;
+; Blocks true3, endif3 and latch should be moved before the header.
+;
+;CHECK-LABEL: test3:
+;CHECK:       %true3
+;CHECK:       %endif3
+;CHECK:       %latch
+;CHECK:       %header
+;CHECK:       %false
+define i32 @test3(i32* %p) {
+entry:
+  br label %header
+
+header:
+  %x1 = phi i64 [0, %entry], [%x2, %latch]
+  %count1 = phi i32 [0, %entry], [%count12, %latch]
+  %0 = ptrtoint i32* %p to i64
+  %1 = add i64 %0, %x1
+  %2 = inttoptr i64 %1 to i32*
+  %data = load i32, i32* %2
+  %3 = icmp eq i32 %data, 0
+  br i1 %3, label %true, label %false, !prof !3
+
+false:
+  %count2 = add i32 %count1, 1
+  %cond = icmp sgt i32 %count2, 10
+  br i1 %cond, label %true2, label %false2
+
+false2:
+  %count3 = and i32 %count2, 7
+  br label %endif2
+
+true2:
+  %count4 = mul i32 %count2, 3
+  br label %endif2
+
+endif2:
+  %count5 = phi i32 [%count3, %false2], [%count4, %true2]
+  %count6 = sub i32 %count5, 5
+  br label %latch
+
+true:
+  %count7 = add i32 %count1, 2
+  %cond2 = icmp slt i32 %count7, 20
+  br i1 %cond2, label %true3, label %false3
+
+false3:
+  %count8 = or i32 %count7, 3
+  br label %endif3
+
+true3:
+  %count9 = xor i32 %count7, 55
+  br label %endif3
+
+endif3:
+  %count10 = phi i32 [%count8, %false3], [%count9, %true3]
+  %count11 = add i32 %count10, 3
+  br label %latch
+
+latch:
+  %count12 = phi i32 [%count6, %endif2], [%count11, %endif3]
+  %x2 = add i64 %x1, 1
+  %4 = icmp eq i64 %x2, 100
+  br i1 %4, label %exit, label %header
+
+exit:
+  ret i32 %count12
+}
+
+; The exit block has a higher frequency than the false block, so the latch
+; block should not be moved before the header.
+;CHECK-LABEL: test4:
+;CHECK:       %header
+;CHECK:       %true
+;CHECK:       %latch
+;CHECK:       %false
+;CHECK:       %exit
+define i32 @test4(i32 %t, i32* %p) {
+entry:
+  br label %header
+
+header:
+  %x1 = phi i64 [0, %entry], [%x2, %latch]
+  %count1 = phi i32 [0, %entry], [%count4, %latch]
+  %0 = ptrtoint i32* %p to i64
+  %1 = add i64 %0, %x1
+  %2 = inttoptr i64 %1 to i32*
+  %data = load i32, i32* %2
+  %3 = icmp eq i32 %data, 0
+  br i1 %3, label %true, label %false, !prof !1
+
+false:
+  %count2 = add i32 %count1, 1
+  br label %latch
+
+true:
+  %count3 = add i32 %count1, 2
+  br label %latch
+
+latch:
+  %count4 = phi i32 [%count2, %false], [%count3, %true]
+  %x2 = add i64 %x1, 1
+  %4 = icmp eq i64 %x2, 100
+  br i1 %4, label %exit, label %header, !prof !2
+
+exit:
+  ret i32 %count4
+}
+
+!1 = !{!"branch_weights", i32 100, i32 1}
+!2 = !{!"branch_weights", i32 16, i32 16}
+!3 = !{!"branch_weights", i32 51, i32 49}
+
+; If moving the latch to the loop top doesn't reduce taken branches, don't do it.
+;CHECK-LABEL: test5:
+;CHECK:       %entry
+;CHECK:       %header
+;CHECK:       %latch
+define void @test5(i32* %p) {
+entry:
+  br label %header
+
+header:
+  %x1 = phi i64 [0, %entry], [%x1, %header], [%x2, %latch]
+  %0 = ptrtoint i32* %p to i64
+  %1 = add i64 %0, %x1
+  %2 = inttoptr i64 %1 to i32*
+  %data = load i32, i32* %2
+  %3 = icmp eq i32 %data, 0
+  br i1 %3, label %latch, label %header
+
+latch:
+  %x2 = add i64 %x1, 1
+  br label %header
+
+exit:
+  ret void
+}
+
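
A quick frequency check behind the @test4 comment: with !1 = {100, 1} the header reaches %false only 1 time in 101, and with !2 = {16, 16} the latch exits with probability 1/2, so the loop runs about 2 iterations per entry. Per function entry that gives freq(exit) = 1 versus freq(false) = 2/101, roughly 0.02, so the exit edge, not %false, is the one worth keeping cheap, and the latch stays below the header.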

+ 8 - 8
test/CodeGen/X86/pr38185.ll

@@ -5,9 +5,13 @@ define void @foo(i32* %a, i32* %b, i32* noalias %c, i64 %s) {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    jmp .LBB0_1
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_2: # %body
+; CHECK-NEXT:  .LBB0_1: # %loop
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movq -{{[0-9]+}}(%rsp), %r9
+; CHECK-NEXT:    cmpq %rcx, %r9
+; CHECK-NEXT:    je .LBB0_3
+; CHECK-NEXT:  # %bb.2: # %body
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    movl $1, (%rdx,%r9,4)
 ; CHECK-NEXT:    movzbl (%rdi,%r9,4), %r8d
@@ -17,12 +21,8 @@ define void @foo(i32* %a, i32* %b, i32* noalias %c, i64 %s) {
 ; CHECK-NEXT:    movl %eax, (%rdi,%r9,4)
 ; CHECK-NEXT:    incq %r9
 ; CHECK-NEXT:    movq %r9, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:  .LBB0_1: # %loop
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movq -{{[0-9]+}}(%rsp), %r9
-; CHECK-NEXT:    cmpq %rcx, %r9
-; CHECK-NEXT:    jne .LBB0_2
-; CHECK-NEXT:  # %bb.3: # %endloop
+; CHECK-NEXT:    jmp .LBB0_1
+; CHECK-NEXT:  .LBB0_3: # %endloop
 ; CHECK-NEXT:    retq
 %i = alloca i64
 store i64 0, i64* %i

+ 63 - 60
test/CodeGen/X86/ragreedy-hoist-spill.ll

@@ -103,6 +103,34 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    xorl %r13d, %r13d
 ; CHECK-NEXT:    jmp LBB0_13
 ; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  LBB0_20: ## %sw.bb256
+; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    movl %r14d, %r13d
+; CHECK-NEXT:  LBB0_21: ## %while.cond197.backedge
+; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    decl %r15d
+; CHECK-NEXT:    testl %r15d, %r15d
+; CHECK-NEXT:    movl %r13d, %r14d
+; CHECK-NEXT:    jle LBB0_22
+; CHECK-NEXT:  LBB0_13: ## %while.body200
+; CHECK-NEXT:    ## =>This Loop Header: Depth=1
+; CHECK-NEXT:    ## Child Loop BB0_30 Depth 2
+; CHECK-NEXT:    ## Child Loop BB0_38 Depth 2
+; CHECK-NEXT:    leal -268(%r14), %eax
+; CHECK-NEXT:    cmpl $105, %eax
+; CHECK-NEXT:    ja LBB0_14
+; CHECK-NEXT:  ## %bb.56: ## %while.body200
+; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    movslq (%rdi,%rax,4), %rax
+; CHECK-NEXT:    addq %rdi, %rax
+; CHECK-NEXT:    jmpq *%rax
+; CHECK-NEXT:  LBB0_44: ## %while.cond1037.preheader
+; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    testb %dl, %dl
+; CHECK-NEXT:    movl %r14d, %r13d
+; CHECK-NEXT:    jne LBB0_21
+; CHECK-NEXT:    jmp LBB0_55
+; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_14: ## %while.body200
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    leal 1(%r14), %eax
@@ -118,12 +146,6 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    movl $1, %r13d
 ; CHECK-NEXT:    jmp LBB0_21
-; CHECK-NEXT:  LBB0_44: ## %while.cond1037.preheader
-; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    movl %r14d, %r13d
-; CHECK-NEXT:    jne LBB0_21
-; CHECK-NEXT:    jmp LBB0_55
 ; CHECK-NEXT:  LBB0_26: ## %sw.bb474
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    testb %dl, %dl
@@ -137,30 +159,52 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:  ## %bb.28: ## %land.rhs485.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    ## implicit-def: $rax
+; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    jns LBB0_30
+; CHECK-NEXT:    jmp LBB0_55
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  LBB0_29: ## %land.rhs485
-; CHECK-NEXT:    ## Parent Loop BB0_13 Depth=1
-; CHECK-NEXT:    ## => This Inner Loop Header: Depth=2
+; CHECK-NEXT:  LBB0_32: ## %do.body479.backedge
+; CHECK-NEXT:    ## in Loop: Header=BB0_30 Depth=2
+; CHECK-NEXT:    leaq 1(%r12), %rax
+; CHECK-NEXT:    testb %dl, %dl
+; CHECK-NEXT:    je LBB0_33
+; CHECK-NEXT:  ## %bb.29: ## %land.rhs485
+; CHECK-NEXT:    ## in Loop: Header=BB0_30 Depth=2
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    js LBB0_55
-; CHECK-NEXT:  ## %bb.30: ## %cond.true.i.i2780
-; CHECK-NEXT:    ## in Loop: Header=BB0_29 Depth=2
+; CHECK-NEXT:  LBB0_30: ## %cond.true.i.i2780
+; CHECK-NEXT:    ## Parent Loop BB0_13 Depth=1
+; CHECK-NEXT:    ## => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    movq %rax, %r12
 ; CHECK-NEXT:    testb %dl, %dl
 ; CHECK-NEXT:    jne LBB0_32
 ; CHECK-NEXT:  ## %bb.31: ## %lor.rhs500
-; CHECK-NEXT:    ## in Loop: Header=BB0_29 Depth=2
+; CHECK-NEXT:    ## in Loop: Header=BB0_30 Depth=2
 ; CHECK-NEXT:    movl $256, %esi ## imm = 0x100
 ; CHECK-NEXT:    callq ___maskrune
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    je LBB0_34
-; CHECK-NEXT:  LBB0_32: ## %do.body479.backedge
-; CHECK-NEXT:    ## in Loop: Header=BB0_29 Depth=2
-; CHECK-NEXT:    leaq 1(%r12), %rax
-; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    jne LBB0_29
-; CHECK-NEXT:  ## %bb.33: ## %if.end517.loopexitsplit
+; CHECK-NEXT:    jne LBB0_32
+; CHECK-NEXT:    jmp LBB0_34
+; CHECK-NEXT:  LBB0_45: ## %sw.bb1134
+; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; CHECK-NEXT:    cmpq %rax, %rcx
+; CHECK-NEXT:    jb LBB0_55
+; CHECK-NEXT:  ## %bb.46: ## in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    xorl %ebp, %ebp
+; CHECK-NEXT:    movl $268, %r13d ## imm = 0x10C
+; CHECK-NEXT:    jmp LBB0_21
+; CHECK-NEXT:  LBB0_19: ## %sw.bb243
+; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    movl $2, %r13d
+; CHECK-NEXT:    jmp LBB0_21
+; CHECK-NEXT:  LBB0_40: ## %sw.bb566
+; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    movl $20, %r13d
+; CHECK-NEXT:    jmp LBB0_21
+; CHECK-NEXT:  LBB0_33: ## %if.end517.loopexitsplit
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    incq %r12
 ; CHECK-NEXT:  LBB0_34: ## %if.end517
@@ -199,47 +243,6 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rsi
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rdi
 ; CHECK-NEXT:    jmp LBB0_21
-; CHECK-NEXT:  LBB0_45: ## %sw.bb1134
-; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; CHECK-NEXT:    cmpq %rax, %rcx
-; CHECK-NEXT:    jb LBB0_55
-; CHECK-NEXT:  ## %bb.46: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    movl $268, %r13d ## imm = 0x10C
-; CHECK-NEXT:    jmp LBB0_21
-; CHECK-NEXT:  LBB0_19: ## %sw.bb243
-; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $2, %r13d
-; CHECK-NEXT:    jmp LBB0_21
-; CHECK-NEXT:  LBB0_40: ## %sw.bb566
-; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $20, %r13d
-; CHECK-NEXT:    jmp LBB0_21
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  LBB0_13: ## %while.body200
-; CHECK-NEXT:    ## =>This Loop Header: Depth=1
-; CHECK-NEXT:    ## Child Loop BB0_29 Depth 2
-; CHECK-NEXT:    ## Child Loop BB0_38 Depth 2
-; CHECK-NEXT:    leal -268(%r14), %eax
-; CHECK-NEXT:    cmpl $105, %eax
-; CHECK-NEXT:    ja LBB0_14
-; CHECK-NEXT:  ## %bb.56: ## %while.body200
-; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movslq (%rdi,%rax,4), %rax
-; CHECK-NEXT:    addq %rdi, %rax
-; CHECK-NEXT:    jmpq *%rax
-; CHECK-NEXT:  LBB0_20: ## %sw.bb256
-; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl %r14d, %r13d
-; CHECK-NEXT:  LBB0_21: ## %while.cond197.backedge
-; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    decl %r15d
-; CHECK-NEXT:    testl %r15d, %r15d
-; CHECK-NEXT:    movl %r13d, %r14d
-; CHECK-NEXT:    jg LBB0_13
-; CHECK-NEXT:    jmp LBB0_22
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_42: ## %while.cond864
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1

+ 18 - 17
test/CodeGen/X86/reverse_branches.ll

@@ -85,25 +85,36 @@ define i32 @test_branches_order() uwtable ssp {
 ; CHECK-NEXT:    jg LBB0_16
 ; CHECK-NEXT:  LBB0_9: ## %for.cond18.preheader
 ; CHECK-NEXT:    ## =>This Loop Header: Depth=1
-; CHECK-NEXT:    ## Child Loop BB0_10 Depth 2
+; CHECK-NEXT:    ## Child Loop BB0_11 Depth 2
 ; CHECK-NEXT:    ## Child Loop BB0_12 Depth 3
 ; CHECK-NEXT:    movq %rcx, %rdx
 ; CHECK-NEXT:    xorl %esi, %esi
 ; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    cmpl $999, %edi ## imm = 0x3E7
+; CHECK-NEXT:    jle LBB0_11
+; CHECK-NEXT:    jmp LBB0_15
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  LBB0_10: ## %for.cond18
+; CHECK-NEXT:  LBB0_14: ## %exit
+; CHECK-NEXT:    ## in Loop: Header=BB0_11 Depth=2
+; CHECK-NEXT:    addq %rsi, %rbp
+; CHECK-NEXT:    incq %rdi
+; CHECK-NEXT:    decq %rsi
+; CHECK-NEXT:    addq $1001, %rdx ## imm = 0x3E9
+; CHECK-NEXT:    cmpq $-1000, %rbp ## imm = 0xFC18
+; CHECK-NEXT:    jne LBB0_5
+; CHECK-NEXT:  ## %bb.10: ## %for.cond18
+; CHECK-NEXT:    ## in Loop: Header=BB0_11 Depth=2
+; CHECK-NEXT:    cmpl $999, %edi ## imm = 0x3E7
+; CHECK-NEXT:    jg LBB0_15
+; CHECK-NEXT:  LBB0_11: ## %for.body20
 ; CHECK-NEXT:    ## Parent Loop BB0_9 Depth=1
 ; CHECK-NEXT:    ## => This Loop Header: Depth=2
 ; CHECK-NEXT:    ## Child Loop BB0_12 Depth 3
-; CHECK-NEXT:    cmpl $999, %edi ## imm = 0x3E7
-; CHECK-NEXT:    jg LBB0_15
-; CHECK-NEXT:  ## %bb.11: ## %for.body20
-; CHECK-NEXT:    ## in Loop: Header=BB0_10 Depth=2
 ; CHECK-NEXT:    movq $-1000, %rbp ## imm = 0xFC18
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_12: ## %do.body.i
 ; CHECK-NEXT:    ## Parent Loop BB0_9 Depth=1
-; CHECK-NEXT:    ## Parent Loop BB0_10 Depth=2
+; CHECK-NEXT:    ## Parent Loop BB0_11 Depth=2
 ; CHECK-NEXT:    ## => This Inner Loop Header: Depth=3
 ; CHECK-NEXT:    cmpb $120, 1000(%rdx,%rbp)
 ; CHECK-NEXT:    je LBB0_14
@@ -111,16 +122,6 @@ define i32 @test_branches_order() uwtable ssp {
 ; CHECK-NEXT:    ## in Loop: Header=BB0_12 Depth=3
 ; CHECK-NEXT:    incq %rbp
 ; CHECK-NEXT:    jne LBB0_12
-; CHECK-NEXT:    jmp LBB0_5
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  LBB0_14: ## %exit
-; CHECK-NEXT:    ## in Loop: Header=BB0_10 Depth=2
-; CHECK-NEXT:    addq %rsi, %rbp
-; CHECK-NEXT:    incq %rdi
-; CHECK-NEXT:    decq %rsi
-; CHECK-NEXT:    addq $1001, %rdx ## imm = 0x3E9
-; CHECK-NEXT:    cmpq $-1000, %rbp ## imm = 0xFC18
-; CHECK-NEXT:    je LBB0_10
 ; CHECK-NEXT:  LBB0_5: ## %if.then
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rdi
 ; CHECK-NEXT:    callq _puts
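In the new layout above, the depth-2 latch chain (LBB0_14 %exit followed by the recheck in %bb.10) sits directly above the header LBB0_11 and falls through into it, so the only taken branch on the back path is the je LBB0_14 out of the inner loop. A minimal hand-written .ll sketch of a loop with this shape, a latch with a conditional exit and more than one predecessor, purely for illustration (function and label names are invented, not taken from this test):

define void @latch_demo(i32* %p, i32 %n) {
entry:
  br label %header

header:                                   ; preds = %latch, %entry
  %i = phi i32 [ 0, %entry ], [ %i.next, %latch ]
  %v = load i32, i32* %p
  %c = icmp sgt i32 %v, 0
  br i1 %c, label %then, label %else      ; gives %latch two predecessors

then:                                     ; preds = %header
  store i32 0, i32* %p
  br label %latch

else:                                     ; preds = %header
  br label %latch

latch:                                    ; preds = %then, %else
  %i.next = add nsw i32 %i, 1
  %cond = icmp slt i32 %i.next, %n
  br i1 %cond, label %header, label %exit ; conditional exit from the latch

exit:                                     ; preds = %latch
  ret void
}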

+ 30 - 27
test/CodeGen/X86/speculative-load-hardening.ll

@@ -215,10 +215,7 @@ define void @test_basic_loop(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2) nounwind sp
 ; X64-NEXT:    movl %esi, %ebp
 ; X64-NEXT:    cmovneq %r15, %rax
 ; X64-NEXT:    xorl %ebx, %ebx
-; X64-NEXT:    jmp .LBB2_3
 ; X64-NEXT:    .p2align 4, 0x90
-; X64-NEXT:  .LBB2_6: # in Loop: Header=BB2_3 Depth=1
-; X64-NEXT:    cmovgeq %r15, %rax
 ; X64-NEXT:  .LBB2_3: # %l.header
 ; X64-NEXT:    # =>This Inner Loop Header: Depth=1
 ; X64-NEXT:    movslq (%r12), %rcx
@@ -237,8 +234,11 @@ define void @test_basic_loop(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2) nounwind sp
 ; X64-NEXT:    cmovneq %r15, %rax
 ; X64-NEXT:    incl %ebx
 ; X64-NEXT:    cmpl %ebp, %ebx
-; X64-NEXT:    jl .LBB2_6
-; X64-NEXT:  # %bb.4:
+; X64-NEXT:    jge .LBB2_4
+; X64-NEXT:  # %bb.6: # in Loop: Header=BB2_3 Depth=1
+; X64-NEXT:    cmovgeq %r15, %rax
+; X64-NEXT:    jmp .LBB2_3
+; X64-NEXT:  .LBB2_4:
 ; X64-NEXT:    cmovlq %r15, %rax
 ; X64-NEXT:  .LBB2_5: # %exit
 ; X64-NEXT:    shlq $47, %rax
@@ -328,20 +328,12 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt
 ; X64-NEXT:    xorl %r13d, %r13d
 ; X64-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
 ; X64-NEXT:    testl %r15d, %r15d
-; X64-NEXT:    jg .LBB3_5
-; X64-NEXT:    jmp .LBB3_4
-; X64-NEXT:    .p2align 4, 0x90
-; X64-NEXT:  .LBB3_12:
-; X64-NEXT:    cmovgeq %rbp, %rax
-; X64-NEXT:    testl %r15d, %r15d
 ; X64-NEXT:    jle .LBB3_4
+; X64-NEXT:    .p2align 4, 0x90
 ; X64-NEXT:  .LBB3_5: # %l2.header.preheader
 ; X64-NEXT:    cmovleq %rbp, %rax
 ; X64-NEXT:    xorl %r15d, %r15d
-; X64-NEXT:    jmp .LBB3_6
 ; X64-NEXT:    .p2align 4, 0x90
-; X64-NEXT:  .LBB3_11: # in Loop: Header=BB3_6 Depth=1
-; X64-NEXT:    cmovgeq %rbp, %rax
 ; X64-NEXT:  .LBB3_6: # %l2.header
 ; X64-NEXT:    # =>This Inner Loop Header: Depth=1
 ; X64-NEXT:    movslq (%rbx), %rcx
@@ -360,8 +352,12 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt
 ; X64-NEXT:    cmovneq %rbp, %rax
 ; X64-NEXT:    incl %r15d
 ; X64-NEXT:    cmpl %r12d, %r15d
-; X64-NEXT:    jl .LBB3_11
-; X64-NEXT:  # %bb.7:
+; X64-NEXT:    jge .LBB3_7
+; X64-NEXT:  # %bb.11: # in Loop: Header=BB3_6 Depth=1
+; X64-NEXT:    cmovgeq %rbp, %rax
+; X64-NEXT:    jmp .LBB3_6
+; X64-NEXT:    .p2align 4, 0x90
+; X64-NEXT:  .LBB3_7:
 ; X64-NEXT:    cmovlq %rbp, %rax
 ; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Reload
 ; X64-NEXT:    jmp .LBB3_8
@@ -385,8 +381,13 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt
 ; X64-NEXT:    cmovneq %rbp, %rax
 ; X64-NEXT:    incl %r13d
 ; X64-NEXT:    cmpl %r15d, %r13d
-; X64-NEXT:    jl .LBB3_12
-; X64-NEXT:  # %bb.9:
+; X64-NEXT:    jge .LBB3_9
+; X64-NEXT:  # %bb.12:
+; X64-NEXT:    cmovgeq %rbp, %rax
+; X64-NEXT:    testl %r15d, %r15d
+; X64-NEXT:    jg .LBB3_5
+; X64-NEXT:    jmp .LBB3_4
+; X64-NEXT:  .LBB3_9:
 ; X64-NEXT:    cmovlq %rbp, %rax
 ; X64-NEXT:  .LBB3_10: # %exit
 ; X64-NEXT:    shlq $47, %rax
@@ -418,7 +419,17 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt
 ; X64-LFENCE-NEXT:    movl %esi, %r15d
 ; X64-LFENCE-NEXT:    lfence
 ; X64-LFENCE-NEXT:    xorl %r12d, %r12d
+; X64-LFENCE-NEXT:    jmp .LBB3_2
 ; X64-LFENCE-NEXT:    .p2align 4, 0x90
+; X64-LFENCE-NEXT:  .LBB3_5: # %l1.latch
+; X64-LFENCE-NEXT:    # in Loop: Header=BB3_2 Depth=1
+; X64-LFENCE-NEXT:    lfence
+; X64-LFENCE-NEXT:    movslq (%rbx), %rax
+; X64-LFENCE-NEXT:    movl (%r14,%rax,4), %edi
+; X64-LFENCE-NEXT:    callq sink
+; X64-LFENCE-NEXT:    incl %r12d
+; X64-LFENCE-NEXT:    cmpl %r15d, %r12d
+; X64-LFENCE-NEXT:    jge .LBB3_6
 ; X64-LFENCE-NEXT:  .LBB3_2: # %l1.header
 ; X64-LFENCE-NEXT:    # =>This Loop Header: Depth=1
 ; X64-LFENCE-NEXT:    # Child Loop BB3_4 Depth 2
@@ -440,15 +451,7 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt
 ; X64-LFENCE-NEXT:    incl %ebp
 ; X64-LFENCE-NEXT:    cmpl %r13d, %ebp
 ; X64-LFENCE-NEXT:    jl .LBB3_4
-; X64-LFENCE-NEXT:  .LBB3_5: # %l1.latch
-; X64-LFENCE-NEXT:    # in Loop: Header=BB3_2 Depth=1
-; X64-LFENCE-NEXT:    lfence
-; X64-LFENCE-NEXT:    movslq (%rbx), %rax
-; X64-LFENCE-NEXT:    movl (%r14,%rax,4), %edi
-; X64-LFENCE-NEXT:    callq sink
-; X64-LFENCE-NEXT:    incl %r12d
-; X64-LFENCE-NEXT:    cmpl %r15d, %r12d
-; X64-LFENCE-NEXT:    jl .LBB3_2
+; X64-LFENCE-NEXT:    jmp .LBB3_5
 ; X64-LFENCE-NEXT:  .LBB3_6: # %exit
 ; X64-LFENCE-NEXT:    lfence
 ; X64-LFENCE-NEXT:    addq $8, %rsp
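In the X64-LFENCE function the whole latch block .LBB3_5 (%l1.latch), including its callq sink, moves above the outer header .LBB3_2; it now exits with jge .LBB3_6 and otherwise falls through into the header, with entry and the inner-loop exit jumping to .LBB3_2 and .LBB3_5 respectively. A reduced nested-loop .ll sketch with that structure, assuming a sink() callee as in the test (everything else invented):

define void @nested_demo(i32 %n, i32 %m) {
entry:
  br label %l1.header

l1.header:                                ; preds = %l1.latch, %entry
  %i = phi i32 [ 0, %entry ], [ %i.next, %l1.latch ]
  br label %l2.header

l2.header:                                ; preds = %l2.header, %l1.header
  %j = phi i32 [ 0, %l1.header ], [ %j.next, %l2.header ]
  %j.next = add nsw i32 %j, 1
  %c2 = icmp slt i32 %j.next, %m
  br i1 %c2, label %l2.header, label %l1.latch

l1.latch:                                 ; preds = %l2.header
  call void @sink(i32 %i)                 ; the latch carries a call, as in the test
  %i.next = add nsw i32 %i, 1
  %c1 = icmp slt i32 %i.next, %n
  br i1 %c1, label %l1.header, label %exit

exit:                                     ; preds = %l1.latch
  ret void
}

declare void @sink(i32)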

+ 3 - 3
test/CodeGen/X86/swifterror.ll

@@ -1,6 +1,6 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-apple-darwin | FileCheck --check-prefix=CHECK-APPLE %s
-; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-apple-darwin -O0 | FileCheck --check-prefix=CHECK-O0 %s
-; RUN: llc < %s -verify-machineinstrs -mtriple=i386-apple-darwin | FileCheck --check-prefix=CHECK-i386 %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=x86_64-apple-darwin -disable-block-placement | FileCheck --check-prefix=CHECK-APPLE %s
+; RUN: llc -verify-machineinstrs -O0 < %s -mtriple=x86_64-apple-darwin -disable-block-placement | FileCheck --check-prefix=CHECK-O0 %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=i386-apple-darwin -disable-block-placement | FileCheck --check-prefix=CHECK-i386 %s
 
 declare i8* @malloc(i64)
 declare void @free(i8*)
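The swifterror checks here verify how the error value travels through registers, not where blocks land, so the RUN lines pin the old layout with -disable-block-placement rather than regenerating every label. A hypothetical RUN header for a new test pinned the same way (triple and check prefix chosen only for illustration):

; RUN: llc -verify-machineinstrs -disable-block-placement < %s -mtriple=x86_64-apple-darwin | FileCheck %s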

+ 35 - 32
test/CodeGen/X86/tail-dup-merge-loop-headers.ll

@@ -12,14 +12,17 @@ define void @tail_dup_merge_loops(i32 %a, i8* %b, i8* %c) local_unnamed_addr #0
 ; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
 ; CHECK-NEXT:    incq %rsi
 ; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    jne .LBB0_2
-; CHECK-NEXT:    jmp .LBB0_5
+; CHECK-NEXT:    je .LBB0_5
 ; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_2: # %inner_loop_top
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB0_4 Depth 2
+; CHECK-NEXT:    cmpb $0, (%rsi)
+; CHECK-NEXT:    js .LBB0_3
 ; CHECK-NEXT:  .LBB0_4: # %inner_loop_latch
-; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    # Parent Loop BB0_2 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    addq $2, %rsi
-; CHECK-NEXT:  .LBB0_2: # %inner_loop_top
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    cmpb $0, (%rsi)
 ; CHECK-NEXT:    jns .LBB0_4
 ; CHECK-NEXT:    jmp .LBB0_3
@@ -130,58 +133,58 @@ define i32 @loop_shared_header(i8* %exe, i32 %exesz, i32 %headsize, i32 %min, i3
 ; CHECK-NEXT:    testl %ebp, %ebp
 ; CHECK-NEXT:    je .LBB1_18
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB1_8: # %shared_loop_header
+; CHECK-NEXT:  .LBB1_9: # %shared_loop_header
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    testq %rbx, %rbx
 ; CHECK-NEXT:    jne .LBB1_27
-; CHECK-NEXT:  # %bb.9: # %inner_loop_body
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:  # %bb.10: # %inner_loop_body
+; CHECK-NEXT:    # in Loop: Header=BB1_9 Depth=1
 ; CHECK-NEXT:    testl %eax, %eax
-; CHECK-NEXT:    jns .LBB1_8
-; CHECK-NEXT:  # %bb.10: # %if.end96.i
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:    jns .LBB1_9
+; CHECK-NEXT:  # %bb.11: # %if.end96.i
+; CHECK-NEXT:    # in Loop: Header=BB1_9 Depth=1
 ; CHECK-NEXT:    cmpl $3, %ebp
 ; CHECK-NEXT:    jae .LBB1_22
-; CHECK-NEXT:  # %bb.11: # %if.end287.i
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:  # %bb.12: # %if.end287.i
+; CHECK-NEXT:    # in Loop: Header=BB1_9 Depth=1
 ; CHECK-NEXT:    xorl %esi, %esi
 ; CHECK-NEXT:    cmpl $1, %ebp
 ; CHECK-NEXT:    setne %dl
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    jne .LBB1_15
-; CHECK-NEXT:  # %bb.12: # %if.end308.i
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:    jne .LBB1_16
+; CHECK-NEXT:  # %bb.13: # %if.end308.i
+; CHECK-NEXT:    # in Loop: Header=BB1_9 Depth=1
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    je .LBB1_17
-; CHECK-NEXT:  # %bb.13: # %if.end335.i
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:    je .LBB1_7
+; CHECK-NEXT:  # %bb.14: # %if.end335.i
+; CHECK-NEXT:    # in Loop: Header=BB1_9 Depth=1
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    testb %dl, %dl
 ; CHECK-NEXT:    movl $0, %esi
-; CHECK-NEXT:    jne .LBB1_7
-; CHECK-NEXT:  # %bb.14: # %merge_other
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:    jne .LBB1_8
+; CHECK-NEXT:  # %bb.15: # %merge_other
+; CHECK-NEXT:    # in Loop: Header=BB1_9 Depth=1
 ; CHECK-NEXT:    xorl %esi, %esi
-; CHECK-NEXT:    jmp .LBB1_16
-; CHECK-NEXT:  .LBB1_15: # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:    jmp .LBB1_17
+; CHECK-NEXT:  .LBB1_16: # in Loop: Header=BB1_9 Depth=1
 ; CHECK-NEXT:    movb %dl, %sil
 ; CHECK-NEXT:    addl $3, %esi
-; CHECK-NEXT:  .LBB1_16: # %outer_loop_latch
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:  .LBB1_17: # %outer_loop_latch
+; CHECK-NEXT:    # in Loop: Header=BB1_9 Depth=1
 ; CHECK-NEXT:    # implicit-def: $dl
-; CHECK-NEXT:    jmp .LBB1_7
-; CHECK-NEXT:  .LBB1_17: # %merge_predecessor_split
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:    jmp .LBB1_8
+; CHECK-NEXT:  .LBB1_7: # %merge_predecessor_split
+; CHECK-NEXT:    # in Loop: Header=BB1_9 Depth=1
 ; CHECK-NEXT:    movb $32, %dl
 ; CHECK-NEXT:    xorl %esi, %esi
-; CHECK-NEXT:  .LBB1_7: # %outer_loop_latch
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:  .LBB1_8: # %outer_loop_latch
+; CHECK-NEXT:    # in Loop: Header=BB1_9 Depth=1
 ; CHECK-NEXT:    movzwl %si, %esi
 ; CHECK-NEXT:    decl %esi
 ; CHECK-NEXT:    movzwl %si, %esi
 ; CHECK-NEXT:    leaq 1(%rcx,%rsi), %rcx
 ; CHECK-NEXT:    testl %ebp, %ebp
-; CHECK-NEXT:    jne .LBB1_8
+; CHECK-NEXT:    jne .LBB1_9
 ; CHECK-NEXT:  .LBB1_18: # %while.cond.us1412.i
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al

+ 10 - 15
test/CodeGen/X86/tail-dup-repeat.ll

@@ -10,35 +10,30 @@
 define void @repeated_tail_dup(i1 %a1, i1 %a2, i32* %a4, i32* %a5, i8* %a6, i32 %a7) #0 align 2 {
 ; CHECK-LABEL: repeated_tail_dup:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_1: # %for.cond
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    testb $1, %dil
 ; CHECK-NEXT:    je .LBB0_3
-; CHECK-NEXT:  # %bb.2: # %land.lhs.true
-; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    movl $10, (%rdx)
-; CHECK-NEXT:    jmp .LBB0_6
 ; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_2: # %land.lhs.true
+; CHECK-NEXT:    movl $10, (%rdx)
+; CHECK-NEXT:  .LBB0_6: # %dup2
+; CHECK-NEXT:    movl $2, (%rcx)
+; CHECK-NEXT:    testl %r9d, %r9d
+; CHECK-NEXT:    jne .LBB0_8
+; CHECK-NEXT:  .LBB0_1: # %for.cond
+; CHECK-NEXT:    testb $1, %dil
+; CHECK-NEXT:    jne .LBB0_2
 ; CHECK-NEXT:  .LBB0_3: # %if.end56
-; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    testb $1, %sil
 ; CHECK-NEXT:    je .LBB0_5
 ; CHECK-NEXT:  # %bb.4: # %if.then64
-; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    movb $1, (%r8)
 ; CHECK-NEXT:    testl %r9d, %r9d
 ; CHECK-NEXT:    je .LBB0_1
 ; CHECK-NEXT:    jmp .LBB0_8
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB0_5: # %if.end70
-; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    movl $12, (%rdx)
-; CHECK-NEXT:  .LBB0_6: # %dup2
-; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    movl $2, (%rcx)
-; CHECK-NEXT:    testl %r9d, %r9d
-; CHECK-NEXT:    je .LBB0_1
+; CHECK-NEXT:    jmp .LBB0_6
 ; CHECK-NEXT:  .LBB0_8: # %for.end
 ; CHECK-NEXT:    retq
 entry:

+ 35 - 32
test/CodeGen/X86/vector-shift-by-select-loop.ll

@@ -136,8 +136,17 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture
 ; SSE-NEXT:    jne .LBB0_4
 ; SSE-NEXT:  # %bb.5: # %middle.block
 ; SSE-NEXT:    cmpq %rax, %rdx
-; SSE-NEXT:    je .LBB0_9
+; SSE-NEXT:    jne .LBB0_6
+; SSE-NEXT:  .LBB0_9: # %for.cond.cleanup
+; SSE-NEXT:    retq
 ; SSE-NEXT:    .p2align 4, 0x90
+; SSE-NEXT:  .LBB0_8: # %for.body
+; SSE-NEXT:    # in Loop: Header=BB0_6 Depth=1
+; SSE-NEXT:    # kill: def $cl killed $cl killed $ecx
+; SSE-NEXT:    shll %cl, (%rdi,%rdx,4)
+; SSE-NEXT:    incq %rdx
+; SSE-NEXT:    cmpq %rdx, %rax
+; SSE-NEXT:    je .LBB0_9
 ; SSE-NEXT:  .LBB0_6: # %for.body
 ; SSE-NEXT:    # =>This Inner Loop Header: Depth=1
 ; SSE-NEXT:    cmpb $0, (%rsi,%rdx)
@@ -146,15 +155,7 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture
 ; SSE-NEXT:  # %bb.7: # %for.body
 ; SSE-NEXT:    # in Loop: Header=BB0_6 Depth=1
 ; SSE-NEXT:    movl %r8d, %ecx
-; SSE-NEXT:  .LBB0_8: # %for.body
-; SSE-NEXT:    # in Loop: Header=BB0_6 Depth=1
-; SSE-NEXT:    # kill: def $cl killed $cl killed $ecx
-; SSE-NEXT:    shll %cl, (%rdi,%rdx,4)
-; SSE-NEXT:    incq %rdx
-; SSE-NEXT:    cmpq %rdx, %rax
-; SSE-NEXT:    jne .LBB0_6
-; SSE-NEXT:  .LBB0_9: # %for.cond.cleanup
-; SSE-NEXT:    retq
+; SSE-NEXT:    jmp .LBB0_8
 ;
 ; AVX1-LABEL: vector_variable_shift_left_loop:
 ; AVX1:       # %bb.0: # %entry
@@ -258,8 +259,18 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture
 ; AVX1-NEXT:    jne .LBB0_4
 ; AVX1-NEXT:  # %bb.5: # %middle.block
 ; AVX1-NEXT:    cmpq %rax, %rdx
-; AVX1-NEXT:    je .LBB0_9
+; AVX1-NEXT:    jne .LBB0_6
+; AVX1-NEXT:  .LBB0_9: # %for.cond.cleanup
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
 ; AVX1-NEXT:    .p2align 4, 0x90
+; AVX1-NEXT:  .LBB0_8: # %for.body
+; AVX1-NEXT:    # in Loop: Header=BB0_6 Depth=1
+; AVX1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; AVX1-NEXT:    shll %cl, (%rdi,%rdx,4)
+; AVX1-NEXT:    incq %rdx
+; AVX1-NEXT:    cmpq %rdx, %rax
+; AVX1-NEXT:    je .LBB0_9
 ; AVX1-NEXT:  .LBB0_6: # %for.body
 ; AVX1-NEXT:    # =>This Inner Loop Header: Depth=1
 ; AVX1-NEXT:    cmpb $0, (%rsi,%rdx)
@@ -268,16 +279,7 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture
 ; AVX1-NEXT:  # %bb.7: # %for.body
 ; AVX1-NEXT:    # in Loop: Header=BB0_6 Depth=1
 ; AVX1-NEXT:    movl %r8d, %ecx
-; AVX1-NEXT:  .LBB0_8: # %for.body
-; AVX1-NEXT:    # in Loop: Header=BB0_6 Depth=1
-; AVX1-NEXT:    # kill: def $cl killed $cl killed $ecx
-; AVX1-NEXT:    shll %cl, (%rdi,%rdx,4)
-; AVX1-NEXT:    incq %rdx
-; AVX1-NEXT:    cmpq %rdx, %rax
-; AVX1-NEXT:    jne .LBB0_6
-; AVX1-NEXT:  .LBB0_9: # %for.cond.cleanup
-; AVX1-NEXT:    vzeroupper
-; AVX1-NEXT:    retq
+; AVX1-NEXT:    jmp .LBB0_8
 ;
 ; AVX2-LABEL: vector_variable_shift_left_loop:
 ; AVX2:       # %bb.0: # %entry
@@ -332,8 +334,18 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture
 ; AVX2-NEXT:    jne .LBB0_4
 ; AVX2-NEXT:  # %bb.5: # %middle.block
 ; AVX2-NEXT:    cmpq %rax, %rdx
-; AVX2-NEXT:    je .LBB0_9
+; AVX2-NEXT:    jne .LBB0_6
+; AVX2-NEXT:  .LBB0_9: # %for.cond.cleanup
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
 ; AVX2-NEXT:    .p2align 4, 0x90
+; AVX2-NEXT:  .LBB0_8: # %for.body
+; AVX2-NEXT:    # in Loop: Header=BB0_6 Depth=1
+; AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; AVX2-NEXT:    shll %cl, (%rdi,%rdx,4)
+; AVX2-NEXT:    incq %rdx
+; AVX2-NEXT:    cmpq %rdx, %rax
+; AVX2-NEXT:    je .LBB0_9
 ; AVX2-NEXT:  .LBB0_6: # %for.body
 ; AVX2-NEXT:    # =>This Inner Loop Header: Depth=1
 ; AVX2-NEXT:    cmpb $0, (%rsi,%rdx)
@@ -342,16 +354,7 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture
 ; AVX2-NEXT:  # %bb.7: # %for.body
 ; AVX2-NEXT:    # in Loop: Header=BB0_6 Depth=1
 ; AVX2-NEXT:    movl %r8d, %ecx
-; AVX2-NEXT:  .LBB0_8: # %for.body
-; AVX2-NEXT:    # in Loop: Header=BB0_6 Depth=1
-; AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; AVX2-NEXT:    shll %cl, (%rdi,%rdx,4)
-; AVX2-NEXT:    incq %rdx
-; AVX2-NEXT:    cmpq %rdx, %rax
-; AVX2-NEXT:    jne .LBB0_6
-; AVX2-NEXT:  .LBB0_9: # %for.cond.cleanup
-; AVX2-NEXT:    vzeroupper
-; AVX2-NEXT:    retq
+; AVX2-NEXT:    jmp .LBB0_8
 entry:
   %cmp12 = icmp sgt i32 %count, 0
   br i1 %cmp12, label %for.body.preheader, label %for.cond.cleanup

+ 8 - 8
test/CodeGen/X86/widen_arith-1.ll

@@ -8,9 +8,13 @@ define void @update(<3 x i8>* %dst, <3 x i8>* %src, i32 %n) nounwind {
 ; CHECK-NEXT:    movl $0, (%esp)
 ; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
 ; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
-; CHECK-NEXT:    jmp .LBB0_1
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_2: # %forbody
+; CHECK-NEXT:  .LBB0_1: # %forcond
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%esp), %eax
+; CHECK-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    jge .LBB0_3
+; CHECK-NEXT:  # %bb.2: # %forbody
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    movl (%esp), %eax
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -21,12 +25,8 @@ define void @update(<3 x i8>* %dst, <3 x i8>* %src, i32 %n) nounwind {
 ; CHECK-NEXT:    pshufb %xmm1, %xmm2
 ; CHECK-NEXT:    pextrw $0, %xmm2, (%ecx,%eax,4)
 ; CHECK-NEXT:    incl (%esp)
-; CHECK-NEXT:  .LBB0_1: # %forcond
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl (%esp), %eax
-; CHECK-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    jl .LBB0_2
-; CHECK-NEXT:  # %bb.3: # %afterfor
+; CHECK-NEXT:    jmp .LBB0_1
+; CHECK-NEXT:  .LBB0_3: # %afterfor
 ; CHECK-NEXT:    addl $12, %esp
 ; CHECK-NEXT:    retl
 entry:
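This test and the following widen_arith / widen_cast tests change in the same way: the top-tested loop now enters at its header .LBB0_1 (%forcond), the exit test is inverted from jl .LBB0_2 to jge .LBB0_3, and the back edge becomes an unconditional jmp .LBB0_1. A hand-reduced .ll loop with that forcond/forbody/afterfor shape (hypothetical, with the vector body collapsed to a single store):

define void @forcond_demo(i32* %p, i32 %n) {
entry:
  br label %forcond

forcond:                                  ; preds = %forbody, %entry
  %i = phi i32 [ 0, %entry ], [ %i.next, %forbody ]
  %c = icmp slt i32 %i, %n                ; header is also the exiting block
  br i1 %c, label %forbody, label %afterfor

forbody:                                  ; preds = %forcond
  %addr = getelementptr inbounds i32, i32* %p, i32 %i
  store i32 %i, i32* %addr
  %i.next = add nsw i32 %i, 1
  br label %forcond                       ; single-successor latch

afterfor:                                 ; preds = %forcond
  ret void
}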

+ 8 - 8
test/CodeGen/X86/widen_arith-2.ll

@@ -10,9 +10,13 @@ define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind {
 ; CHECK-NEXT:    movl $0, (%esp)
 ; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
 ; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
-; CHECK-NEXT:    jmp .LBB0_1
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_2: # %forbody
+; CHECK-NEXT:  .LBB0_1: # %forcond
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%esp), %eax
+; CHECK-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    jge .LBB0_3
+; CHECK-NEXT:  # %bb.2: # %forbody
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    movl (%esp), %eax
 ; CHECK-NEXT:    leal (,%eax,8), %ecx
@@ -27,12 +31,8 @@ define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind {
 ; CHECK-NEXT:    packuswb %xmm0, %xmm2
 ; CHECK-NEXT:    movq %xmm2, (%edx,%eax,8)
 ; CHECK-NEXT:    incl (%esp)
-; CHECK-NEXT:  .LBB0_1: # %forcond
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl (%esp), %eax
-; CHECK-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    jl .LBB0_2
-; CHECK-NEXT:  # %bb.3: # %afterfor
+; CHECK-NEXT:    jmp .LBB0_1
+; CHECK-NEXT:  .LBB0_3: # %afterfor
 ; CHECK-NEXT:    addl $12, %esp
 ; CHECK-NEXT:    retl
 entry:

+ 8 - 8
test/CodeGen/X86/widen_arith-3.ll

@@ -18,9 +18,13 @@ define void @update(<3 x i16>* %dst, <3 x i16>* %src, i32 %n) nounwind {
 ; CHECK-NEXT:    movw $1, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    movl $65537, {{[0-9]+}}(%esp) # imm = 0x10001
 ; CHECK-NEXT:    movl $0, {{[0-9]+}}(%esp)
-; CHECK-NEXT:    jmp .LBB0_1
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_2: # %forbody
+; CHECK-NEXT:  .LBB0_1: # %forcond
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    cmpl 16(%ebp), %eax
+; CHECK-NEXT:    jge .LBB0_3
+; CHECK-NEXT:  # %bb.2: # %forbody
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl 12(%ebp), %edx
@@ -31,12 +35,8 @@ define void @update(<3 x i16>* %dst, <3 x i16>* %src, i32 %n) nounwind {
 ; CHECK-NEXT:    pshufb %xmm1, %xmm2
 ; CHECK-NEXT:    movd %xmm2, (%ecx,%eax,8)
 ; CHECK-NEXT:    incl {{[0-9]+}}(%esp)
-; CHECK-NEXT:  .LBB0_1: # %forcond
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    cmpl 16(%ebp), %eax
-; CHECK-NEXT:    jl .LBB0_2
-; CHECK-NEXT:  # %bb.3: # %afterfor
+; CHECK-NEXT:    jmp .LBB0_1
+; CHECK-NEXT:  .LBB0_3: # %afterfor
 ; CHECK-NEXT:    movl %ebp, %esp
 ; CHECK-NEXT:    popl %ebp
 ; CHECK-NEXT:    retl

+ 16 - 16
test/CodeGen/X86/widen_arith-4.ll

@@ -16,9 +16,13 @@ define void @update(<5 x i16>* %dst, <5 x i16>* %src, i32 %n) nounwind {
 ; SSE2-NEXT:    movl $0, -{{[0-9]+}}(%rsp)
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = <271,271,271,271,271,u,u,u>
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = <2,4,2,2,2,u,u,u>
-; SSE2-NEXT:    jmp .LBB0_1
 ; SSE2-NEXT:    .p2align 4, 0x90
-; SSE2-NEXT:  .LBB0_2: # %forbody
+; SSE2-NEXT:  .LBB0_1: # %forcond
+; SSE2-NEXT:    # =>This Inner Loop Header: Depth=1
+; SSE2-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
+; SSE2-NEXT:    cmpl -{{[0-9]+}}(%rsp), %eax
+; SSE2-NEXT:    jge .LBB0_3
+; SSE2-NEXT:  # %bb.2: # %forbody
 ; SSE2-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; SSE2-NEXT:    movslq -{{[0-9]+}}(%rsp), %rax
 ; SSE2-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
@@ -31,12 +35,8 @@ define void @update(<5 x i16>* %dst, <5 x i16>* %src, i32 %n) nounwind {
 ; SSE2-NEXT:    pextrw $4, %xmm2, %edx
 ; SSE2-NEXT:    movw %dx, 8(%rcx,%rax)
 ; SSE2-NEXT:    incl -{{[0-9]+}}(%rsp)
-; SSE2-NEXT:  .LBB0_1: # %forcond
-; SSE2-NEXT:    # =>This Inner Loop Header: Depth=1
-; SSE2-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
-; SSE2-NEXT:    cmpl -{{[0-9]+}}(%rsp), %eax
-; SSE2-NEXT:    jl .LBB0_2
-; SSE2-NEXT:  # %bb.3: # %afterfor
+; SSE2-NEXT:    jmp .LBB0_1
+; SSE2-NEXT:  .LBB0_3: # %afterfor
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: update:
@@ -49,9 +49,13 @@ define void @update(<5 x i16>* %dst, <5 x i16>* %src, i32 %n) nounwind {
 ; SSE41-NEXT:    movw $0, -{{[0-9]+}}(%rsp)
 ; SSE41-NEXT:    movl $0, -{{[0-9]+}}(%rsp)
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = <271,271,271,271,271,u,u,u>
-; SSE41-NEXT:    jmp .LBB0_1
 ; SSE41-NEXT:    .p2align 4, 0x90
-; SSE41-NEXT:  .LBB0_2: # %forbody
+; SSE41-NEXT:  .LBB0_1: # %forcond
+; SSE41-NEXT:    # =>This Inner Loop Header: Depth=1
+; SSE41-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
+; SSE41-NEXT:    cmpl -{{[0-9]+}}(%rsp), %eax
+; SSE41-NEXT:    jge .LBB0_3
+; SSE41-NEXT:  # %bb.2: # %forbody
 ; SSE41-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; SSE41-NEXT:    movslq -{{[0-9]+}}(%rsp), %rax
 ; SSE41-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
@@ -66,12 +70,8 @@ define void @update(<5 x i16>* %dst, <5 x i16>* %src, i32 %n) nounwind {
 ; SSE41-NEXT:    pextrw $4, %xmm1, 8(%rcx,%rax)
 ; SSE41-NEXT:    movq %xmm2, (%rcx,%rax)
 ; SSE41-NEXT:    incl -{{[0-9]+}}(%rsp)
-; SSE41-NEXT:  .LBB0_1: # %forcond
-; SSE41-NEXT:    # =>This Inner Loop Header: Depth=1
-; SSE41-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
-; SSE41-NEXT:    cmpl -{{[0-9]+}}(%rsp), %eax
-; SSE41-NEXT:    jl .LBB0_2
-; SSE41-NEXT:  # %bb.3: # %afterfor
+; SSE41-NEXT:    jmp .LBB0_1
+; SSE41-NEXT:  .LBB0_3: # %afterfor
 ; SSE41-NEXT:    retq
 entry:
 	%dst.addr = alloca <5 x i16>*

+ 8 - 8
test/CodeGen/X86/widen_arith-5.ll

@@ -14,9 +14,13 @@ define void @update(<3 x i32>* %dst, <3 x i32>* %src, i32 %n) nounwind {
 ; CHECK-NEXT:    movl $1, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movl $0, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movdqa {{.*#+}} xmm0 = <3,3,3,u>
-; CHECK-NEXT:    jmp .LBB0_1
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_2: # %forbody
+; CHECK-NEXT:  .LBB0_1: # %forcond
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT:    cmpl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT:    jge .LBB0_3
+; CHECK-NEXT:  # %bb.2: # %forbody
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    movslq -{{[0-9]+}}(%rsp), %rax
 ; CHECK-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
@@ -28,12 +32,8 @@ define void @update(<3 x i32>* %dst, <3 x i32>* %src, i32 %n) nounwind {
 ; CHECK-NEXT:    pextrd $2, %xmm1, 8(%rcx,%rax)
 ; CHECK-NEXT:    movq %xmm1, (%rcx,%rax)
 ; CHECK-NEXT:    incl -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:  .LBB0_1: # %forcond
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    cmpl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    jl .LBB0_2
-; CHECK-NEXT:  # %bb.3: # %afterfor
+; CHECK-NEXT:    jmp .LBB0_1
+; CHECK-NEXT:  .LBB0_3: # %afterfor
 ; CHECK-NEXT:    retq
 entry:
 	%dst.addr = alloca <3 x i32>*

+ 8 - 8
test/CodeGen/X86/widen_arith-6.ll

@@ -15,9 +15,13 @@ define void @update(<3 x float>* %dst, <3 x float>* %src, i32 %n) nounwind {
 ; CHECK-NEXT:    movl $1065353216, {{[0-9]+}}(%esp) # imm = 0x3F800000
 ; CHECK-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    movaps {{.*#+}} xmm0 = <1.97604004E+3,1.97604004E+3,1.97604004E+3,u>
-; CHECK-NEXT:    jmp .LBB0_1
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_2: # %forbody
+; CHECK-NEXT:  .LBB0_1: # %forcond
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    cmpl 16(%ebp), %eax
+; CHECK-NEXT:    jge .LBB0_3
+; CHECK-NEXT:  # %bb.2: # %forbody
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl 8(%ebp), %ecx
@@ -30,12 +34,8 @@ define void @update(<3 x float>* %dst, <3 x float>* %src, i32 %n) nounwind {
 ; CHECK-NEXT:    extractps $1, %xmm1, 4(%ecx,%eax)
 ; CHECK-NEXT:    movss %xmm1, (%ecx,%eax)
 ; CHECK-NEXT:    incl {{[0-9]+}}(%esp)
-; CHECK-NEXT:  .LBB0_1: # %forcond
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    cmpl 16(%ebp), %eax
-; CHECK-NEXT:    jl .LBB0_2
-; CHECK-NEXT:  # %bb.3: # %afterfor
+; CHECK-NEXT:    jmp .LBB0_1
+; CHECK-NEXT:  .LBB0_3: # %afterfor
 ; CHECK-NEXT:    movl %ebp, %esp
 ; CHECK-NEXT:    popl %ebp
 ; CHECK-NEXT:    retl

+ 16 - 16
test/CodeGen/X86/widen_cast-4.ll

@@ -11,9 +11,13 @@ define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind {
 ; NARROW-NEXT:    movl $0, (%esp)
 ; NARROW-NEXT:    pcmpeqd %xmm0, %xmm0
 ; NARROW-NEXT:    movdqa {{.*#+}} xmm1 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; NARROW-NEXT:    jmp .LBB0_1
 ; NARROW-NEXT:    .p2align 4, 0x90
-; NARROW-NEXT:  .LBB0_2: # %forbody
+; NARROW-NEXT:  .LBB0_1: # %forcond
+; NARROW-NEXT:    # =>This Inner Loop Header: Depth=1
+; NARROW-NEXT:    movl (%esp), %eax
+; NARROW-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
+; NARROW-NEXT:    jge .LBB0_3
+; NARROW-NEXT:  # %bb.2: # %forbody
 ; NARROW-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; NARROW-NEXT:    movl (%esp), %eax
 ; NARROW-NEXT:    leal (,%eax,8), %ecx
@@ -30,12 +34,8 @@ define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind {
 ; NARROW-NEXT:    pshufb %xmm1, %xmm2
 ; NARROW-NEXT:    movq %xmm2, (%edx,%eax,8)
 ; NARROW-NEXT:    incl (%esp)
-; NARROW-NEXT:  .LBB0_1: # %forcond
-; NARROW-NEXT:    # =>This Inner Loop Header: Depth=1
-; NARROW-NEXT:    movl (%esp), %eax
-; NARROW-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
-; NARROW-NEXT:    jl .LBB0_2
-; NARROW-NEXT:  # %bb.3: # %afterfor
+; NARROW-NEXT:    jmp .LBB0_1
+; NARROW-NEXT:  .LBB0_3: # %afterfor
 ; NARROW-NEXT:    addl $12, %esp
 ; NARROW-NEXT:    retl
 ;
@@ -46,9 +46,13 @@ define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind {
 ; WIDE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; WIDE-NEXT:    movdqa {{.*#+}} xmm1 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
 ; WIDE-NEXT:    movdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
-; WIDE-NEXT:    jmp .LBB0_1
 ; WIDE-NEXT:    .p2align 4, 0x90
-; WIDE-NEXT:  .LBB0_2: # %forbody
+; WIDE-NEXT:  .LBB0_1: # %forcond
+; WIDE-NEXT:    # =>This Inner Loop Header: Depth=1
+; WIDE-NEXT:    movl (%esp), %eax
+; WIDE-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
+; WIDE-NEXT:    jge .LBB0_3
+; WIDE-NEXT:  # %bb.2: # %forbody
 ; WIDE-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; WIDE-NEXT:    movl (%esp), %eax
 ; WIDE-NEXT:    leal (,%eax,8), %ecx
@@ -65,12 +69,8 @@ define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind {
 ; WIDE-NEXT:    psubb %xmm2, %xmm3
 ; WIDE-NEXT:    movq %xmm3, (%edx,%eax,8)
 ; WIDE-NEXT:    incl (%esp)
-; WIDE-NEXT:  .LBB0_1: # %forcond
-; WIDE-NEXT:    # =>This Inner Loop Header: Depth=1
-; WIDE-NEXT:    movl (%esp), %eax
-; WIDE-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
-; WIDE-NEXT:    jl .LBB0_2
-; WIDE-NEXT:  # %bb.3: # %afterfor
+; WIDE-NEXT:    jmp .LBB0_1
+; WIDE-NEXT:  .LBB0_3: # %afterfor
 ; WIDE-NEXT:    addl $12, %esp
 ; WIDE-NEXT:    retl
 entry:

+ 1 - 1
test/CodeGen/X86/x86-cmov-converter.ll

@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap %s
+; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs -disable-block-placement < %s | FileCheck -allow-deprecated-dag-overlap %s
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; This test checks that x86-cmov-converter optimization transform CMOV

+ 6 - 6
test/DebugInfo/X86/PR37234.ll

@@ -21,18 +21,18 @@
 ; CHECK-LABEL: # %bb.{{.*}}:
 ; CHECK:        #DEBUG_VALUE: main:aa <- 0
 ; CHECK: 	#DEBUG_VALUE: main:aa <- $[[REG:[0-9a-z]+]]
-; CHECK: 	jmp	.LBB0_1
-; CHECK: .LBB0_2:
+; CHECK: .LBB0_1:
+; CHECK:        #DEBUG_VALUE: main:aa <- $[[REG]]
+; CHECK:        je      .LBB0_4
+; CHECK: # %bb.{{.*}}:
 ; CHECK:        #DEBUG_VALUE: main:aa <- $[[REG]]
 ; CHECK:        jne     .LBB0_1
 ; CHECK: # %bb.{{.*}}:
 ; CHECK:        #DEBUG_VALUE: main:aa <- $[[REG]]
 ; CHECK:        incl    %[[REG]]
 ; CHECK:        #DEBUG_VALUE: main:aa <- $[[REG]]
-; CHECK: .LBB0_1:
-; CHECK: 	#DEBUG_VALUE: main:aa <- $[[REG]]
-; CHECK:        jne     .LBB0_2
-; CHECK: # %bb.{{.*}}:
+; CHECK:        jmp     .LBB0_1
+; CHECK: .LBB0_4:
 ; CHECK: 	#DEBUG_VALUE: main:aa <- $[[REG]]
 ; CHECK: 	retq
 

+ 7 - 6
test/DebugInfo/X86/dbg-value-transfer-order.ll

@@ -24,6 +24,12 @@
 ; with the Orders insertion point vector.
 
 ; CHECK-LABEL: f: # @f
+; CHECK: .LBB0_4:
+;        Check that this DEBUG_VALUE comes before the left shift.
+; CHECK:         #DEBUG_VALUE: bit_offset <- $ecx
+; CHECK:         .cv_loc 0 1 8 28                # t.c:8:28
+; CHECK:         movl    $1, %[[reg:[^ ]*]]
+; CHECK:         shll    %cl, %[[reg]]
 ; CHECK: .LBB0_2:                                # %while.body
 ; CHECK:         movl    $32, %ecx
 ; CHECK:         testl   {{.*}}
@@ -31,12 +37,7 @@
 ; CHECK: # %bb.3:                                 # %if.then
 ; CHECK:         callq   if_then
 ; CHECK:         movl    %eax, %ecx
-; CHECK: .LBB0_4:                                # %if.end
-;        Check that this DEBUG_VALUE comes before the left shift.
-; CHECK:         #DEBUG_VALUE: bit_offset <- $ecx
-; CHECK:         .cv_loc 0 1 8 28                # t.c:8:28
-; CHECK:         movl    $1, %[[reg:[^ ]*]]
-; CHECK:         shll    %cl, %[[reg]]
+; CHECK:         jmp     .LBB0_4
 
 ; ModuleID = 't.c'
 source_filename = "t.c"