Jelajahi Sumber

Update branch coalescing to be a PowerPC specific pass

Implement branch coalescing as a PowerPC-specific pass.  Branch coalescing
relies on the analyzeBranch method, which currently does not include any
implicit operands.  This is not an issue on PPC but must be handled on other
targets.

The pass is currently off by default; enable it with -enable-ppc-branch-coalesce.

Differential Revision: https://reviews.llvm.org/D32776

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313061 91177308-0d34-0410-b5e6-96231b3b80d8
Lei Huang 8 tahun lalu
induk
melakukan
c64508a42f

+ 0 - 3
include/llvm/CodeGen/Passes.h

@@ -409,9 +409,6 @@ namespace llvm {
   /// This pass frees the memory occupied by the MachineFunction.
   FunctionPass *createFreeMachineFunctionPass();
 
-  /// This pass combine basic blocks guarded by the same branch.
-  extern char &BranchCoalescingID;
-
   /// This pass performs outlining on machine instructions directly before
   /// printing assembly.
   ModulePass *createMachineOutlinerPass();

+ 0 - 1
include/llvm/InitializePasses.h

@@ -76,7 +76,6 @@ void initializeBasicAAWrapperPassPass(PassRegistry&);
 void initializeBlockExtractorPassPass(PassRegistry&);
 void initializeBlockFrequencyInfoWrapperPassPass(PassRegistry&);
 void initializeBoundsCheckingPass(PassRegistry&);
-void initializeBranchCoalescingPass(PassRegistry&);
 void initializeBranchFolderPassPass(PassRegistry&);
 void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry&);
 void initializeBranchRelaxationPass(PassRegistry&);

+ 0 - 1
lib/CodeGen/CMakeLists.txt

@@ -4,7 +4,6 @@ add_llvm_library(LLVMCodeGen
   Analysis.cpp
   AtomicExpandPass.cpp
   BasicTargetTransformInfo.cpp
-  BranchCoalescing.cpp
   BranchFolding.cpp
   BranchRelaxation.cpp
   BuiltinGCs.cpp

+ 0 - 1
lib/CodeGen/CodeGen.cpp

@@ -21,7 +21,6 @@ using namespace llvm;
 /// initializeCodeGen - Initialize all passes linked into the CodeGen library.
 void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeAtomicExpandPass(Registry);
-  initializeBranchCoalescingPass(Registry);
   initializeBranchFolderPassPass(Registry);
   initializeBranchRelaxationPass(Registry);
   initializeCodeGenPreparePass(Registry);

+ 0 - 3
lib/CodeGen/TargetPassConfig.cpp

@@ -927,9 +927,6 @@ void TargetPassConfig::addMachineSSAOptimization() {
   addPass(&MachineLICMID, false);
   addPass(&MachineCSEID, false);
 
-  // Coalesce basic blocks with the same branch condition
-  addPass(&BranchCoalescingID);
-
   addPass(&MachineSinkingID);
 
   addPass(&PeepholeOptimizerID);

+ 1 - 0
lib/Target/PowerPC/CMakeLists.txt

@@ -16,6 +16,7 @@ add_llvm_target(PowerPCCodeGen
   PPCBoolRetToInt.cpp
   PPCAsmPrinter.cpp
   PPCBranchSelector.cpp
+  PPCBranchCoalescing.cpp
   PPCCCState.cpp
   PPCCTRLoops.cpp
   PPCHazardRecognizers.cpp

+ 1 - 0
lib/Target/PowerPC/PPC.h

@@ -41,6 +41,7 @@ namespace llvm {
   FunctionPass *createPPCVSXSwapRemovalPass();
   FunctionPass *createPPCMIPeepholePass();
   FunctionPass *createPPCBranchSelectionPass();
+  FunctionPass *createPPCBranchCoalescingPass();
   FunctionPass *createPPCQPXLoadSplatPass();
   FunctionPass *createPPCISelDag(PPCTargetMachine &TM, CodeGenOpt::Level OL);
   FunctionPass *createPPCTLSDynamicCallPass();

+ 62 - 36
lib/CodeGen/BranchCoalescing.cpp → lib/Target/PowerPC/PPCBranchCoalescing.cpp

@@ -13,6 +13,7 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#include "PPC.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineDominators.h"
@@ -27,18 +28,18 @@
 
 using namespace llvm;
 
-#define DEBUG_TYPE "branch-coalescing"
-
-static cl::opt<cl::boolOrDefault>
-    EnableBranchCoalescing("enable-branch-coalesce", cl::Hidden,
-                           cl::desc("enable coalescing of duplicate branches"));
+#define DEBUG_TYPE "ppc-branch-coalescing"
 
 STATISTIC(NumBlocksCoalesced, "Number of blocks coalesced");
 STATISTIC(NumPHINotMoved, "Number of PHI Nodes that cannot be merged");
 STATISTIC(NumBlocksNotCoalesced, "Number of blocks not coalesced");
 
+namespace llvm {
+    void initializePPCBranchCoalescingPass(PassRegistry&);
+}
+
 //===----------------------------------------------------------------------===//
-//                               BranchCoalescing
+//                               PPCBranchCoalescing
 //===----------------------------------------------------------------------===//
 ///
 /// Improve scheduling by coalescing branches that depend on the same condition.
@@ -46,13 +47,17 @@ STATISTIC(NumBlocksNotCoalesced, "Number of blocks not coalesced");
 /// and attempts to merge the blocks together. Such opportunities arise from
 /// the expansion of select statements in the IR.
 ///
-/// For example, consider the following LLVM IR:
+/// This pass does not handle implicit operands on branch statements. In order
+/// to run on targets that use implicit operands, changes need to be made in the
+/// canCoalesceBranch and canMerge methods.
 ///
-/// %test = icmp eq i32 %x 0
-/// %tmp1 = select i1 %test, double %a, double 2.000000e-03
-/// %tmp2 = select i1 %test, double %b, double 5.000000e-03
+/// Example: the following LLVM IR
 ///
-/// This IR expands to the following machine code on PowerPC:
+///     %test = icmp eq i32 %x 0
+///     %tmp1 = select i1 %test, double %a, double 2.000000e-03
+///     %tmp2 = select i1 %test, double %b, double 5.000000e-03
+///
+/// expands to the following machine code:
 ///
 /// BB#0: derived from LLVM BB %entry
 ///    Live Ins: %F1 %F3 %X6
@@ -132,7 +137,7 @@ STATISTIC(NumBlocksNotCoalesced, "Number of blocks not coalesced");
 
 namespace {
 
-class BranchCoalescing : public MachineFunctionPass {
+class PPCBranchCoalescing : public MachineFunctionPass {
   struct CoalescingCandidateInfo {
     MachineBasicBlock *BranchBlock;       // Block containing the branch
     MachineBasicBlock *BranchTargetBlock; // Block branched to
@@ -157,15 +162,11 @@ class BranchCoalescing : public MachineFunctionPass {
   bool validateCandidates(CoalescingCandidateInfo &SourceRegion,
                           CoalescingCandidateInfo &TargetRegion) const;
 
-  static bool isBranchCoalescingEnabled() {
-    return EnableBranchCoalescing == cl::BOU_TRUE;
-  }
-
 public:
   static char ID;
 
-  BranchCoalescing() : MachineFunctionPass(ID) {
-    initializeBranchCoalescingPass(*PassRegistry::getPassRegistry());
+  PPCBranchCoalescing() : MachineFunctionPass(ID) {
+    initializePPCBranchCoalescingPass(*PassRegistry::getPassRegistry());
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -190,21 +191,25 @@ public:
 };
 } // End anonymous namespace.
 
-char BranchCoalescing::ID = 0;
-char &llvm::BranchCoalescingID = BranchCoalescing::ID;
+char PPCBranchCoalescing::ID = 0;
+/// createPPCBranchCoalescingPass - returns an instance of the Branch Coalescing
+/// Pass
+FunctionPass *llvm::createPPCBranchCoalescingPass() {
+  return new PPCBranchCoalescing();
+}
 
-INITIALIZE_PASS_BEGIN(BranchCoalescing, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(PPCBranchCoalescing, DEBUG_TYPE,
                       "Branch Coalescing", false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
 INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
-INITIALIZE_PASS_END(BranchCoalescing, DEBUG_TYPE, "Branch Coalescing",
+INITIALIZE_PASS_END(PPCBranchCoalescing, DEBUG_TYPE, "Branch Coalescing",
                     false, false)
 
-BranchCoalescing::CoalescingCandidateInfo::CoalescingCandidateInfo()
+PPCBranchCoalescing::CoalescingCandidateInfo::CoalescingCandidateInfo()
     : BranchBlock(nullptr), BranchTargetBlock(nullptr),
       FallThroughBlock(nullptr), MustMoveDown(false), MustMoveUp(false) {}
 
-void BranchCoalescing::CoalescingCandidateInfo::clear() {
+void PPCBranchCoalescing::CoalescingCandidateInfo::clear() {
   BranchBlock = nullptr;
   BranchTargetBlock = nullptr;
   FallThroughBlock = nullptr;
@@ -213,7 +218,7 @@ void BranchCoalescing::CoalescingCandidateInfo::clear() {
   MustMoveUp = false;
 }
 
-void BranchCoalescing::initialize(MachineFunction &MF) {
+void PPCBranchCoalescing::initialize(MachineFunction &MF) {
   MDT = &getAnalysis<MachineDominatorTree>();
   MPDT = &getAnalysis<MachinePostDominatorTree>();
   TII = MF.getSubtarget().getInstrInfo();
@@ -230,7 +235,7 @@ void BranchCoalescing::initialize(MachineFunction &MF) {
 ///\param[in,out] Cand The coalescing candidate to analyze
 ///\return true if and only if the branch can be coalesced, false otherwise
 ///
-bool BranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) {
+bool PPCBranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) {
   DEBUG(dbgs() << "Determine if branch block " << Cand.BranchBlock->getNumber()
                << " can be coalesced:");
   MachineBasicBlock *FalseMBB = nullptr;
@@ -246,6 +251,19 @@ bool BranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) {
     if (!I.isBranch())
       continue;
 
+    // The analyzeBranch method does not include any implicit operands.
+    // This is not an issue on PPC but must be handled on other targets.
+    // For this pass to be made target-independent, the analyzeBranch API
+    // need to be updated to support implicit operands and there would
+    // need to be a way to verify that any implicit operands would not be
+    // clobbered by merging blocks.  This would include identifying the
+    // implicit operands as well as the basic block they are defined in.
+    // This could be done by changing the analyzeBranch API to have it also
+    // record and return the implicit operands and the blocks where they are
+    // defined. Alternatively, the BranchCoalescing code would need to be
+    // extended to identify the implicit operands.  The analysis in canMerge
+    // must then be extended to prove that none of the implicit operands are
+    // changed in the blocks that are combined during coalescing.
     if (I.getNumOperands() != I.getNumExplicitOperands()) {
       DEBUG(dbgs() << "Terminator contains implicit operands - skip : " << I
                    << "\n");
@@ -309,7 +327,7 @@ bool BranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) {
 /// \param[in] OpList2 operand list
 /// \return true if and only if the operands lists are identical
 ///
-bool BranchCoalescing::identicalOperands(
+bool PPCBranchCoalescing::identicalOperands(
     ArrayRef<MachineOperand> OpList1, ArrayRef<MachineOperand> OpList2) const {
 
   if (OpList1.size() != OpList2.size()) {
@@ -325,6 +343,14 @@ bool BranchCoalescing::identicalOperands(
                  << "Op2: " << Op2 << "\n");
 
     if (Op1.isIdenticalTo(Op2)) {
+      // filter out instructions with physical-register uses
+      if (Op1.isReg() && TargetRegisterInfo::isPhysicalRegister(Op1.getReg())
+        // If the physical register is constant then we can assume the value
+        // has not changed between uses.
+          && !(Op1.isUse() && MRI->isConstantPhysReg(Op1.getReg()))) {
+        DEBUG(dbgs() << "The operands are not provably identical.\n");
+        return false;
+      }
       DEBUG(dbgs() << "Op1 and Op2 are identical!\n");
       continue;
     }
@@ -349,6 +375,7 @@ bool BranchCoalescing::identicalOperands(
       return false;
     }
   }
+
   return true;
 }
 
@@ -361,7 +388,7 @@ bool BranchCoalescing::identicalOperands(
 /// \param[in] SourceMBB block to move PHI instructions from
 /// \param[in] TargetMBB block to move PHI instructions to
 ///
-void BranchCoalescing::moveAndUpdatePHIs(MachineBasicBlock *SourceMBB,
+void PPCBranchCoalescing::moveAndUpdatePHIs(MachineBasicBlock *SourceMBB,
                                          MachineBasicBlock *TargetMBB) {
 
   MachineBasicBlock::iterator MI = SourceMBB->begin();
@@ -394,7 +421,7 @@ void BranchCoalescing::moveAndUpdatePHIs(MachineBasicBlock *SourceMBB,
 /// \return true if it is safe to move MI to beginning of TargetMBB,
 ///         false otherwise.
 ///
-bool BranchCoalescing::canMoveToBeginning(const MachineInstr &MI,
+bool PPCBranchCoalescing::canMoveToBeginning(const MachineInstr &MI,
                                           const MachineBasicBlock &TargetMBB
                                           ) const {
 
@@ -425,7 +452,7 @@ bool BranchCoalescing::canMoveToBeginning(const MachineInstr &MI,
 /// \return true if it is safe to move MI to end of TargetMBB,
 ///         false otherwise.
 ///
-bool BranchCoalescing::canMoveToEnd(const MachineInstr &MI,
+bool PPCBranchCoalescing::canMoveToEnd(const MachineInstr &MI,
                                     const MachineBasicBlock &TargetMBB
                                     ) const {
 
@@ -457,7 +484,7 @@ bool BranchCoalescing::canMoveToEnd(const MachineInstr &MI,
 /// \return true if all instructions in SourceRegion.BranchBlock can be merged
 /// into a block in TargetRegion; false otherwise.
 ///
-bool BranchCoalescing::validateCandidates(
+bool PPCBranchCoalescing::validateCandidates(
     CoalescingCandidateInfo &SourceRegion,
     CoalescingCandidateInfo &TargetRegion) const {
 
@@ -500,7 +527,7 @@ bool BranchCoalescing::validateCandidates(
 /// \return true if all instructions in SourceRegion.BranchBlock can be merged
 ///         into a block in TargetRegion, false otherwise.
 ///
-bool BranchCoalescing::canMerge(CoalescingCandidateInfo &SourceRegion,
+bool PPCBranchCoalescing::canMerge(CoalescingCandidateInfo &SourceRegion,
                                 CoalescingCandidateInfo &TargetRegion) const {
   if (!validateCandidates(SourceRegion, TargetRegion))
     return false;
@@ -605,7 +632,7 @@ bool BranchCoalescing::canMerge(CoalescingCandidateInfo &SourceRegion,
 /// \param[in] SourceRegion The candidate to move blocks from
 /// \param[in] TargetRegion The candidate to move blocks to
 ///
-bool BranchCoalescing::mergeCandidates(CoalescingCandidateInfo &SourceRegion,
+bool PPCBranchCoalescing::mergeCandidates(CoalescingCandidateInfo &SourceRegion,
                                        CoalescingCandidateInfo &TargetRegion) {
 
   if (SourceRegion.MustMoveUp && SourceRegion.MustMoveDown) {
@@ -685,10 +712,9 @@ bool BranchCoalescing::mergeCandidates(CoalescingCandidateInfo &SourceRegion,
   return true;
 }
 
-bool BranchCoalescing::runOnMachineFunction(MachineFunction &MF) {
+bool PPCBranchCoalescing::runOnMachineFunction(MachineFunction &MF) {
 
-  if (skipFunction(*MF.getFunction()) || MF.empty() ||
-      !isBranchCoalescingEnabled())
+  if (skipFunction(*MF.getFunction()) || MF.empty())
     return false;
 
   bool didSomething = false;

+ 8 - 0
lib/Target/PowerPC/PPCTargetMachine.cpp

@@ -40,6 +40,10 @@
 
 using namespace llvm;
 
+
+static cl::opt<bool>
+    EnableBranchCoalescing("enable-ppc-branch-coalesce", cl::Hidden,
+                           cl::desc("enable coalescing of duplicate branches for PPC"));
 static cl::
 opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden,
                         cl::desc("Disable CTR loops for PPC"));
@@ -378,6 +382,10 @@ bool PPCPassConfig::addInstSelector() {
 }
 
 void PPCPassConfig::addMachineSSAOptimization() {
+  // PPCBranchCoalescingPass need to be done before machine sinking
+  // since it merges empty blocks.
+  if (EnableBranchCoalescing && getOptLevel() != CodeGenOpt::None)
+    addPass(createPPCBranchCoalescingPass());
   TargetPassConfig::addMachineSSAOptimization();
   // For little endian, remove where possible the vector swap instructions
   // introduced at code generation to normalize vector element order.

+ 43 - 14
test/CodeGen/PowerPC/branch_coalesce.ll

@@ -1,26 +1,19 @@
-; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs -enable-branch-coalesce=true < %s | FileCheck %s 
-; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs -enable-branch-coalesce=true < %s | FileCheck %s 
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs -enable-ppc-branch-coalesce < %s | FileCheck %s
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs -enable-ppc-branch-coalesce < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOCOALESCE %s
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOCOALESCE %s
 
 ; Function Attrs: nounwind
 define double @testBranchCoal(double %a, double %b, double %c, i32 %x) {
-entry:
-  %test = icmp eq i32 %x, 0
-  %tmp1 = select i1 %test, double %a, double 2.000000e-03
-  %tmp2 = select i1 %test, double %b, double 0.000000e+00
-  %tmp3 = select i1 %test, double %c, double 5.000000e-03
 
-  %res1 = fadd double %tmp1, %tmp2
-  %result = fadd double %res1, %tmp3
-  ret double %result
-
-; CHECK-LABEL: @testBranchCoal 
+; CHECK-LABEL: @testBranchCoal
 ; CHECK: cmplwi [[CMPR:[0-7]+]], 6, 0
 ; CHECK: beq [[CMPR]], .LBB[[LAB1:[0-9_]+]]
 ; CHECK-DAG: addis [[LD1REG:[0-9]+]], 2, .LCPI0_0@toc@ha
 ; CHECK-DAG: addis [[LD2REG:[0-9]+]], 2, .LCPI0_1@toc@ha
 ; CHECK-DAG: xxlxor 2, 2, 2
-; CHECK-NOT: beq 
-; CHECK-DAG: addi [[LD1BASE:[0-9]+]], [[LD1REG]] 
+; CHECK-NOT: beq
+; CHECK-DAG: addi [[LD1BASE:[0-9]+]], [[LD1REG]]
 ; CHECK-DAG: addi [[LD2BASE:[0-9]+]], [[LD2REG]]
 ; CHECK-DAG: lxsdx 1, 0, [[LD1BASE]]
 ; CHECK-DAG: lxsdx 3, 0, [[LD2BASE]]
@@ -28,4 +21,40 @@ entry:
 ; CHECK: xsadddp 0, 1, 2
 ; CHECK: xsadddp 1, 0, 3
 ; CHECK: blr
+
+; CHECK-NOCOALESCE-LABEL: testBranchCoal:
+; CHECK-NOCOALESCE:       # BB#0: # %entry
+; CHECK-NOCOALESCE-NEXT:    cmplwi 0, 6, 0
+; CHECK-NOCOALESCE-NEXT:    bne 0, .LBB0_5
+; CHECK-NOCOALESCE-NEXT:  # BB#1: # %entry
+; CHECK-NOCOALESCE-NEXT:    bne 0, .LBB0_6
+; CHECK-NOCOALESCE-NEXT:  .LBB0_2: # %entry
+; CHECK-NOCOALESCE-NEXT:    beq 0, .LBB0_4
+; CHECK-NOCOALESCE-NEXT:  .LBB0_3: # %entry
+; CHECK-NOCOALESCE-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
+; CHECK-NOCOALESCE-NEXT:    addi 3, 3, .LCPI0_1@toc@l
+; CHECK-NOCOALESCE-NEXT:    lxsdx 3, 0, 3
+; CHECK-NOCOALESCE-NEXT:  .LBB0_4: # %entry
+; CHECK-NOCOALESCE-NEXT:    xsadddp 0, 1, 2
+; CHECK-NOCOALESCE-NEXT:    xsadddp 1, 0, 3
+; CHECK-NOCOALESCE-NEXT:    blr
+; CHECK-NOCOALESCE-NEXT:  .LBB0_5: # %entry
+; CHECK-NOCOALESCE-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-NOCOALESCE-NEXT:    addi 3, 3, .LCPI0_0@toc@l
+; CHECK-NOCOALESCE-NEXT:    lxsdx 1, 0, 3
+; CHECK-NOCOALESCE-NEXT:    beq 0, .LBB0_2
+; CHECK-NOCOALESCE-NEXT:  .LBB0_6: # %entry
+; CHECK-NOCOALESCE-NEXT:    xxlxor 2, 2, 2
+; CHECK-NOCOALESCE-NEXT:    bne 0, .LBB0_3
+; CHECK-NOCOALESCE-NEXT:    b .LBB0_4
+  entry:
+
+  %test = icmp eq i32 %x, 0
+  %tmp1 = select i1 %test, double %a, double 2.000000e-03
+  %tmp2 = select i1 %test, double %b, double 0.000000e+00
+  %tmp3 = select i1 %test, double %c, double 5.000000e-03
+
+  %res1 = fadd double %tmp1, %tmp2
+  %result = fadd double %res1, %tmp3
+  ret double %result
 }