Эх сурвалжийг харах

[DWARF] Allow duplication of tails with CFI instructions

This commit came as a result of the revert of patch r317579 (originally
committed as r317100). That patch made CFI instructions duplicable, because
their presence in the epilogue block was affecting the tail duplication
pass. However, duplicating blocks with CFI instructions was an issue for
compact unwind info on Darwin, which is why the patch was reverted.

This patch allows duplicating tails that contain CFI instructions, even though
the instructions themselves remain marked as non-duplicable, by copying them 'manually'.


Patch by Djordje Kovacevic.

Differential Revision: https://reviews.llvm.org/D40979


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323883 91177308-0d34-0410-b5e6-96231b3b80d8
Petar Jovanovic 7 жил өмнө
parent
commit
f8faafd0e5

+ 16 - 2
lib/CodeGen/TailDuplicator.cpp

@@ -37,6 +37,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
 #include <algorithm>
 #include <cassert>
 #include <iterator>
@@ -371,6 +372,13 @@ void TailDuplicator::duplicateInstruction(
     MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
     DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
     const DenseSet<unsigned> &UsedByPhi) {
+  // Allow duplication of CFI instructions.
+  if (MI->isCFIInstruction()) {
+    BuildMI(*PredBB, PredBB->end(), PredBB->findDebugLoc(PredBB->begin()),
+      TII->get(TargetOpcode::CFI_INSTRUCTION)).addCFIIndex(
+      MI->getOperand(0).getCFIIndex());
+    return;
+  }
   MachineInstr &NewMI = TII->duplicate(*PredBB, PredBB->end(), *MI);
   if (PreRegAlloc) {
     for (unsigned i = 0, e = NewMI.getNumOperands(); i != e; ++i) {
@@ -585,7 +593,13 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
   unsigned InstrCount = 0;
   for (MachineInstr &MI : TailBB) {
     // Non-duplicable things shouldn't be tail-duplicated.
-    if (MI.isNotDuplicable())
+    // CFI instructions are marked as non-duplicable, because Darwin compact
+    // unwind info emission can't handle multiple prologue setups. For DWARF,
+    // allow them to be duplicated, so that their existence doesn't prevent
+    // tail duplication of some basic blocks that would otherwise be duplicated.
+    if (MI.isNotDuplicable() &&
+        (TailBB.getParent()->getTarget().getTargetTriple().isOSDarwin() ||
+        !MI.isCFIInstruction()))
       return false;
 
     // Convergent instructions can be duplicated only if doing so doesn't add
@@ -605,7 +619,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
     if (PreRegAlloc && MI.isCall())
       return false;
 
-    if (!MI.isPHI() && !MI.isDebugValue())
+    if (!MI.isPHI() && !MI.isMetaInstruction())
       InstrCount += 1;
 
     if (InstrCount > MaxDuplicateCount)

+ 96 - 0
test/CodeGen/AArch64/taildup-cfi.ll

@@ -0,0 +1,96 @@
+; REQUIRES: asserts
+; RUN: llc -mtriple=arm64-unknown-linux-gnu -debug-only=tailduplication %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=LINUX
+; RUN: llc -mtriple=arm64-apple-darwin -debug-only=tailduplication %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=DARWIN
+
+; ModuleID = 'taildup-cfi.c'
+source_filename = "taildup-cfi.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@g = common local_unnamed_addr global i32 0, align 4
+@f = common local_unnamed_addr global i32 0, align 4
+@a = common local_unnamed_addr global i32 0, align 4
+@m = common local_unnamed_addr global i32 0, align 4
+@l = common local_unnamed_addr global i32 0, align 4
+@j = common local_unnamed_addr global i32 0, align 4
+@k = common local_unnamed_addr global i32 0, align 4
+@i = common local_unnamed_addr global i32 0, align 4
+@d = common local_unnamed_addr global i32 0, align 4
+@c = common local_unnamed_addr global i32 0, align 4
+@e = common local_unnamed_addr global i32 0, align 4
+@h = common local_unnamed_addr global i32 0, align 4
+
+; Function Attrs: norecurse nounwind uwtable
+define void @n(i32 %o, i32* nocapture readonly %b) local_unnamed_addr #0 {
+entry:
+  %0 = load i32, i32* @g, align 4, !tbaa !2
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %entry.if.end_crit_edge, label %if.then
+
+entry.if.end_crit_edge:                           ; preds = %entry
+  %.pre = load i32, i32* @f, align 4, !tbaa !2
+  br label %if.end
+
+if.then:                                          ; preds = %entry
+  store i32 0, i32* @f, align 4, !tbaa !2
+  br label %if.end
+
+; DARWIN-NOT:       Merging into block
+; LINUX:    	      Merging into block
+
+if.end:                                           ; preds = %entry.if.end_crit_edge, %if.then
+  %1 = phi i32 [ %.pre, %entry.if.end_crit_edge ], [ 0, %if.then ]
+  %cmp6 = icmp slt i32 %1, %o
+  br i1 %cmp6, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %if.end
+  %.pre7 = load i32, i32* @a, align 4, !tbaa !2
+  %.pre8 = load i32, i32* @l, align 4, !tbaa !2
+  %.pre9 = load i32, i32* @j, align 4, !tbaa !2
+  %.pre10 = load i32, i32* @k, align 4, !tbaa !2
+  %.pre11 = load i32, i32* @i, align 4, !tbaa !2
+  br label %for.body
+
+for.body:                                         ; preds = %if.end5, %for.body.lr.ph
+  %2 = phi i32 [ %.pre11, %for.body.lr.ph ], [ %7, %if.end5 ]
+  %3 = phi i32 [ %.pre10, %for.body.lr.ph ], [ %8, %if.end5 ]
+  %4 = phi i32 [ %.pre9, %for.body.lr.ph ], [ %9, %if.end5 ]
+  %5 = phi i32 [ %1, %for.body.lr.ph ], [ %inc, %if.end5 ]
+  store i32 %.pre7, i32* @m, align 4, !tbaa !2
+  %mul = mul nsw i32 %3, %4
+  %cmp1 = icmp sgt i32 %.pre8, %mul
+  %conv = zext i1 %cmp1 to i32
+  %cmp2 = icmp slt i32 %2, %conv
+  br i1 %cmp2, label %if.then4, label %if.end5
+
+if.then4:                                         ; preds = %for.body
+  %6 = load i32, i32* @d, align 4, !tbaa !2
+  store i32 %6, i32* @k, align 4, !tbaa !2
+  store i32 %6, i32* @i, align 4, !tbaa !2
+  store i32 %6, i32* @j, align 4, !tbaa !2
+  br label %if.end5
+
+if.end5:                                          ; preds = %if.then4, %for.body
+  %7 = phi i32 [ %6, %if.then4 ], [ %2, %for.body ]
+  %8 = phi i32 [ %6, %if.then4 ], [ %3, %for.body ]
+  %9 = phi i32 [ %6, %if.then4 ], [ %4, %for.body ]
+  %10 = load i32, i32* @c, align 4, !tbaa !2
+  %idxprom = sext i32 %10 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %idxprom
+  %11 = load i32, i32* %arrayidx, align 4, !tbaa !2
+  %12 = load i32, i32* @e, align 4, !tbaa !2
+  %sub = sub nsw i32 %11, %12
+  store i32 %sub, i32* @h, align 4, !tbaa !2
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* @f, align 4, !tbaa !2
+  %exitcond = icmp eq i32 %inc, %o
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %if.end5, %if.end
+  ret void
+}
+
+attributes #0 = { norecurse nounwind uwtable }
+
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{}

+ 4 - 2
test/CodeGen/X86/avx512-mask-op.ll

@@ -712,10 +712,12 @@ define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
 ; AVX512BW-NEXT:    jg LBB17_1
 ; AVX512BW-NEXT:  ## %bb.2:
 ; AVX512BW-NEXT:    vpcmpltud %zmm2, %zmm1, %k0
-; AVX512BW-NEXT:    jmp LBB17_3
+; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
+; AVX512BW-NEXT:    ## kill: def %xmm0 killed %xmm0 killed %zmm0
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
 ; AVX512BW-NEXT:  LBB17_1:
 ; AVX512BW-NEXT:    vpcmpgtd %zmm2, %zmm0, %k0
-; AVX512BW-NEXT:  LBB17_3:
 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512BW-NEXT:    ## kill: def %xmm0 killed %xmm0 killed %zmm0
 ; AVX512BW-NEXT:    vzeroupper