7 жил өмнө · f8faafd0e5
--- a/lib/CodeGen/TailDuplicator.cpp
+++ b/lib/CodeGen/TailDuplicator.cpp
@@ -37,6 +37,7 @@
 
				 #include "llvm/Support/Debug.h"
			
 
				 #include "llvm/Support/ErrorHandling.h"
			
 
				 #include "llvm/Support/raw_ostream.h"
			
 
				+#include "llvm/Target/TargetMachine.h"
			
 
				 #include <algorithm>
			
 
				 #include <cassert>
			
 
				 #include <iterator>
			
@@ -371,6 +372,13 @@ void TailDuplicator::duplicateInstruction(
 
				     MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
			
 
				     DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
			
 
				     const DenseSet<unsigned> &UsedByPhi) {
			
 
				+  // Allow duplication of CFI instructions.
			
 
				+  if (MI->isCFIInstruction()) {
			
 
				+    BuildMI(*PredBB, PredBB->end(), PredBB->findDebugLoc(PredBB->begin()),
			
 
				+      TII->get(TargetOpcode::CFI_INSTRUCTION)).addCFIIndex(
			
 
				+      MI->getOperand(0).getCFIIndex());
			
 
				+    return;
			
 
				+  }
			
 
				   MachineInstr &NewMI = TII->duplicate(*PredBB, PredBB->end(), *MI);
			
 
				   if (PreRegAlloc) {
			
 
				     for (unsigned i = 0, e = NewMI.getNumOperands(); i != e; ++i) {
			
@@ -585,7 +593,13 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
 
				   unsigned InstrCount = 0;
			
 
				   for (MachineInstr &MI : TailBB) {
			
 
				     // Non-duplicable things shouldn't be tail-duplicated.
			
 
				-    if (MI.isNotDuplicable())
			
 
				+    // CFI instructions are marked as non-duplicable, because Darwin compact
			
 
				+    // unwind info emission can't handle multiple prologue setups. In case of
			
 
				+    // DWARF, let them be duplicated, so that their existence doesn't prevent
			
 
				+    // tail duplication of some basic blocks that would be duplicated otherwise.
			
 
				+    if (MI.isNotDuplicable() &&
			
 
				+        (TailBB.getParent()->getTarget().getTargetTriple().isOSDarwin() ||
			
 
				+        !MI.isCFIInstruction()))
			
 
				       return false;
			
 
				 
			
 
				     // Convergent instructions can be duplicated only if doing so doesn't add
			
@@ -605,7 +619,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
 
				     if (PreRegAlloc && MI.isCall())
			
 
				       return false;
			
 
				 
			
 
				-    if (!MI.isPHI() && !MI.isDebugValue())
			
 
				+    if (!MI.isPHI() && !MI.isMetaInstruction())
			
 
				       InstrCount += 1;
			
 
				 
			
 
				     if (InstrCount > MaxDuplicateCount)
			
--- a/test/CodeGen/AArch64/taildup-cfi.ll
+++ b/test/CodeGen/AArch64/taildup-cfi.ll
@@ -0,0 +1,96 @@
 
				+; REQUIRES: asserts
			
 
				+; RUN: llc -mtriple=arm64-unknown-linux-gnu -debug-only=tailduplication %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=LINUX
			
 
				+; RUN: llc -mtriple=arm64-apple-darwin -debug-only=tailduplication %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=DARWIN
			
 
				+
			
 
				+; ModuleID = 'taildup-cfi.c'
			
 
				+source_filename = "taildup-cfi.c"
			
 
				+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
			
 
				+
			
 
				+@g = common local_unnamed_addr global i32 0, align 4
			
 
				+@f = common local_unnamed_addr global i32 0, align 4
			
 
				+@a = common local_unnamed_addr global i32 0, align 4
			
 
				+@m = common local_unnamed_addr global i32 0, align 4
			
 
				+@l = common local_unnamed_addr global i32 0, align 4
			
 
				+@j = common local_unnamed_addr global i32 0, align 4
			
 
				+@k = common local_unnamed_addr global i32 0, align 4
			
 
				+@i = common local_unnamed_addr global i32 0, align 4
			
 
				+@d = common local_unnamed_addr global i32 0, align 4
			
 
				+@c = common local_unnamed_addr global i32 0, align 4
			
 
				+@e = common local_unnamed_addr global i32 0, align 4
			
 
				+@h = common local_unnamed_addr global i32 0, align 4
			
 
				+
			
 
				+; Function Attrs: norecurse nounwind uwtable
			
 
				+define void @n(i32 %o, i32* nocapture readonly %b) local_unnamed_addr #0 {
			
 
				+entry:
			
 
				+  %0 = load i32, i32* @g, align 4, !tbaa !2
			
 
				+  %tobool = icmp eq i32 %0, 0
			
 
				+  br i1 %tobool, label %entry.if.end_crit_edge, label %if.then
			
 
				+
			
 
				+entry.if.end_crit_edge:                           ; preds = %entry
			
 
				+  %.pre = load i32, i32* @f, align 4, !tbaa !2
			
 
				+  br label %if.end
			
 
				+
			
 
				+if.then:                                          ; preds = %entry
			
 
				+  store i32 0, i32* @f, align 4, !tbaa !2
			
 
				+  br label %if.end
			
 
				+
			
 
				+; DARWIN-NOT:       Merging into block
			
 
				+; LINUX:    	      Merging into block
			
 
				+
			
 
				+if.end:                                           ; preds = %entry.if.end_crit_edge, %if.then
			
 
				+  %1 = phi i32 [ %.pre, %entry.if.end_crit_edge ], [ 0, %if.then ]
			
 
				+  %cmp6 = icmp slt i32 %1, %o
			
 
				+  br i1 %cmp6, label %for.body.lr.ph, label %for.end
			
 
				+
			
 
				+for.body.lr.ph:                                   ; preds = %if.end
			
 
				+  %.pre7 = load i32, i32* @a, align 4, !tbaa !2
			
 
				+  %.pre8 = load i32, i32* @l, align 4, !tbaa !2
			
 
				+  %.pre9 = load i32, i32* @j, align 4, !tbaa !2
			
 
				+  %.pre10 = load i32, i32* @k, align 4, !tbaa !2
			
 
				+  %.pre11 = load i32, i32* @i, align 4, !tbaa !2
			
 
				+  br label %for.body
			
 
				+
			
 
				+for.body:                                         ; preds = %if.end5, %for.body.lr.ph
			
 
				+  %2 = phi i32 [ %.pre11, %for.body.lr.ph ], [ %7, %if.end5 ]
			
 
				+  %3 = phi i32 [ %.pre10, %for.body.lr.ph ], [ %8, %if.end5 ]
			
 
				+  %4 = phi i32 [ %.pre9, %for.body.lr.ph ], [ %9, %if.end5 ]
			
 
				+  %5 = phi i32 [ %1, %for.body.lr.ph ], [ %inc, %if.end5 ]
			
 
				+  store i32 %.pre7, i32* @m, align 4, !tbaa !2
			
 
				+  %mul = mul nsw i32 %3, %4
			
 
				+  %cmp1 = icmp sgt i32 %.pre8, %mul
			
 
				+  %conv = zext i1 %cmp1 to i32
			
 
				+  %cmp2 = icmp slt i32 %2, %conv
			
 
				+  br i1 %cmp2, label %if.then4, label %if.end5
			
 
				+
			
 
				+if.then4:                                         ; preds = %for.body
			
 
				+  %6 = load i32, i32* @d, align 4, !tbaa !2
			
 
				+  store i32 %6, i32* @k, align 4, !tbaa !2
			
 
				+  store i32 %6, i32* @i, align 4, !tbaa !2
			
 
				+  store i32 %6, i32* @j, align 4, !tbaa !2
			
 
				+  br label %if.end5
			
 
				+
			
 
				+if.end5:                                          ; preds = %if.then4, %for.body
			
 
				+  %7 = phi i32 [ %6, %if.then4 ], [ %2, %for.body ]
			
 
				+  %8 = phi i32 [ %6, %if.then4 ], [ %3, %for.body ]
			
 
				+  %9 = phi i32 [ %6, %if.then4 ], [ %4, %for.body ]
			
 
				+  %10 = load i32, i32* @c, align 4, !tbaa !2
			
 
				+  %idxprom = sext i32 %10 to i64
			
 
				+  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %idxprom
			
 
				+  %11 = load i32, i32* %arrayidx, align 4, !tbaa !2
			
 
				+  %12 = load i32, i32* @e, align 4, !tbaa !2
			
 
				+  %sub = sub nsw i32 %11, %12
			
 
				+  store i32 %sub, i32* @h, align 4, !tbaa !2
			
 
				+  %inc = add nsw i32 %5, 1
			
 
				+  store i32 %inc, i32* @f, align 4, !tbaa !2
			
 
				+  %exitcond = icmp eq i32 %inc, %o
			
 
				+  br i1 %exitcond, label %for.end, label %for.body
			
 
				+
			
 
				+for.end:                                          ; preds = %if.end5, %if.end
			
 
				+  ret void
			
 
				+}
			
 
				+
			
 
				+attributes #0 = { norecurse nounwind uwtable }
			
 
				+
			
 
				+!2 = !{!3, !3, i64 0}
			
 
				+!3 = !{!"int", !4, i64 0}
			
 
				+!4 = !{}
			
--- a/test/CodeGen/X86/avx512-mask-op.ll
+++ b/test/CodeGen/X86/avx512-mask-op.ll
@@ -712,10 +712,12 @@ define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
 
				 ; AVX512BW-NEXT:    jg LBB17_1
			
 
				 ; AVX512BW-NEXT:  ## %bb.2:
			
 
				 ; AVX512BW-NEXT:    vpcmpltud %zmm2, %zmm1, %k0
			
 
				-; AVX512BW-NEXT:    jmp LBB17_3
			
 
				+; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
			
 
				+; AVX512BW-NEXT:    ## kill: def %xmm0 killed %xmm0 killed %zmm0
			
 
				+; AVX512BW-NEXT:    vzeroupper
			
 
				+; AVX512BW-NEXT:    retq
			
 
				 ; AVX512BW-NEXT:  LBB17_1:
			
 
				 ; AVX512BW-NEXT:    vpcmpgtd %zmm2, %zmm0, %k0
			
 
				-; AVX512BW-NEXT:  LBB17_3:
			
 
				 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
			
 
				 ; AVX512BW-NEXT:    ## kill: def %xmm0 killed %xmm0 killed %zmm0
			
 
				 ; AVX512BW-NEXT:    vzeroupper