Jelajahi Sumber

[CodeGen] Eagerly emit lifetime.end markers for calls

In C, we currently wait until the end of the enclosing scope to clean up
aggregate temporaries used for returns from calls. This means that in cases like:

{
  // Assuming that `Bar` is large enough to warrant indirect returns
  struct Bar b = {};
  b = foo(&b);
  b = foo(&b);
  b = foo(&b);
  b = foo(&b);
}

...we allocate space for 5 Bars on the stack (`b`, plus 4 temporaries,
one per call). This becomes painful in things like large switch
statements.

If cleaning up sooner is trivial (i.e. there is no destructor to run, so the copy out of the temporary is its last use), we should do it.


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@327229 91177308-0d34-0410-b5e6-96231b3b80d8
George Burgess IV 7 tahun lalu
induk
melakukan
c28eb6d02c
2 mengubah file dengan 129 tambahan dan 6 penghapusan
  1. 28 6
      lib/CodeGen/CGExprAgg.cpp
  2. 101 0
      test/CodeGen/aggregate-assign-call.c

+ 28 - 6
lib/CodeGen/CGExprAgg.cpp

@@ -23,6 +23,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicInst.h"
 using namespace clang;
 using namespace clang;
 using namespace CodeGen;
 using namespace CodeGen;
 
 
@@ -48,7 +49,7 @@ class AggExprEmitter : public StmtVisitor<AggExprEmitter> {
 
 
   // Calls `Fn` with a valid return value slot, potentially creating a temporary
   // Calls `Fn` with a valid return value slot, potentially creating a temporary
   // to do so. If a temporary is created, an appropriate copy into `Dest` will
   // to do so. If a temporary is created, an appropriate copy into `Dest` will
-  // be emitted.
+  // be emitted, as will lifetime markers.
   //
   //
   // The given function should take a ReturnValueSlot, and return an RValue that
   // The given function should take a ReturnValueSlot, and return an RValue that
   // points to said slot.
   // points to said slot.
@@ -250,16 +251,28 @@ void AggExprEmitter::withReturnValueSlot(
                  (RequiresDestruction && !Dest.getAddress().isValid());
                  (RequiresDestruction && !Dest.getAddress().isValid());
 
 
   Address RetAddr = Address::invalid();
   Address RetAddr = Address::invalid();
+
+  EHScopeStack::stable_iterator LifetimeEndBlock;
+  llvm::Value *LifetimeSizePtr = nullptr;
+  llvm::IntrinsicInst *LifetimeStartInst = nullptr;
   if (!UseTemp) {
   if (!UseTemp) {
     RetAddr = Dest.getAddress();
     RetAddr = Dest.getAddress();
   } else {
   } else {
     RetAddr = CGF.CreateMemTemp(RetTy);
     RetAddr = CGF.CreateMemTemp(RetTy);
     uint64_t Size =
     uint64_t Size =
         CGF.CGM.getDataLayout().getTypeAllocSize(CGF.ConvertTypeForMem(RetTy));
         CGF.CGM.getDataLayout().getTypeAllocSize(CGF.ConvertTypeForMem(RetTy));
-    if (llvm::Value *LifetimeSizePtr =
-            CGF.EmitLifetimeStart(Size, RetAddr.getPointer()))
+    LifetimeSizePtr = CGF.EmitLifetimeStart(Size, RetAddr.getPointer());
+    if (LifetimeSizePtr) {
+      LifetimeStartInst =
+          cast<llvm::IntrinsicInst>(std::prev(Builder.GetInsertPoint()));
+      assert(LifetimeStartInst->getIntrinsicID() ==
+                 llvm::Intrinsic::lifetime_start &&
+             "Last insertion wasn't a lifetime.start?");
+
       CGF.pushFullExprCleanup<CodeGenFunction::CallLifetimeEnd>(
       CGF.pushFullExprCleanup<CodeGenFunction::CallLifetimeEnd>(
           NormalEHLifetimeMarker, RetAddr, LifetimeSizePtr);
           NormalEHLifetimeMarker, RetAddr, LifetimeSizePtr);
+      LifetimeEndBlock = CGF.EHStack.stable_begin();
+    }
   }
   }
 
 
   RValue Src =
   RValue Src =
@@ -268,9 +281,18 @@ void AggExprEmitter::withReturnValueSlot(
   if (RequiresDestruction)
   if (RequiresDestruction)
     CGF.pushDestroy(RetTy.isDestructedType(), Src.getAggregateAddress(), RetTy);
     CGF.pushDestroy(RetTy.isDestructedType(), Src.getAggregateAddress(), RetTy);
 
 
-  if (UseTemp) {
-    assert(Dest.getPointer() != Src.getAggregatePointer());
-    EmitFinalDestCopy(E->getType(), Src);
+  if (!UseTemp)
+    return;
+
+  assert(Dest.getPointer() != Src.getAggregatePointer());
+  EmitFinalDestCopy(E->getType(), Src);
+
+  if (!RequiresDestruction && LifetimeStartInst) {
+    // If there's no dtor to run, the copy was the last use of our temporary.
+    // Since we're not guaranteed to be in an ExprWithCleanups, clean up
+    // eagerly.
+    CGF.DeactivateCleanupBlock(LifetimeEndBlock, LifetimeStartInst);
+    CGF.EmitLifetimeEnd(LifetimeSizePtr, RetAddr.getPointer());
   }
   }
 }
 }
 
 

+ 101 - 0
test/CodeGen/aggregate-assign-call.c

@@ -0,0 +1,101 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O1 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=O1
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=O0
+//
+// Ensure that we place appropriate lifetime markers around indirectly returned
+// temporaries, and that the lifetime.ends appear in a timely manner.
+//
+// -O1 is used so lifetime markers actually get emitted.
+
+struct S {
+  int ns[40];
+};
+
+struct S foo(void);
+
+// CHECK-LABEL: define dso_local void @bar
+struct S bar() {
+  // O0-NOT: @llvm.lifetime.start
+  // O0-NOT: @llvm.lifetime.end
+
+  struct S r;
+  // O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* nonnull %[[R_TMP:[^)]+]])
+
+  // O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* nonnull %[[TMP1:[^)]+]])
+  // O1: call void @foo
+  r = foo();
+  // O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* nonnull %[[TMP1]])
+
+  // O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* nonnull %[[TMP2:[^)]+]])
+  // O1: call void @foo
+  r = foo();
+  // O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* nonnull %[[TMP2]])
+
+  // O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* nonnull %[[TMP3:[^)]+]])
+  // O1: call void @foo
+  r = foo();
+  // O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* nonnull %[[TMP3]])
+
+  // O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* nonnull %[[R_TMP]])
+  return r;
+}
+
+struct S foo_int(int);
+
+// Be sure that we're placing the lifetime.end so that all paths go through it.
+// Since this function turns out to be large-ish, optnone to hopefully keep it
+// stable.
+// CHECK-LABEL: define dso_local void @baz
+__attribute__((optnone))
+struct S baz(int i, volatile int *j) {
+  // O0-NOT: @llvm.lifetime.start
+  // O0-NOT: @llvm.lifetime.end
+
+  struct S r;
+  // O1: %[[RESULT_ALLOCA:[^ ]+]] = alloca %struct.S
+  // O1: %[[TMP1_ALLOCA:[^ ]+]] = alloca %struct.S
+  // O1: %[[TMP2_ALLOCA:[^ ]+]] = alloca %struct.S
+  // O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[RESULT_ALLOCA]] to i8*
+  // O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* %[[P]])
+  // O1: br label %[[DO_BODY:.+]]
+
+  do {
+    // O1: [[DO_BODY]]:
+    // O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[TMP1_ALLOCA]] to i8*
+    // O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* %[[P]])
+    // O1: br i1 {{[^,]+}}, label %[[IF_THEN:[^,]+]], label %[[IF_END:[^,]+]]
+    //
+    // O1: [[IF_THEN]]:
+    // O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[TMP1_ALLOCA]] to i8*
+    // O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* %[[P]])
+    // O1: br label %[[DO_END:.*]]
+    //
+    // O1: [[IF_END]]:
+    // O1: call void @foo_int(%struct.S* sret %[[TMP1_ALLOCA]],
+    // O1: call void @llvm.memcpy
+    // O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[TMP1_ALLOCA]] to i8*
+    // O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* %[[P]])
+    // O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[TMP2_ALLOCA]] to i8*
+    // O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* %[[P]])
+    // O1: call void @foo_int(%struct.S* sret %[[TMP2_ALLOCA]],
+    // O1: call void @llvm.memcpy
+    // O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[TMP2_ALLOCA]] to i8*
+    // O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* %[[P]])
+    // O1: br label %[[DO_COND:.*]]
+    //
+    // O1: [[DO_COND]]:
+    // O1: br label %[[DO_BODY]]
+    r = foo_int(({
+      if (*j)
+        break;
+      i++;
+    }));
+
+    r = foo_int(i++);
+   } while (1);
+
+  // O1: [[DO_END]]:
+  // O1: call void @llvm.memcpy
+  // O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[RESULT_ALLOCA]] to i8*
+  // O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* %[[P]])
+  return r;
+}