|
@@ -388,9 +388,9 @@ class TypePromotionTransaction;
|
|
bool tryToSinkFreeOperands(Instruction *I);
|
|
bool tryToSinkFreeOperands(Instruction *I);
|
|
bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp,
|
|
bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp,
|
|
Intrinsic::ID IID);
|
|
Intrinsic::ID IID);
|
|
- bool optimizeCmp(CmpInst *Cmp, bool &ModifiedDT);
|
|
|
|
- bool combineToUSubWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
|
|
|
|
- bool combineToUAddWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
|
|
|
|
|
|
+ bool optimizeCmp(CmpInst *Cmp);
|
|
|
|
+ bool combineToUSubWithOverflow(CmpInst *Cmp);
|
|
|
|
+ bool combineToUAddWithOverflow(CmpInst *Cmp);
|
|
};
|
|
};
|
|
|
|
|
|
} // end anonymous namespace
|
|
} // end anonymous namespace
|
|
@@ -484,9 +484,13 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
|
|
if (!LargeOffsetGEPMap.empty())
|
|
if (!LargeOffsetGEPMap.empty())
|
|
MadeChange |= splitLargeGEPOffsets();
|
|
MadeChange |= splitLargeGEPOffsets();
|
|
|
|
|
|
- // Really free removed instructions during promotion.
|
|
|
|
- for (Instruction *I : RemovedInsts)
|
|
|
|
|
|
+ // Really free instructions removed during promotion or kept around to
|
|
|
|
+ // improve efficiency (see comments in overflow intrinsic transforms).
|
|
|
|
+ for (Instruction *I : RemovedInsts) {
|
|
|
|
+ if (I->getParent())
|
|
|
|
+ I->removeFromParent();
|
|
I->deleteValue();
|
|
I->deleteValue();
|
|
|
|
+ }
|
|
|
|
|
|
EverMadeChange |= MadeChange;
|
|
EverMadeChange |= MadeChange;
|
|
SeenChainsForSExt.clear();
|
|
SeenChainsForSExt.clear();
|
|
@@ -1177,6 +1181,20 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
|
|
bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
|
|
bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
|
|
CmpInst *Cmp,
|
|
CmpInst *Cmp,
|
|
Intrinsic::ID IID) {
|
|
Intrinsic::ID IID) {
|
|
|
|
+ if (BO->getParent() != Cmp->getParent()) {
|
|
|
|
+ // We used to use a dominator tree here to allow multi-block optimization.
|
|
|
|
+ // But that was problematic because:
|
|
|
|
+ // 1. It could cause a perf regression by hoisting the math op into the
|
|
|
|
+ // critical path.
|
|
|
|
+ // 2. It could cause a perf regression by creating a value that was live
|
|
|
|
+ // across multiple blocks and increasing register pressure.
|
|
|
|
+ // 3. Use of a dominator tree could cause large compile-time regression.
|
|
|
|
+ // This is because we recompute the DT on every change in the main CGP
|
|
|
|
+ // run-loop. The recomputing is probably unnecessary in many cases, so if
|
|
|
|
+ // that was fixed, using a DT here would be ok.
|
|
|
|
+ return false;
|
|
|
|
+ }
|
|
|
|
+
|
|
// We allow matching the canonical IR (add X, C) back to (usubo X, -C).
|
|
// We allow matching the canonical IR (add X, C) back to (usubo X, -C).
|
|
Value *Arg0 = BO->getOperand(0);
|
|
Value *Arg0 = BO->getOperand(0);
|
|
Value *Arg1 = BO->getOperand(1);
|
|
Value *Arg1 = BO->getOperand(1);
|
|
@@ -1186,45 +1204,28 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
|
|
Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
|
|
Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
|
|
}
|
|
}
|
|
|
|
|
|
- Instruction *InsertPt;
|
|
|
|
- if (BO->hasOneUse() && BO->user_back() == Cmp) {
|
|
|
|
- // If the math is only used by the compare, insert at the compare to keep
|
|
|
|
- // the condition in the same block as its users. (CGP aggressively sinks
|
|
|
|
- // compares to help out SDAG.)
|
|
|
|
- InsertPt = Cmp;
|
|
|
|
- } else {
|
|
|
|
- // The math and compare may be independent instructions. Check dominance to
|
|
|
|
- // determine the insertion point for the intrinsic.
|
|
|
|
- bool MathDominates = getDT(*BO->getFunction()).dominates(BO, Cmp);
|
|
|
|
- if (!MathDominates && !getDT(*BO->getFunction()).dominates(Cmp, BO))
|
|
|
|
- return false;
|
|
|
|
-
|
|
|
|
- BasicBlock *MathBB = BO->getParent(), *CmpBB = Cmp->getParent();
|
|
|
|
- if (MathBB != CmpBB) {
|
|
|
|
- // Avoid hoisting an extra op into a dominating block and creating a
|
|
|
|
- // potentially longer critical path.
|
|
|
|
- if (!MathDominates)
|
|
|
|
- return false;
|
|
|
|
- // Check that the insertion doesn't create a value that is live across
|
|
|
|
- // more than two blocks, so to minimise the increase in register pressure.
|
|
|
|
- BasicBlock *Dominator = MathDominates ? MathBB : CmpBB;
|
|
|
|
- BasicBlock *Dominated = MathDominates ? CmpBB : MathBB;
|
|
|
|
- auto Successors = successors(Dominator);
|
|
|
|
- if (llvm::find(Successors, Dominated) == Successors.end())
|
|
|
|
- return false;
|
|
|
|
|
|
+ // Insert at the first instruction of the pair.
|
|
|
|
+ Instruction *InsertPt = nullptr;
|
|
|
|
+ for (Instruction &Iter : *Cmp->getParent()) {
|
|
|
|
+ if (&Iter == BO || &Iter == Cmp) {
|
|
|
|
+ InsertPt = &Iter;
|
|
|
|
+ break;
|
|
}
|
|
}
|
|
-
|
|
|
|
- InsertPt = MathDominates ? cast<Instruction>(BO) : cast<Instruction>(Cmp);
|
|
|
|
}
|
|
}
|
|
|
|
+ assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
|
|
|
|
|
|
IRBuilder<> Builder(InsertPt);
|
|
IRBuilder<> Builder(InsertPt);
|
|
Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
|
|
Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
|
|
Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
|
|
Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
|
|
Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
|
|
Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
|
|
|
|
+
|
|
|
|
+ // Delay the actual removal/deletion of the binop and compare for efficiency.
|
|
|
|
+ // If we delete those now, we would invalidate the instruction iterator and
|
|
|
|
+ // trigger dominator tree updates.
|
|
BO->replaceAllUsesWith(Math);
|
|
BO->replaceAllUsesWith(Math);
|
|
Cmp->replaceAllUsesWith(OV);
|
|
Cmp->replaceAllUsesWith(OV);
|
|
- BO->eraseFromParent();
|
|
|
|
- Cmp->eraseFromParent();
|
|
|
|
|
|
+ RemovedInsts.insert(BO);
|
|
|
|
+ RemovedInsts.insert(Cmp);
|
|
return true;
|
|
return true;
|
|
}
|
|
}
|
|
|
|
|
|
@@ -1260,8 +1261,7 @@ static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
|
|
|
|
|
|
/// Try to combine the compare into a call to the llvm.uadd.with.overflow
|
|
/// Try to combine the compare into a call to the llvm.uadd.with.overflow
|
|
/// intrinsic. Return true if any changes were made.
|
|
/// intrinsic. Return true if any changes were made.
|
|
-bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
|
|
|
|
- bool &ModifiedDT) {
|
|
|
|
|
|
+bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp) {
|
|
Value *A, *B;
|
|
Value *A, *B;
|
|
BinaryOperator *Add;
|
|
BinaryOperator *Add;
|
|
if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add))))
|
|
if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add))))
|
|
@@ -1281,13 +1281,10 @@ bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
|
|
if (!replaceMathCmpWithIntrinsic(Add, Cmp, Intrinsic::uadd_with_overflow))
|
|
if (!replaceMathCmpWithIntrinsic(Add, Cmp, Intrinsic::uadd_with_overflow))
|
|
return false;
|
|
return false;
|
|
|
|
|
|
- // Reset callers - do not crash by iterating over a dead instruction.
|
|
|
|
- ModifiedDT = true;
|
|
|
|
return true;
|
|
return true;
|
|
}
|
|
}
|
|
|
|
|
|
-bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
|
|
|
|
- bool &ModifiedDT) {
|
|
|
|
|
|
+bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp) {
|
|
// We are not expecting non-canonical/degenerate code. Just bail out.
|
|
// We are not expecting non-canonical/degenerate code. Just bail out.
|
|
Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
|
|
Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
|
|
if (isa<Constant>(A) && isa<Constant>(B))
|
|
if (isa<Constant>(A) && isa<Constant>(B))
|
|
@@ -1342,8 +1339,6 @@ bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
|
|
if (!replaceMathCmpWithIntrinsic(Sub, Cmp, Intrinsic::usub_with_overflow))
|
|
if (!replaceMathCmpWithIntrinsic(Sub, Cmp, Intrinsic::usub_with_overflow))
|
|
return false;
|
|
return false;
|
|
|
|
|
|
- // Reset callers - do not crash by iterating over a dead instruction.
|
|
|
|
- ModifiedDT = true;
|
|
|
|
return true;
|
|
return true;
|
|
}
|
|
}
|
|
|
|
|
|
@@ -1413,14 +1408,14 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
|
|
return MadeChange;
|
|
return MadeChange;
|
|
}
|
|
}
|
|
|
|
|
|
-bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) {
|
|
|
|
|
|
+bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp) {
|
|
if (sinkCmpExpression(Cmp, *TLI))
|
|
if (sinkCmpExpression(Cmp, *TLI))
|
|
return true;
|
|
return true;
|
|
|
|
|
|
- if (combineToUAddWithOverflow(Cmp, ModifiedDT))
|
|
|
|
|
|
+ if (combineToUAddWithOverflow(Cmp))
|
|
return true;
|
|
return true;
|
|
|
|
|
|
- if (combineToUSubWithOverflow(Cmp, ModifiedDT))
|
|
|
|
|
|
+ if (combineToUSubWithOverflow(Cmp))
|
|
return true;
|
|
return true;
|
|
|
|
|
|
return false;
|
|
return false;
|
|
@@ -6945,7 +6940,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
|
|
}
|
|
}
|
|
|
|
|
|
if (auto *Cmp = dyn_cast<CmpInst>(I))
|
|
if (auto *Cmp = dyn_cast<CmpInst>(I))
|
|
- if (TLI && optimizeCmp(Cmp, ModifiedDT))
|
|
|
|
|
|
+ if (TLI && optimizeCmp(Cmp))
|
|
return true;
|
|
return true;
|
|
|
|
|
|
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
|
|
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
|