ExpandReductions.cpp 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
  1. //===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass implements IR expansion for reduction intrinsics, allowing targets
  10. // to enable the experimental intrinsics until just before codegen.
  11. //
  12. //===----------------------------------------------------------------------===//
  #include "llvm/CodeGen/ExpandReductions.h"
  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/CodeGen/Passes.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/InstIterator.h"
  #include "llvm/IR/IntrinsicInst.h"
  #include "llvm/IR/Intrinsics.h"
  #include "llvm/IR/Module.h"
  #include "llvm/Pass.h"
  #include "llvm/Support/MathExtras.h"
  #include "llvm/Transforms/Utils/LoopUtils.h"
  24. using namespace llvm;
  25. namespace {
  26. unsigned getOpcode(Intrinsic::ID ID) {
  27. switch (ID) {
  28. case Intrinsic::experimental_vector_reduce_v2_fadd:
  29. return Instruction::FAdd;
  30. case Intrinsic::experimental_vector_reduce_v2_fmul:
  31. return Instruction::FMul;
  32. case Intrinsic::experimental_vector_reduce_add:
  33. return Instruction::Add;
  34. case Intrinsic::experimental_vector_reduce_mul:
  35. return Instruction::Mul;
  36. case Intrinsic::experimental_vector_reduce_and:
  37. return Instruction::And;
  38. case Intrinsic::experimental_vector_reduce_or:
  39. return Instruction::Or;
  40. case Intrinsic::experimental_vector_reduce_xor:
  41. return Instruction::Xor;
  42. case Intrinsic::experimental_vector_reduce_smax:
  43. case Intrinsic::experimental_vector_reduce_smin:
  44. case Intrinsic::experimental_vector_reduce_umax:
  45. case Intrinsic::experimental_vector_reduce_umin:
  46. return Instruction::ICmp;
  47. case Intrinsic::experimental_vector_reduce_fmax:
  48. case Intrinsic::experimental_vector_reduce_fmin:
  49. return Instruction::FCmp;
  50. default:
  51. llvm_unreachable("Unexpected ID");
  52. }
  53. }
  54. RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {
  55. switch (ID) {
  56. case Intrinsic::experimental_vector_reduce_smax:
  57. return RecurrenceDescriptor::MRK_SIntMax;
  58. case Intrinsic::experimental_vector_reduce_smin:
  59. return RecurrenceDescriptor::MRK_SIntMin;
  60. case Intrinsic::experimental_vector_reduce_umax:
  61. return RecurrenceDescriptor::MRK_UIntMax;
  62. case Intrinsic::experimental_vector_reduce_umin:
  63. return RecurrenceDescriptor::MRK_UIntMin;
  64. case Intrinsic::experimental_vector_reduce_fmax:
  65. return RecurrenceDescriptor::MRK_FloatMax;
  66. case Intrinsic::experimental_vector_reduce_fmin:
  67. return RecurrenceDescriptor::MRK_FloatMin;
  68. default:
  69. return RecurrenceDescriptor::MRK_Invalid;
  70. }
  71. }
  72. bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
  73. bool Changed = false;
  74. SmallVector<IntrinsicInst *, 4> Worklist;
  75. for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
  76. if (auto II = dyn_cast<IntrinsicInst>(&*I))
  77. Worklist.push_back(II);
  78. for (auto *II : Worklist) {
  79. if (!TTI->shouldExpandReduction(II))
  80. continue;
  81. FastMathFlags FMF =
  82. isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
  83. Intrinsic::ID ID = II->getIntrinsicID();
  84. RecurrenceDescriptor::MinMaxRecurrenceKind MRK = getMRK(ID);
  85. Value *Rdx = nullptr;
  86. IRBuilder<> Builder(II);
  87. IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
  88. Builder.setFastMathFlags(FMF);
  89. switch (ID) {
  90. case Intrinsic::experimental_vector_reduce_v2_fadd:
  91. case Intrinsic::experimental_vector_reduce_v2_fmul: {
  92. // FMFs must be attached to the call, otherwise it's an ordered reduction
  93. // and it can't be handled by generating a shuffle sequence.
  94. Value *Acc = II->getArgOperand(0);
  95. Value *Vec = II->getArgOperand(1);
  96. if (!FMF.allowReassoc())
  97. Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK);
  98. else {
  99. Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
  100. Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID),
  101. Acc, Rdx, "bin.rdx");
  102. }
  103. } break;
  104. case Intrinsic::experimental_vector_reduce_add:
  105. case Intrinsic::experimental_vector_reduce_mul:
  106. case Intrinsic::experimental_vector_reduce_and:
  107. case Intrinsic::experimental_vector_reduce_or:
  108. case Intrinsic::experimental_vector_reduce_xor:
  109. case Intrinsic::experimental_vector_reduce_smax:
  110. case Intrinsic::experimental_vector_reduce_smin:
  111. case Intrinsic::experimental_vector_reduce_umax:
  112. case Intrinsic::experimental_vector_reduce_umin:
  113. case Intrinsic::experimental_vector_reduce_fmax:
  114. case Intrinsic::experimental_vector_reduce_fmin: {
  115. Value *Vec = II->getArgOperand(0);
  116. Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
  117. } break;
  118. default:
  119. continue;
  120. }
  121. II->replaceAllUsesWith(Rdx);
  122. II->eraseFromParent();
  123. Changed = true;
  124. }
  125. return Changed;
  126. }
  127. class ExpandReductions : public FunctionPass {
  128. public:
  129. static char ID;
  130. ExpandReductions() : FunctionPass(ID) {
  131. initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
  132. }
  133. bool runOnFunction(Function &F) override {
  134. const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  135. return expandReductions(F, TTI);
  136. }
  137. void getAnalysisUsage(AnalysisUsage &AU) const override {
  138. AU.addRequired<TargetTransformInfoWrapperPass>();
  139. AU.setPreservesCFG();
  140. }
  141. };
  142. }
  143. char ExpandReductions::ID;
  144. INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
  145. "Expand reduction intrinsics", false, false)
  146. INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
  147. INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
  148. "Expand reduction intrinsics", false, false)
  149. FunctionPass *llvm::createExpandReductionsPass() {
  150. return new ExpandReductions();
  151. }
  152. PreservedAnalyses ExpandReductionsPass::run(Function &F,
  153. FunctionAnalysisManager &AM) {
  154. const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  155. if (!expandReductions(F, &TTI))
  156. return PreservedAnalyses::all();
  157. PreservedAnalyses PA;
  158. PA.preserveSet<CFGAnalyses>();
  159. return PA;
  160. }