//===-- AtomicExpandLoadLinkedPass.cpp - Expand atomic instructions -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
// appropriate (intrinsic-based) ldrex/strex loops.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"

using namespace llvm;

#define DEBUG_TYPE "arm-atomic-expand"

namespace {
  class AtomicExpandLoadLinked : public FunctionPass {
    const TargetMachine *TM;

  public:
    static char ID; // Pass identification, replacement for typeid
    explicit AtomicExpandLoadLinked(const TargetMachine *TM = nullptr)
        : FunctionPass(ID), TM(TM) {
      initializeAtomicExpandLoadLinkedPass(*PassRegistry::getPassRegistry());
    }

    bool runOnFunction(Function &F) override;
    bool expandAtomicInsts(Function &F);

    bool expandAtomicLoad(LoadInst *LI);
    bool expandAtomicStore(StoreInst *SI);
    bool expandAtomicRMW(AtomicRMWInst *AI);
    bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);

    AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
    void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
  };
}

char AtomicExpandLoadLinked::ID = 0;
char &llvm::AtomicExpandLoadLinkedID = AtomicExpandLoadLinked::ID;

INITIALIZE_TM_PASS(AtomicExpandLoadLinked, "atomic-ll-sc",
                   "Expand Atomic calls in terms of load-linked & store-conditional",
                   false, false)

FunctionPass *llvm::createAtomicExpandLoadLinkedPass(const TargetMachine *TM) {
  return new AtomicExpandLoadLinked(TM);
}

bool AtomicExpandLoadLinked::runOnFunction(Function &F) {
  if (!TM || !TM->getSubtargetImpl()->enableAtomicExpandLoadLinked())
    return false;

  SmallVector<Instruction *, 1> AtomicInsts;

  // Changing control-flow while iterating through it is a bad idea, so gather a
  // list of all atomic instructions before we start.
  for (BasicBlock &BB : F)
    for (Instruction &Inst : BB) {
      if (isa<AtomicRMWInst>(&Inst) || isa<AtomicCmpXchgInst>(&Inst) ||
          (isa<LoadInst>(&Inst) && cast<LoadInst>(&Inst)->isAtomic()) ||
          (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
        AtomicInsts.push_back(&Inst);
    }

  bool MadeChange = false;
  for (Instruction *Inst : AtomicInsts) {
    if (!TM->getSubtargetImpl()->getTargetLowering()->shouldExpandAtomicInIR(
            Inst))
      continue;

    if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
      MadeChange |= expandAtomicRMW(AI);
    else if (AtomicCmpXchgInst *CI = dyn_cast<AtomicCmpXchgInst>(Inst))
      MadeChange |= expandAtomicCmpXchg(CI);
    else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
      MadeChange |= expandAtomicLoad(LI);
    else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
      MadeChange |= expandAtomicStore(SI);
    else
      llvm_unreachable("Unknown atomic instruction");
  }

  return MadeChange;
}

bool AtomicExpandLoadLinked::expandAtomicLoad(LoadInst *LI) {
  // Load instructions don't actually need a leading fence, even in the
  // SequentiallyConsistent case.
  AtomicOrdering MemOpOrder =
      TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic()
          ? Monotonic
          : LI->getOrdering();

  // The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is
  // an ldrexd (A3.5.3).
  IRBuilder<> Builder(LI);
  Value *Val = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked(
      Builder, LI->getPointerOperand(), MemOpOrder);

  insertTrailingFence(Builder, LI->getOrdering());

  LI->replaceAllUsesWith(Val);
  LI->eraseFromParent();

  return true;
}

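// For illustration only (the IR actually produced depends on the target's
// emitLoadLinked hook): on ARM, with fence insertion enabled, a load such as
//   %val = load atomic i32* %addr seq_cst, align 4
// becomes, roughly,
//   %val = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
//   fence seq_cst
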
bool AtomicExpandLoadLinked::expandAtomicStore(StoreInst *SI) {
  // The only atomic 64-bit store on ARM is an strexd that succeeds, which means
  // we need a loop and the entire instruction is essentially an "atomicrmw
  // xchg" that ignores the value loaded.
  IRBuilder<> Builder(SI);
  AtomicRMWInst *AI =
      Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
                              SI->getValueOperand(), SI->getOrdering());
  SI->eraseFromParent();

  // Now that we have an appropriate swap instruction, lower it as usual.
  return expandAtomicRMW(AI);
}

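// For illustration: a store such as
//   store atomic i32 %v, i32* %addr release, align 4
// is first rewritten to
//   atomicrmw xchg i32* %addr, i32 %v release
// (whose loaded result is simply unused) and then expanded by expandAtomicRMW
// below.
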
bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) {
  AtomicOrdering Order = AI->getOrdering();
  Value *Addr = AI->getPointerOperand();
  BasicBlock *BB = AI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     fence?
  // atomicrmw.start:
  //     %loaded = @load.linked(%addr)
  //     %new = some_op iN %loaded, %incr
  //     %stored = @store_conditional(%new, %addr)
  //     %try_again = icmp ne i32 %stored, 0
  //     br i1 %try_again, label %atomicrmw.start, label %atomicrmw.end
  // atomicrmw.end:
  //     fence?
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // This grabs the DebugLoc from AI.
  IRBuilder<> Builder(AI);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked(
      Builder, Addr, MemOpOrder);
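
  // Compute the value to store back for this particular read-modify-write
  // operation.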
  Value *NewVal;
  switch (AI->getOperation()) {
  case AtomicRMWInst::Xchg:
    NewVal = AI->getValOperand();
    break;
  case AtomicRMWInst::Add:
    NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::Sub:
    NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::And:
    NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::Nand:
    NewVal = Builder.CreateNot(Builder.CreateAnd(Loaded, AI->getValOperand()),
                               "new");
    break;
  case AtomicRMWInst::Or:
    NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::Xor:
    NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::Max:
    NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand());
    NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::Min:
    NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand());
    NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::UMax:
    NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand());
    NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::UMin:
    NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand());
    NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
    break;
  default:
    llvm_unreachable("Unknown atomic op");
  }
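
  // Try to commit the new value: a non-zero result from the store-conditional
  // means the exclusive reservation was lost and the loop must retry.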
  Value *StoreSuccess =
      TM->getSubtargetImpl()->getTargetLowering()->emitStoreConditional(
          Builder, NewVal, Addr, MemOpOrder);
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  insertTrailingFence(Builder, Order);
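
  // An atomicrmw yields the value observed *before* the operation, so uses of
  // AI are rewritten to the loaded value rather than the newly computed one.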
  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();

  return true;
}

bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
  AtomicOrdering FailureOrder = CI->getFailureOrdering();
  Value *Addr = CI->getPointerOperand();
  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();
  // Given: cmpxchg iN* %addr, iN %desired, iN %new success_ord fail_ord
  //
  // The full expansion we produce is:
  //     [...]
  //     fence?
  // cmpxchg.start:
  //     %loaded = @load.linked(%addr)
  //     %should_store = icmp eq iN %loaded, %desired
  //     br i1 %should_store, label %cmpxchg.trystore,
  //                          label %cmpxchg.failure
  // cmpxchg.trystore:
  //     %stored = @store_conditional(%new, %addr)
  //     %success = icmp eq i32 %stored, 0
  //     br i1 %success, label %cmpxchg.success,
  //                     label %cmpxchg.start (strong) or %cmpxchg.failure (weak)
  // cmpxchg.success:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.failure:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.end:
  //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
  //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
  //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB);
  auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
  auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);

  // This grabs the DebugLoc from CI.
  IRBuilder<> Builder(CI);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked(
      Builder, Addr, MemOpOrder);
  Value *ShouldStore =
      Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");

  // If the cmpxchg doesn't actually need any ordering when it fails, we can
  // jump straight past that fence instruction (if it exists).
  Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);

  Builder.SetInsertPoint(TryStoreBB);
  Value *StoreSuccess =
      TM->getSubtargetImpl()->getTargetLowering()->emitStoreConditional(
          Builder, CI->getNewValOperand(), Addr, MemOpOrder);
  StoreSuccess = Builder.CreateICmpEQ(
      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
  Builder.CreateCondBr(StoreSuccess, SuccessBB,
                       CI->isWeak() ? FailureBB : LoopBB);
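
  // A weak cmpxchg is permitted to fail spuriously, so when the
  // store-conditional is lost it may branch straight to the failure block
  // instead of retrying the loop.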

  // Make sure later instructions don't get reordered with a fence if
  // necessary.
  Builder.SetInsertPoint(SuccessBB);
  insertTrailingFence(Builder, SuccessOrder);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(FailureBB);
  insertTrailingFence(Builder, FailureOrder);
  Builder.CreateBr(ExitBB);

  // Finally, we have control-flow based knowledge of whether the cmpxchg
  // succeeded or not. We expose this to later passes by converting any
  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI.

  // Setup the builder so we can create any PHIs we need.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);

  // Look for any users of the cmpxchg that are just comparing the loaded value
  // against the desired one, and replace them with the CFG-derived version.
  SmallVector<ExtractValueInst *, 2> PrunedInsts;
  for (auto User : CI->users()) {
    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
    if (!EV)
      continue;

    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");

    if (EV->getIndices()[0] == 0)
      EV->replaceAllUsesWith(Loaded);
    else
      EV->replaceAllUsesWith(Success);

    PrunedInsts.push_back(EV);
  }

  // We can remove the instructions now that we're no longer iterating through
  // them.
  for (auto EV : PrunedInsts)
    EV->eraseFromParent();

  if (!CI->use_empty()) {
    // Some use of the full struct return that we don't understand has happened,
    // so we've got to reconstruct it properly.
    Value *Res;
    Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
    Res = Builder.CreateInsertValue(Res, Success, 1);

    CI->replaceAllUsesWith(Res);
  }

  CI->eraseFromParent();
  return true;
}

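// When the target asks for explicit fences (getInsertFencesForAtomic), the two
// helpers below emit a release barrier ahead of the LL/SC sequence and an
// acquire (or seq_cst) barrier after it, weakening the ordering on the memory
// operations themselves to Monotonic; otherwise the original ordering is left
// on the load-linked/store-conditional operations.
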
AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder,
                                                          AtomicOrdering Ord) {
  if (!TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic())
    return Ord;

  if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
    Builder.CreateFence(Release);

  // The exclusive operations don't need any barrier if we're adding separate
  // fences.
  return Monotonic;
}

void AtomicExpandLoadLinked::insertTrailingFence(IRBuilder<> &Builder,
                                                 AtomicOrdering Ord) {
  if (!TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic())
    return;

  if (Ord == Acquire || Ord == AcquireRelease)
    Builder.CreateFence(Acquire);
  else if (Ord == SequentiallyConsistent)
    Builder.CreateFence(SequentiallyConsistent);
}
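
// A minimal usage sketch (hypothetical target code, not part of this file): a
// target that supports LL/SC schedules this pass early in its IR pipeline,
// passing its TargetMachine so the pass can query the subtarget hooks used
// above.
//
//   void MyTargetPassConfig::addIRPasses() {
//     addPass(createAtomicExpandLoadLinkedPass(TM));
//     TargetPassConfig::addIRPasses();
//   }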