ICF.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317
  1. //===- ICF.cpp ------------------------------------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // ICF is short for Identical Code Folding. That is a size optimization to
  10. // identify and merge two or more read-only sections (typically functions)
  11. // that happened to have the same contents. It usually reduces output size
  12. // by a few percent.
  13. //
  14. // On Windows, ICF is enabled by default.
  15. //
  16. // See ELF/ICF.cpp for the details about the algorithm.
  17. //
  18. //===----------------------------------------------------------------------===//
  19. #include "ICF.h"
  20. #include "Chunks.h"
  21. #include "Symbols.h"
  22. #include "lld/Common/ErrorHandler.h"
  23. #include "lld/Common/Threads.h"
  24. #include "lld/Common/Timer.h"
  25. #include "llvm/ADT/Hashing.h"
  26. #include "llvm/Support/Debug.h"
  27. #include "llvm/Support/Parallel.h"
  28. #include "llvm/Support/raw_ostream.h"
  29. #include "llvm/Support/xxhash.h"
  30. #include <algorithm>
  31. #include <atomic>
  32. #include <vector>
  33. using namespace llvm;
  34. namespace lld {
  35. namespace coff {
  36. static Timer icfTimer("ICF", Timer::root());
  37. class ICF {
  38. public:
  39. void run(ArrayRef<Chunk *> v);
  40. private:
  41. void segregate(size_t begin, size_t end, bool constant);
  42. bool assocEquals(const SectionChunk *a, const SectionChunk *b);
  43. bool equalsConstant(const SectionChunk *a, const SectionChunk *b);
  44. bool equalsVariable(const SectionChunk *a, const SectionChunk *b);
  45. bool isEligible(SectionChunk *c);
  46. size_t findBoundary(size_t begin, size_t end);
  47. void forEachClassRange(size_t begin, size_t end,
  48. std::function<void(size_t, size_t)> fn);
  49. void forEachClass(std::function<void(size_t, size_t)> fn);
  50. std::vector<SectionChunk *> chunks;
  51. int cnt = 0;
  52. std::atomic<bool> repeat = {false};
  53. };
  54. // Returns true if section S is subject of ICF.
  55. //
  56. // Microsoft's documentation
  57. // (https://msdn.microsoft.com/en-us/library/bxwfs976.aspx; visited April
  58. // 2017) says that /opt:icf folds both functions and read-only data.
  59. // Despite that, the MSVC linker folds only functions. We found
  60. // a few instances of programs that are not safe for data merging.
  61. // Therefore, we merge only functions just like the MSVC tool. However, we also
  62. // merge read-only sections in a couple of cases where the address of the
  63. // section is insignificant to the user program and the behaviour matches that
  64. // of the Visual C++ linker.
  65. bool ICF::isEligible(SectionChunk *c) {
  66. // Non-comdat chunks, dead chunks, and writable chunks are not eligible.
  67. bool writable = c->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_WRITE;
  68. if (!c->isCOMDAT() || !c->live || writable)
  69. return false;
  70. // Code sections are eligible.
  71. if (c->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_EXECUTE)
  72. return true;
  73. // .pdata and .xdata unwind info sections are eligible.
  74. StringRef outSecName = c->getSectionName().split('$').first;
  75. if (outSecName == ".pdata" || outSecName == ".xdata")
  76. return true;
  77. // So are vtables.
  78. if (c->sym && c->sym->getName().startswith("??_7"))
  79. return true;
  80. // Anything else not in an address-significance table is eligible.
  81. return !c->keepUnique;
  82. }
  83. // Split an equivalence class into smaller classes.
  84. void ICF::segregate(size_t begin, size_t end, bool constant) {
  85. while (begin < end) {
  86. // Divide [Begin, End) into two. Let Mid be the start index of the
  87. // second group.
  88. auto bound = std::stable_partition(
  89. chunks.begin() + begin + 1, chunks.begin() + end, [&](SectionChunk *s) {
  90. if (constant)
  91. return equalsConstant(chunks[begin], s);
  92. return equalsVariable(chunks[begin], s);
  93. });
  94. size_t mid = bound - chunks.begin();
  95. // Split [Begin, End) into [Begin, Mid) and [Mid, End). We use Mid as an
  96. // equivalence class ID because every group ends with a unique index.
  97. for (size_t i = begin; i < mid; ++i)
  98. chunks[i]->eqClass[(cnt + 1) % 2] = mid;
  99. // If we created a group, we need to iterate the main loop again.
  100. if (mid != end)
  101. repeat = true;
  102. begin = mid;
  103. }
  104. }
  105. // Returns true if two sections' associative children are equal.
  106. bool ICF::assocEquals(const SectionChunk *a, const SectionChunk *b) {
  107. auto childClasses = [&](const SectionChunk *sc) {
  108. std::vector<uint32_t> classes;
  109. for (const SectionChunk &c : sc->children())
  110. if (!c.getSectionName().startswith(".debug") &&
  111. c.getSectionName() != ".gfids$y" && c.getSectionName() != ".gljmp$y")
  112. classes.push_back(c.eqClass[cnt % 2]);
  113. return classes;
  114. };
  115. return childClasses(a) == childClasses(b);
  116. }
  117. // Compare "non-moving" part of two sections, namely everything
  118. // except relocation targets.
  119. bool ICF::equalsConstant(const SectionChunk *a, const SectionChunk *b) {
  120. if (a->relocsSize != b->relocsSize)
  121. return false;
  122. // Compare relocations.
  123. auto eq = [&](const coff_relocation &r1, const coff_relocation &r2) {
  124. if (r1.Type != r2.Type ||
  125. r1.VirtualAddress != r2.VirtualAddress) {
  126. return false;
  127. }
  128. Symbol *b1 = a->file->getSymbol(r1.SymbolTableIndex);
  129. Symbol *b2 = b->file->getSymbol(r2.SymbolTableIndex);
  130. if (b1 == b2)
  131. return true;
  132. if (auto *d1 = dyn_cast<DefinedRegular>(b1))
  133. if (auto *d2 = dyn_cast<DefinedRegular>(b2))
  134. return d1->getValue() == d2->getValue() &&
  135. d1->getChunk()->eqClass[cnt % 2] == d2->getChunk()->eqClass[cnt % 2];
  136. return false;
  137. };
  138. if (!std::equal(a->getRelocs().begin(), a->getRelocs().end(),
  139. b->getRelocs().begin(), eq))
  140. return false;
  141. // Compare section attributes and contents.
  142. return a->getOutputCharacteristics() == b->getOutputCharacteristics() &&
  143. a->getSectionName() == b->getSectionName() &&
  144. a->header->SizeOfRawData == b->header->SizeOfRawData &&
  145. a->checksum == b->checksum && a->getContents() == b->getContents() &&
  146. assocEquals(a, b);
  147. }
  148. // Compare "moving" part of two sections, namely relocation targets.
  149. bool ICF::equalsVariable(const SectionChunk *a, const SectionChunk *b) {
  150. // Compare relocations.
  151. auto eq = [&](const coff_relocation &r1, const coff_relocation &r2) {
  152. Symbol *b1 = a->file->getSymbol(r1.SymbolTableIndex);
  153. Symbol *b2 = b->file->getSymbol(r2.SymbolTableIndex);
  154. if (b1 == b2)
  155. return true;
  156. if (auto *d1 = dyn_cast<DefinedRegular>(b1))
  157. if (auto *d2 = dyn_cast<DefinedRegular>(b2))
  158. return d1->getChunk()->eqClass[cnt % 2] == d2->getChunk()->eqClass[cnt % 2];
  159. return false;
  160. };
  161. return std::equal(a->getRelocs().begin(), a->getRelocs().end(),
  162. b->getRelocs().begin(), eq) &&
  163. assocEquals(a, b);
  164. }
  165. // Find the first Chunk after Begin that has a different class from Begin.
  166. size_t ICF::findBoundary(size_t begin, size_t end) {
  167. for (size_t i = begin + 1; i < end; ++i)
  168. if (chunks[begin]->eqClass[cnt % 2] != chunks[i]->eqClass[cnt % 2])
  169. return i;
  170. return end;
  171. }
  172. void ICF::forEachClassRange(size_t begin, size_t end,
  173. std::function<void(size_t, size_t)> fn) {
  174. while (begin < end) {
  175. size_t mid = findBoundary(begin, end);
  176. fn(begin, mid);
  177. begin = mid;
  178. }
  179. }
  180. // Call Fn on each class group.
  181. void ICF::forEachClass(std::function<void(size_t, size_t)> fn) {
  182. // If the number of sections are too small to use threading,
  183. // call Fn sequentially.
  184. if (chunks.size() < 1024) {
  185. forEachClassRange(0, chunks.size(), fn);
  186. ++cnt;
  187. return;
  188. }
  189. // Shard into non-overlapping intervals, and call Fn in parallel.
  190. // The sharding must be completed before any calls to Fn are made
  191. // so that Fn can modify the Chunks in its shard without causing data
  192. // races.
  193. const size_t numShards = 256;
  194. size_t step = chunks.size() / numShards;
  195. size_t boundaries[numShards + 1];
  196. boundaries[0] = 0;
  197. boundaries[numShards] = chunks.size();
  198. parallelForEachN(1, numShards, [&](size_t i) {
  199. boundaries[i] = findBoundary((i - 1) * step, chunks.size());
  200. });
  201. parallelForEachN(1, numShards + 1, [&](size_t i) {
  202. if (boundaries[i - 1] < boundaries[i]) {
  203. forEachClassRange(boundaries[i - 1], boundaries[i], fn);
  204. }
  205. });
  206. ++cnt;
  207. }
  208. // Merge identical COMDAT sections.
  209. // Two sections are considered the same if their section headers,
  210. // contents and relocations are all the same.
  211. void ICF::run(ArrayRef<Chunk *> vec) {
  212. ScopedTimer t(icfTimer);
  213. // Collect only mergeable sections and group by hash value.
  214. uint32_t nextId = 1;
  215. for (Chunk *c : vec) {
  216. if (auto *sc = dyn_cast<SectionChunk>(c)) {
  217. if (isEligible(sc))
  218. chunks.push_back(sc);
  219. else
  220. sc->eqClass[0] = nextId++;
  221. }
  222. }
  223. // Make sure that ICF doesn't merge sections that are being handled by string
  224. // tail merging.
  225. for (MergeChunk *mc : MergeChunk::instances)
  226. if (mc)
  227. for (SectionChunk *sc : mc->sections)
  228. sc->eqClass[0] = nextId++;
  229. // Initially, we use hash values to partition sections.
  230. parallelForEach(chunks, [&](SectionChunk *sc) {
  231. sc->eqClass[0] = xxHash64(sc->getContents());
  232. });
  233. // Combine the hashes of the sections referenced by each section into its
  234. // hash.
  235. for (unsigned cnt = 0; cnt != 2; ++cnt) {
  236. parallelForEach(chunks, [&](SectionChunk *sc) {
  237. uint32_t hash = sc->eqClass[cnt % 2];
  238. for (Symbol *b : sc->symbols())
  239. if (auto *sym = dyn_cast_or_null<DefinedRegular>(b))
  240. hash += sym->getChunk()->eqClass[cnt % 2];
  241. // Set MSB to 1 to avoid collisions with non-hash classes.
  242. sc->eqClass[(cnt + 1) % 2] = hash | (1U << 31);
  243. });
  244. }
  245. // From now on, sections in Chunks are ordered so that sections in
  246. // the same group are consecutive in the vector.
  247. llvm::stable_sort(chunks, [](const SectionChunk *a, const SectionChunk *b) {
  248. return a->eqClass[0] < b->eqClass[0];
  249. });
  250. // Compare static contents and assign unique IDs for each static content.
  251. forEachClass([&](size_t begin, size_t end) { segregate(begin, end, true); });
  252. // Split groups by comparing relocations until convergence is obtained.
  253. do {
  254. repeat = false;
  255. forEachClass(
  256. [&](size_t begin, size_t end) { segregate(begin, end, false); });
  257. } while (repeat);
  258. log("ICF needed " + Twine(cnt) + " iterations");
  259. // Merge sections in the same classes.
  260. forEachClass([&](size_t begin, size_t end) {
  261. if (end - begin == 1)
  262. return;
  263. log("Selected " + chunks[begin]->getDebugName());
  264. for (size_t i = begin + 1; i < end; ++i) {
  265. log(" Removed " + chunks[i]->getDebugName());
  266. chunks[begin]->replace(chunks[i]);
  267. }
  268. });
  269. }
  270. // Entry point to ICF.
  271. void doICF(ArrayRef<Chunk *> chunks) { ICF().run(chunks); }
  272. } // namespace coff
  273. } // namespace lld