Compressor.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489
  1. //===- lib/Support/Compressor.cpp -------------------------------*- C++ -*-===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file was developed by Reid Spencer and is distributed under the
  6. // University of Illinois Open Source License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file implements the llvm::Compressor class, an abstraction for memory
  11. // block compression.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "llvm/Config/config.h"
  15. #include "llvm/Support/Compressor.h"
  16. #include "llvm/ADT/StringExtras.h"
  17. #include <cassert>
  18. #include <string>
  19. #include <ostream>
  20. #include "bzip2/bzlib.h"
  21. using namespace llvm;
  22. enum CompressionTypes {
  23. COMP_TYPE_NONE = '0',
  24. COMP_TYPE_BZIP2 = '2'
  25. };
  26. static int getdata(char*& buffer, size_t &size,
  27. llvm::Compressor::OutputDataCallback* cb, void* context) {
  28. buffer = 0;
  29. size = 0;
  30. int result = (*cb)(buffer, size, context);
  31. assert(buffer != 0 && "Invalid result from Compressor callback");
  32. assert(size != 0 && "Invalid result from Compressor callback");
  33. return result;
  34. }
  35. static int getdata_uns(char*& buffer, unsigned &size,
  36. llvm::Compressor::OutputDataCallback* cb, void* context)
  37. {
  38. size_t SizeOut;
  39. int Res = getdata(buffer, SizeOut, cb, context);
  40. size = SizeOut;
  41. return Res;
  42. }
  43. //===----------------------------------------------------------------------===//
  44. //=== NULLCOMP - a compression like set of routines that just copies data
  45. //=== without doing any compression. This is provided so that if the
  46. //=== configured environment doesn't have a compression library the
  47. //=== program can still work, albeit using more data/memory.
  48. //===----------------------------------------------------------------------===//
  49. struct NULLCOMP_stream {
  50. // User provided fields
  51. char* next_in;
  52. size_t avail_in;
  53. char* next_out;
  54. size_t avail_out;
  55. // Information fields
  56. size_t output_count; // Total count of output bytes
  57. };
  58. static void NULLCOMP_init(NULLCOMP_stream* s) {
  59. s->output_count = 0;
  60. }
  61. static bool NULLCOMP_compress(NULLCOMP_stream* s) {
  62. assert(s && "Invalid NULLCOMP_stream");
  63. assert(s->next_in != 0);
  64. assert(s->next_out != 0);
  65. assert(s->avail_in >= 1);
  66. assert(s->avail_out >= 1);
  67. if (s->avail_out >= s->avail_in) {
  68. ::memcpy(s->next_out, s->next_in, s->avail_in);
  69. s->output_count += s->avail_in;
  70. s->avail_out -= s->avail_in;
  71. s->next_in += s->avail_in;
  72. s->avail_in = 0;
  73. return true;
  74. } else {
  75. ::memcpy(s->next_out, s->next_in, s->avail_out);
  76. s->output_count += s->avail_out;
  77. s->avail_in -= s->avail_out;
  78. s->next_in += s->avail_out;
  79. s->avail_out = 0;
  80. return false;
  81. }
  82. }
  83. static bool NULLCOMP_decompress(NULLCOMP_stream* s) {
  84. assert(s && "Invalid NULLCOMP_stream");
  85. assert(s->next_in != 0);
  86. assert(s->next_out != 0);
  87. assert(s->avail_in >= 1);
  88. assert(s->avail_out >= 1);
  89. if (s->avail_out >= s->avail_in) {
  90. ::memcpy(s->next_out, s->next_in, s->avail_in);
  91. s->output_count += s->avail_in;
  92. s->avail_out -= s->avail_in;
  93. s->next_in += s->avail_in;
  94. s->avail_in = 0;
  95. return true;
  96. } else {
  97. ::memcpy(s->next_out, s->next_in, s->avail_out);
  98. s->output_count += s->avail_out;
  99. s->avail_in -= s->avail_out;
  100. s->next_in += s->avail_out;
  101. s->avail_out = 0;
  102. return false;
  103. }
  104. }
  105. static void NULLCOMP_end(NULLCOMP_stream* strm) {
  106. }
  107. namespace {
  108. /// This structure is only used when a bytecode file is compressed.
  109. /// As bytecode is being decompressed, the memory buffer might need
  110. /// to be reallocated. The buffer allocation is handled in a callback
  111. /// and this structure is needed to retain information across calls
  112. /// to the callback.
  113. /// @brief An internal buffer object used for handling decompression
  114. struct BufferContext {
  115. char* buff;
  116. size_t size;
  117. BufferContext(size_t compressedSize) {
  118. // Null to indicate malloc of a new block
  119. buff = 0;
  120. // Compute the initial length of the uncompression buffer. Note that this
  121. // is twice the length of the compressed buffer and will be doubled again
  122. // in the callback for an initial allocation of 4x compressedSize. This
  123. // calculation is based on the typical compression ratio of bzip2 on LLVM
  124. // bytecode files which typically ranges in the 50%-75% range. Since we
  125. // typically get at least 50%, doubling is insufficient. By using a 4x
  126. // multiplier on the first allocation, we minimize the impact of having to
  127. // copy the buffer on reallocation.
  128. size = compressedSize*2;
  129. }
  130. /// trimTo - Reduce the size of the buffer down to the specified amount. This
  131. /// is useful after have read in the bytecode file to discard extra unused
  132. /// memory.
  133. ///
  134. void trimTo(size_t NewSize) {
  135. buff = (char*)::realloc(buff, NewSize);
  136. size = NewSize;
  137. }
  138. /// This function handles allocation of the buffer used for decompression of
  139. /// compressed bytecode files. It is called by Compressor::decompress which is
  140. /// called by BytecodeReader::ParseBytecode.
  141. static size_t callback(char*&buff, size_t &sz, void* ctxt){
  142. // Case the context variable to our BufferContext
  143. BufferContext* bc = reinterpret_cast<BufferContext*>(ctxt);
  144. // Compute the new, doubled, size of the block
  145. size_t new_size = bc->size * 2;
  146. // Extend or allocate the block (realloc(0,n) == malloc(n))
  147. char* new_buff = (char*) ::realloc(bc->buff, new_size);
  148. // Figure out what to return to the Compressor. If this is the first call,
  149. // then bc->buff will be null. In this case we want to return the entire
  150. // buffer because there was no previous allocation. Otherwise, when the
  151. // buffer is reallocated, we save the new base pointer in the
  152. // BufferContext.buff field but return the address of only the extension,
  153. // mid-way through the buffer (since its size was doubled). Furthermore,
  154. // the sz result must be 1/2 the total size of the buffer.
  155. if (bc->buff == 0 ) {
  156. buff = bc->buff = new_buff;
  157. sz = new_size;
  158. } else {
  159. bc->buff = new_buff;
  160. buff = new_buff + bc->size;
  161. sz = bc->size;
  162. }
  163. // Retain the size of the allocated block
  164. bc->size = new_size;
  165. // Make sure we fail (return 1) if we didn't get any memory.
  166. return (bc->buff == 0 ? 1 : 0);
  167. }
  168. };
  169. } // end anonymous namespace
  170. namespace {
  171. // This structure retains the context when compressing the bytecode file. The
  172. // WriteCompressedData function below uses it to keep track of the previously
  173. // filled chunk of memory (which it writes) and how many bytes have been
  174. // written.
  175. struct WriterContext {
  176. // Initialize the context
  177. WriterContext(std::ostream*OS, size_t CS)
  178. : chunk(0), sz(0), written(0), compSize(CS), Out(OS) {}
  179. // Make sure we clean up memory
  180. ~WriterContext() {
  181. if (chunk)
  182. delete [] chunk;
  183. }
  184. // Write the chunk
  185. void write(size_t size = 0) {
  186. size_t write_size = (size == 0 ? sz : size);
  187. Out->write(chunk,write_size);
  188. written += write_size;
  189. delete [] chunk;
  190. chunk = 0;
  191. sz = 0;
  192. }
  193. // This function is a callback used by the Compressor::compress function to
  194. // allocate memory for the compression buffer. This function fulfills that
  195. // responsibility but also writes the previous (now filled) buffer out to the
  196. // stream.
  197. static size_t callback(char*& buffer, size_t &size, void* context) {
  198. // Cast the context to the structure it must point to.
  199. WriterContext* ctxt = reinterpret_cast<WriterContext*>(context);
  200. // If there's a previously allocated chunk, it must now be filled with
  201. // compressed data, so we write it out and deallocate it.
  202. if (ctxt->chunk != 0 && ctxt->sz > 0 ) {
  203. ctxt->write();
  204. }
  205. // Compute the size of the next chunk to allocate. We attempt to allocate
  206. // enough memory to handle the compression in a single memory allocation. In
  207. // general, the worst we do on compression of bytecode is about 50% so we
  208. // conservatively estimate compSize / 2 as the size needed for the
  209. // compression buffer. compSize is the size of the compressed data, provided
  210. // by WriteBytecodeToFile.
  211. size = ctxt->sz = ctxt->compSize / 2;
  212. // Allocate the chunks
  213. buffer = ctxt->chunk = new char [size];
  214. // We must return 1 if the allocation failed so that the Compressor knows
  215. // not to use the buffer pointer.
  216. return (ctxt->chunk == 0 ? 1 : 0);
  217. }
  218. char* chunk; // pointer to the chunk of memory filled by compression
  219. size_t sz; // size of chunk
  220. size_t written; // aggregate total of bytes written in all chunks
  221. size_t compSize; // size of the uncompressed buffer
  222. std::ostream* Out; // The stream we write the data to.
  223. };
  224. } // end anonymous namespace
  225. // Compress in one of three ways
  226. size_t Compressor::compress(const char* in, size_t size,
  227. OutputDataCallback* cb, void* context) {
  228. assert(in && "Can't compress null buffer");
  229. assert(size && "Can't compress empty buffer");
  230. assert(cb && "Can't compress without a callback function");
  231. size_t result = 0;
  232. // For small files, we just don't bother compressing. bzip2 isn't very good
  233. // with tiny files and can actually make the file larger, so we just avoid
  234. // it altogether.
  235. if (size > 64*1024) {
  236. // Set up the bz_stream
  237. bz_stream bzdata;
  238. bzdata.bzalloc = 0;
  239. bzdata.bzfree = 0;
  240. bzdata.opaque = 0;
  241. bzdata.next_in = (char*)in;
  242. bzdata.avail_in = size;
  243. bzdata.next_out = 0;
  244. bzdata.avail_out = 0;
  245. switch ( BZ2_bzCompressInit(&bzdata, 5, 0, 100) ) {
  246. case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
  247. case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
  248. case BZ_MEM_ERROR: throw std::string("Out of memory");
  249. case BZ_OK:
  250. default:
  251. break;
  252. }
  253. // Get a block of memory
  254. if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) {
  255. BZ2_bzCompressEnd(&bzdata);
  256. throw std::string("Can't allocate output buffer");
  257. }
  258. // Put compression code in first byte
  259. (*bzdata.next_out++) = COMP_TYPE_BZIP2;
  260. bzdata.avail_out--;
  261. // Compress it
  262. int bzerr = BZ_FINISH_OK;
  263. while (BZ_FINISH_OK == (bzerr = BZ2_bzCompress(&bzdata, BZ_FINISH))) {
  264. if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) {
  265. BZ2_bzCompressEnd(&bzdata);
  266. throw std::string("Can't allocate output buffer");
  267. }
  268. }
  269. switch (bzerr) {
  270. case BZ_SEQUENCE_ERROR:
  271. case BZ_PARAM_ERROR: throw std::string("Param/Sequence error");
  272. case BZ_FINISH_OK:
  273. case BZ_STREAM_END: break;
  274. default: throw std::string("Oops: ") + utostr(unsigned(bzerr));
  275. }
  276. // Finish
  277. result = bzdata.total_out_lo32 + 1;
  278. if (sizeof(size_t) == sizeof(uint64_t))
  279. result |= static_cast<uint64_t>(bzdata.total_out_hi32) << 32;
  280. BZ2_bzCompressEnd(&bzdata);
  281. } else {
  282. // Do null compression, for small files
  283. NULLCOMP_stream sdata;
  284. sdata.next_in = (char*)in;
  285. sdata.avail_in = size;
  286. NULLCOMP_init(&sdata);
  287. if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
  288. throw std::string("Can't allocate output buffer");
  289. }
  290. *(sdata.next_out++) = COMP_TYPE_NONE;
  291. sdata.avail_out--;
  292. while (!NULLCOMP_compress(&sdata)) {
  293. if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
  294. throw std::string("Can't allocate output buffer");
  295. }
  296. }
  297. result = sdata.output_count + 1;
  298. NULLCOMP_end(&sdata);
  299. }
  300. return result;
  301. }
  302. size_t Compressor::compressToNewBuffer(const char* in, size_t size, char*&out) {
  303. BufferContext bc(size);
  304. size_t result = compress(in,size,BufferContext::callback,(void*)&bc);
  305. bc.trimTo(result);
  306. out = bc.buff;
  307. return result;
  308. }
  309. size_t
  310. Compressor::compressToStream(const char*in, size_t size, std::ostream& out) {
  311. // Set up the context and writer
  312. WriterContext ctxt(&out, size / 2);
  313. // Compress everything after the magic number (which we'll alter).
  314. size_t zipSize = Compressor::compress(in,size,
  315. WriterContext::callback, (void*)&ctxt);
  316. if (ctxt.chunk) {
  317. ctxt.write(zipSize - ctxt.written);
  318. }
  319. return zipSize;
  320. }
  321. // Decompress in one of three ways
  322. size_t Compressor::decompress(const char *in, size_t size,
  323. OutputDataCallback* cb, void* context) {
  324. assert(in && "Can't decompress null buffer");
  325. assert(size > 1 && "Can't decompress empty buffer");
  326. assert(cb && "Can't decompress without a callback function");
  327. size_t result = 0;
  328. switch (*in++) {
  329. case COMP_TYPE_BZIP2: {
  330. // Set up the bz_stream
  331. bz_stream bzdata;
  332. bzdata.bzalloc = 0;
  333. bzdata.bzfree = 0;
  334. bzdata.opaque = 0;
  335. bzdata.next_in = (char*)in;
  336. bzdata.avail_in = size - 1;
  337. bzdata.next_out = 0;
  338. bzdata.avail_out = 0;
  339. switch ( BZ2_bzDecompressInit(&bzdata, 0, 0) ) {
  340. case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
  341. case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
  342. case BZ_MEM_ERROR: throw std::string("Out of memory");
  343. case BZ_OK:
  344. default:
  345. break;
  346. }
  347. // Get a block of memory
  348. if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) {
  349. BZ2_bzDecompressEnd(&bzdata);
  350. throw std::string("Can't allocate output buffer");
  351. }
  352. // Decompress it
  353. int bzerr = BZ_OK;
  354. while ( BZ_OK == (bzerr = BZ2_bzDecompress(&bzdata)) &&
  355. bzdata.avail_in != 0 ) {
  356. if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) {
  357. BZ2_bzDecompressEnd(&bzdata);
  358. throw std::string("Can't allocate output buffer");
  359. }
  360. }
  361. switch (bzerr) {
  362. case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
  363. case BZ_MEM_ERROR: throw std::string("Out of memory");
  364. case BZ_DATA_ERROR: throw std::string("Data integrity error");
  365. case BZ_DATA_ERROR_MAGIC:throw std::string("Data is not BZIP2");
  366. case BZ_OK: throw std::string("Insufficient input for bzip2");
  367. case BZ_STREAM_END: break;
  368. default: throw("Ooops");
  369. }
  370. // Finish
  371. result = bzdata.total_out_lo32;
  372. if (sizeof(size_t) == sizeof(uint64_t))
  373. result |= (static_cast<uint64_t>(bzdata.total_out_hi32) << 32);
  374. BZ2_bzDecompressEnd(&bzdata);
  375. break;
  376. }
  377. case COMP_TYPE_NONE: {
  378. NULLCOMP_stream sdata;
  379. sdata.next_in = (char*)in;
  380. sdata.avail_in = size - 1;
  381. NULLCOMP_init(&sdata);
  382. if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
  383. throw std::string("Can't allocate output buffer");
  384. }
  385. while (!NULLCOMP_decompress(&sdata)) {
  386. if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
  387. throw std::string("Can't allocate output buffer");
  388. }
  389. }
  390. result = sdata.output_count;
  391. NULLCOMP_end(&sdata);
  392. break;
  393. }
  394. default:
  395. throw std::string("Unknown type of compressed data");
  396. }
  397. return result;
  398. }
  399. size_t
  400. Compressor::decompressToNewBuffer(const char* in, size_t size, char*&out) {
  401. BufferContext bc(size);
  402. size_t result = decompress(in,size,BufferContext::callback,(void*)&bc);
  403. out = bc.buff;
  404. return result;
  405. }
  406. size_t
  407. Compressor::decompressToStream(const char*in, size_t size, std::ostream& out){
  408. // Set up the context and writer
  409. WriterContext ctxt(&out,size / 2);
  410. // Decompress everything after the magic number (which we'll alter)
  411. size_t zipSize = Compressor::decompress(in,size,
  412. WriterContext::callback, (void*)&ctxt);
  413. if (ctxt.chunk) {
  414. ctxt.write(zipSize - ctxt.written);
  415. }
  416. return zipSize;
  417. }
  418. // vim: sw=2 ai