123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489 |
- //===- lib/Support/Compressor.cpp -------------------------------*- C++ -*-===//
- //
- // The LLVM Compiler Infrastructure
- //
- // This file was developed by Reid Spencer and is distributed under the
- // University of Illinois Open Source License. See LICENSE.TXT for details.
- //
- //===----------------------------------------------------------------------===//
- //
- // This file implements the llvm::Compressor class, an abstraction for memory
- // block compression.
- //
- //===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
#include "llvm/Support/Compressor.h"
#include "llvm/ADT/StringExtras.h"
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <new>
#include <ostream>
#include <string>
#include "bzip2/bzlib.h"
- using namespace llvm;
/// Tag stored in the first byte of every block produced by
/// Compressor::compress. Compressor::decompress reads that byte back and
/// switches on it to select the matching decoder. The enumerators are kept
/// unscoped because they are assigned directly to a char in the output.
enum CompressionTypes {
  COMP_TYPE_NONE  = '0',  ///< Payload follows verbatim (NULLCOMP path)
  COMP_TYPE_BZIP2 = '2'   ///< Payload is a bzip2 stream
};
- static int getdata(char*& buffer, size_t &size,
- llvm::Compressor::OutputDataCallback* cb, void* context) {
- buffer = 0;
- size = 0;
- int result = (*cb)(buffer, size, context);
- assert(buffer != 0 && "Invalid result from Compressor callback");
- assert(size != 0 && "Invalid result from Compressor callback");
- return result;
- }
- static int getdata_uns(char*& buffer, unsigned &size,
- llvm::Compressor::OutputDataCallback* cb, void* context)
- {
- size_t SizeOut;
- int Res = getdata(buffer, SizeOut, cb, context);
- size = SizeOut;
- return Res;
- }
- //===----------------------------------------------------------------------===//
- //=== NULLCOMP - a compression like set of routines that just copies data
- //=== without doing any compression. This is provided so that if the
- //=== configured environment doesn't have a compression library the
- //=== program can still work, albeit using more data/memory.
- //===----------------------------------------------------------------------===//
/// Working state for the NULLCOMP_* routines. The field names follow the
/// next_in/avail_in/next_out/avail_out convention that this file also uses
/// when driving a bz_stream.
struct NULLCOMP_stream {
  // Fields supplied by the user of the stream
  char*  next_in;       // Next byte of input to be consumed
  size_t avail_in;      // Number of input bytes left at next_in
  char*  next_out;      // Where the next byte of output is stored
  size_t avail_out;     // Number of bytes of room left at next_out

  // Fields maintained by the NULLCOMP routines
  size_t output_count;  // Running total of bytes copied to the output
};
- static void NULLCOMP_init(NULLCOMP_stream* s) {
- s->output_count = 0;
- }
- static bool NULLCOMP_compress(NULLCOMP_stream* s) {
- assert(s && "Invalid NULLCOMP_stream");
- assert(s->next_in != 0);
- assert(s->next_out != 0);
- assert(s->avail_in >= 1);
- assert(s->avail_out >= 1);
- if (s->avail_out >= s->avail_in) {
- ::memcpy(s->next_out, s->next_in, s->avail_in);
- s->output_count += s->avail_in;
- s->avail_out -= s->avail_in;
- s->next_in += s->avail_in;
- s->avail_in = 0;
- return true;
- } else {
- ::memcpy(s->next_out, s->next_in, s->avail_out);
- s->output_count += s->avail_out;
- s->avail_in -= s->avail_out;
- s->next_in += s->avail_out;
- s->avail_out = 0;
- return false;
- }
- }
- static bool NULLCOMP_decompress(NULLCOMP_stream* s) {
- assert(s && "Invalid NULLCOMP_stream");
- assert(s->next_in != 0);
- assert(s->next_out != 0);
- assert(s->avail_in >= 1);
- assert(s->avail_out >= 1);
- if (s->avail_out >= s->avail_in) {
- ::memcpy(s->next_out, s->next_in, s->avail_in);
- s->output_count += s->avail_in;
- s->avail_out -= s->avail_in;
- s->next_in += s->avail_in;
- s->avail_in = 0;
- return true;
- } else {
- ::memcpy(s->next_out, s->next_in, s->avail_out);
- s->output_count += s->avail_out;
- s->avail_in -= s->avail_out;
- s->next_in += s->avail_out;
- s->avail_out = 0;
- return false;
- }
- }
- static void NULLCOMP_end(NULLCOMP_stream* strm) {
- }
namespace {

/// This structure backs compressToNewBuffer and decompressToNewBuffer. As
/// output is produced the memory block may have to grow; that growth is done
/// in a callback, and this structure retains the block's address and size
/// across calls to the callback. The block is obtained from realloc and is NOT
/// released by this structure: ownership passes to whoever reads \c buff.
/// @brief An internal growable buffer used as a Compressor output target
struct BufferContext {
  char* buff;   ///< Base of the block; null until the first callback
  size_t size;  ///< Seed size before the first callback, block size after

  explicit BufferContext(size_t compressedSize) {
    // Null to indicate malloc of a new block
    buff = 0;

    // Compute the initial length of the uncompression buffer. Note that this
    // is twice the length of the compressed buffer and will be doubled again
    // in the callback for an initial allocation of 4x compressedSize. This
    // calculation is based on the typical compression ratio of bzip2 on LLVM
    // bytecode files which typically ranges in the 50%-75% range. Since we
    // typically get at least 50%, doubling is insufficient. By using a 4x
    // multiplier on the first allocation, we minimize the impact of having to
    // copy the buffer on reallocation.
    //
    // If doubling would wrap around, saturate instead so that the callback
    // fails cleanly rather than allocating a tiny block.
    const size_t MaxSize = static_cast<size_t>(-1);
    size = (compressedSize > MaxSize / 2) ? MaxSize : compressedSize * 2;
  }

  /// trimTo - Reduce the size of the buffer down to the specified amount. This
  /// is useful once the final output size is known, to discard the unused
  /// tail of the block. If the reallocation fails the existing (larger) block
  /// is kept and \c size is left unchanged.
  ///
  void trimTo(size_t NewSize) {
    char* trimmed = (char*)::realloc(buff, NewSize);
    // A failed realloc leaves the old block valid; overwriting buff with null
    // here would both leak it and hand the caller a null result.
    if (trimmed == 0 && NewSize != 0)
      return;
    buff = trimmed;
    size = NewSize;
  }

  /// This function handles allocation of the buffer that Compressor::compress
  /// and Compressor::decompress write into. Each call doubles the block.
  ///
  /// @param buff Receives the address at which new output may be written.
  /// @param sz   Receives the number of bytes available at \p buff.
  /// @param ctxt Must point to the BufferContext being grown.
  /// @returns 0 on success. On failure returns 1, releases the block, and sets
  ///          \p buff to null and \p sz to 0.
  static size_t callback(char*&buff, size_t &sz, void* ctxt){
    // Cast the context variable to our BufferContext
    BufferContext* bc = static_cast<BufferContext*>(ctxt);
    const bool first_call = (bc->buff == 0);

    // Compute the new, doubled, size of the block and extend or allocate it
    // (realloc(0,n) == malloc(n)). A zero size can never grow by doubling and
    // a size above half the address space would wrap; both count as failure.
    const size_t MaxSize = static_cast<size_t>(-1);
    size_t new_size = 0;
    char* new_buff = 0;
    if (bc->size != 0 && bc->size <= MaxSize / 2) {
      new_size = bc->size * 2;
      new_buff = (char*) ::realloc(bc->buff, new_size);
    }

    // Make sure we fail (return 1) if we didn't get any memory. The old block
    // is still valid at this point (realloc does not free it on failure), so
    // release it here: nobody else will, since the caller is about to bail.
    if (new_buff == 0) {
      ::free(bc->buff);
      bc->buff = 0;
      buff = 0;
      sz = 0;
      return 1;
    }

    // Figure out what to return to the Compressor. If this is the first call,
    // we want to return the entire buffer because there was no previous
    // allocation. Otherwise, when the buffer is reallocated, we save the new
    // base pointer in the BufferContext.buff field but return the address of
    // only the extension, mid-way through the buffer (since its size was
    // doubled). Furthermore, the sz result must be 1/2 the total size of the
    // buffer.
    bc->buff = new_buff;
    if (first_call) {
      buff = new_buff;
      sz = new_size;
    } else {
      buff = new_buff + bc->size;
      sz = bc->size;
    }

    // Retain the size of the allocated block
    bc->size = new_size;
    return 0;
  }
};

} // end anonymous namespace
namespace {

// This structure retains the context when Compressor output is sent to a
// stream. Its callback hands out one heap chunk at a time; each time a new
// chunk is requested the previous (now filled) chunk is written to the stream
// and released. It also keeps track of how many bytes have been written.
struct WriterContext {
  // Smallest chunk the callback will hand out. The Compressor stores a one
  // byte tag in the first chunk and then needs room for at least one byte of
  // payload, so anything below 2 cannot work; 64 merely avoids a flood of
  // tiny allocations for very small inputs.
  enum { MinChunkSize = 64 };

  // Initialize the context. CS is a sizing hint: every chunk is CS / 2 bytes
  // long, subject to MinChunkSize.
  WriterContext(std::ostream*OS, size_t CS)
    : chunk(0), sz(0), written(0), compSize(CS), Out(OS) {}

  // Make sure we clean up memory
  ~WriterContext() {
    delete [] chunk;
  }

  // Write the current chunk to the stream and release it. A size of 0 means
  // "the whole chunk"; any other value writes only that many leading bytes.
  // Does nothing if there is no current chunk.
  void write(size_t size = 0) {
    if (chunk == 0)
      return;
    size_t write_size = (size == 0 ? sz : size);
    assert(write_size <= sz && "Writing more than the chunk holds");
    Out->write(chunk,write_size);
    written += write_size;
    delete [] chunk;
    chunk = 0;
    sz = 0;
  }

  // This function is a callback used by Compressor::compress and
  // Compressor::decompress to allocate memory for their output. It fulfills
  // that responsibility but also writes the previous (now filled) buffer out
  // to the stream.
  //
  // Returns 0 on success. If memory cannot be obtained it returns 1 with
  // buffer set to null and size set to 0.
  static size_t callback(char*& buffer, size_t &size, void* context) {
    // Cast the context to the structure it must point to.
    WriterContext* ctxt = static_cast<WriterContext*>(context);

    // If there's a previously allocated chunk, it must now be filled with
    // output, so we write it out and deallocate it. An empty chunk has
    // nothing to write but still has to be released.
    if (ctxt->chunk != 0) {
      if (ctxt->sz > 0) {
        ctxt->write();
      } else {
        delete [] ctxt->chunk;
        ctxt->chunk = 0;
      }
    }

    // Compute the size of the next chunk to allocate: half of the hint given
    // to the constructor. For very small hints that would be zero bytes,
    // which the Compressor would overrun when it stores its tag byte, so
    // enforce a floor.
    size_t want = ctxt->compSize / 2;
    const size_t floor = MinChunkSize;
    if (want < floor)
      want = floor;

    // Allocate the chunk. Plain new[] throws rather than returning null, so
    // the non-throwing form is required for the failure return below to be
    // reachable.
    char* fresh = new (std::nothrow) char [want];

    // We must return 1 if the allocation failed so that the Compressor knows
    // not to use the buffer pointer.
    if (fresh == 0) {
      buffer = 0;
      size = 0;
      return 1;
    }

    buffer = ctxt->chunk = fresh;
    size = ctxt->sz = want;
    return 0;
  }

  char* chunk;       // pointer to the chunk of memory being filled
  size_t sz;         // size of chunk
  size_t written;    // aggregate total of bytes written in all chunks
  size_t compSize;   // sizing hint; chunks are compSize / 2 bytes long
  std::ostream* Out; // The stream we write the data to.
};

} // end anonymous namespace
- // Compress in one of three ways
- size_t Compressor::compress(const char* in, size_t size,
- OutputDataCallback* cb, void* context) {
- assert(in && "Can't compress null buffer");
- assert(size && "Can't compress empty buffer");
- assert(cb && "Can't compress without a callback function");
- size_t result = 0;
- // For small files, we just don't bother compressing. bzip2 isn't very good
- // with tiny files and can actually make the file larger, so we just avoid
- // it altogether.
- if (size > 64*1024) {
- // Set up the bz_stream
- bz_stream bzdata;
- bzdata.bzalloc = 0;
- bzdata.bzfree = 0;
- bzdata.opaque = 0;
- bzdata.next_in = (char*)in;
- bzdata.avail_in = size;
- bzdata.next_out = 0;
- bzdata.avail_out = 0;
- switch ( BZ2_bzCompressInit(&bzdata, 5, 0, 100) ) {
- case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
- case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
- case BZ_MEM_ERROR: throw std::string("Out of memory");
- case BZ_OK:
- default:
- break;
- }
- // Get a block of memory
- if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) {
- BZ2_bzCompressEnd(&bzdata);
- throw std::string("Can't allocate output buffer");
- }
- // Put compression code in first byte
- (*bzdata.next_out++) = COMP_TYPE_BZIP2;
- bzdata.avail_out--;
- // Compress it
- int bzerr = BZ_FINISH_OK;
- while (BZ_FINISH_OK == (bzerr = BZ2_bzCompress(&bzdata, BZ_FINISH))) {
- if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) {
- BZ2_bzCompressEnd(&bzdata);
- throw std::string("Can't allocate output buffer");
- }
- }
- switch (bzerr) {
- case BZ_SEQUENCE_ERROR:
- case BZ_PARAM_ERROR: throw std::string("Param/Sequence error");
- case BZ_FINISH_OK:
- case BZ_STREAM_END: break;
- default: throw std::string("Oops: ") + utostr(unsigned(bzerr));
- }
- // Finish
- result = bzdata.total_out_lo32 + 1;
- if (sizeof(size_t) == sizeof(uint64_t))
- result |= static_cast<uint64_t>(bzdata.total_out_hi32) << 32;
- BZ2_bzCompressEnd(&bzdata);
- } else {
- // Do null compression, for small files
- NULLCOMP_stream sdata;
- sdata.next_in = (char*)in;
- sdata.avail_in = size;
- NULLCOMP_init(&sdata);
- if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
- throw std::string("Can't allocate output buffer");
- }
- *(sdata.next_out++) = COMP_TYPE_NONE;
- sdata.avail_out--;
- while (!NULLCOMP_compress(&sdata)) {
- if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
- throw std::string("Can't allocate output buffer");
- }
- }
- result = sdata.output_count + 1;
- NULLCOMP_end(&sdata);
- }
- return result;
- }
- size_t Compressor::compressToNewBuffer(const char* in, size_t size, char*&out) {
- BufferContext bc(size);
- size_t result = compress(in,size,BufferContext::callback,(void*)&bc);
- bc.trimTo(result);
- out = bc.buff;
- return result;
- }
- size_t
- Compressor::compressToStream(const char*in, size_t size, std::ostream& out) {
- // Set up the context and writer
- WriterContext ctxt(&out, size / 2);
- // Compress everything after the magic number (which we'll alter).
- size_t zipSize = Compressor::compress(in,size,
- WriterContext::callback, (void*)&ctxt);
- if (ctxt.chunk) {
- ctxt.write(zipSize - ctxt.written);
- }
- return zipSize;
- }
- // Decompress in one of three ways
- size_t Compressor::decompress(const char *in, size_t size,
- OutputDataCallback* cb, void* context) {
- assert(in && "Can't decompress null buffer");
- assert(size > 1 && "Can't decompress empty buffer");
- assert(cb && "Can't decompress without a callback function");
- size_t result = 0;
- switch (*in++) {
- case COMP_TYPE_BZIP2: {
- // Set up the bz_stream
- bz_stream bzdata;
- bzdata.bzalloc = 0;
- bzdata.bzfree = 0;
- bzdata.opaque = 0;
- bzdata.next_in = (char*)in;
- bzdata.avail_in = size - 1;
- bzdata.next_out = 0;
- bzdata.avail_out = 0;
- switch ( BZ2_bzDecompressInit(&bzdata, 0, 0) ) {
- case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
- case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
- case BZ_MEM_ERROR: throw std::string("Out of memory");
- case BZ_OK:
- default:
- break;
- }
- // Get a block of memory
- if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) {
- BZ2_bzDecompressEnd(&bzdata);
- throw std::string("Can't allocate output buffer");
- }
- // Decompress it
- int bzerr = BZ_OK;
- while ( BZ_OK == (bzerr = BZ2_bzDecompress(&bzdata)) &&
- bzdata.avail_in != 0 ) {
- if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) {
- BZ2_bzDecompressEnd(&bzdata);
- throw std::string("Can't allocate output buffer");
- }
- }
- switch (bzerr) {
- case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
- case BZ_MEM_ERROR: throw std::string("Out of memory");
- case BZ_DATA_ERROR: throw std::string("Data integrity error");
- case BZ_DATA_ERROR_MAGIC:throw std::string("Data is not BZIP2");
- case BZ_OK: throw std::string("Insufficient input for bzip2");
- case BZ_STREAM_END: break;
- default: throw("Ooops");
- }
- // Finish
- result = bzdata.total_out_lo32;
- if (sizeof(size_t) == sizeof(uint64_t))
- result |= (static_cast<uint64_t>(bzdata.total_out_hi32) << 32);
- BZ2_bzDecompressEnd(&bzdata);
- break;
- }
- case COMP_TYPE_NONE: {
- NULLCOMP_stream sdata;
- sdata.next_in = (char*)in;
- sdata.avail_in = size - 1;
- NULLCOMP_init(&sdata);
- if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
- throw std::string("Can't allocate output buffer");
- }
- while (!NULLCOMP_decompress(&sdata)) {
- if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
- throw std::string("Can't allocate output buffer");
- }
- }
- result = sdata.output_count;
- NULLCOMP_end(&sdata);
- break;
- }
- default:
- throw std::string("Unknown type of compressed data");
- }
- return result;
- }
- size_t
- Compressor::decompressToNewBuffer(const char* in, size_t size, char*&out) {
- BufferContext bc(size);
- size_t result = decompress(in,size,BufferContext::callback,(void*)&bc);
- out = bc.buff;
- return result;
- }
- size_t
- Compressor::decompressToStream(const char*in, size_t size, std::ostream& out){
- // Set up the context and writer
- WriterContext ctxt(&out,size / 2);
- // Decompress everything after the magic number (which we'll alter)
- size_t zipSize = Compressor::decompress(in,size,
- WriterContext::callback, (void*)&ctxt);
- if (ctxt.chunk) {
- ctxt.write(zipSize - ctxt.written);
- }
- return zipSize;
- }
- // vim: sw=2 ai
|