/*******************************************************************************

    copyright:  Copyright (C) 2007 Daniel Keep.  All rights reserved.

    license:    BSD style: $(LICENSE)

    version:    Initial release: July 2007

    author:     Daniel Keep

*******************************************************************************/

module tango.io.compression.Zlib;

import tango.core.Exception : TracedException;
import tango.io.model.IConduit : IConduit, InputStream, OutputStream;
import tango.io.Conduit : InputFilter, OutputFilter;
import tango.stdc.stringz : fromUtf8z;

/* This constant controls the size of the input/output buffers we use
 * internally.  This should be a fairly sane value (it's suggested by the zlib
 * documentation), that should only need changing for memory-constrained
 * platforms/use cases.
 *
 * An alternative would be to make the chunk size a template parameter to the
 * filters themselves, but Tango already has more than enough template
 * parameters getting in the way :)
 */

private const CHUNKSIZE = 256 * 1024;

/*******************************************************************************

    This input filter can be used to perform decompression of zlib streams.

*******************************************************************************/

class ZlibDecompressionFilter : InputFilter
{
    private
    {
        /* Used to make sure we don't try to perform operations on a dead
         * stream. */
        bool zs_valid = false;

        // zlib's inflate state for this filter.
        z_stream zs;

        // Holds compressed bytes read from the upstream until zlib has
        // consumed them.
        ubyte[CHUNKSIZE] in_chunk;
    }

    /***************************************************************************

        Constructs a new zlib decompression filter.  You need to pass in the
        stream that the decompression filter will read from.  If you are using
        this filter with a conduit, the idiom to use is:

        ---
        myConduit.attach(new ZlibDecompressionFilter(myConduit.input));
        myConduit.input.read(myBuffer);
        ---

        Throws ZlibException if zlib fails to initialise the inflate state.

    ***************************************************************************/

    this(InputStream stream)
    {
        super(stream);

        // Allocate inflate state
        with( zs )
        {
            zalloc = null;
            zfree = null;
            opaque = null;
            avail_in = 0;
            next_in = null;
        }

        auto ret = inflateInit(&zs);
        if( ret != Z_OK )
            throw new ZlibException(ret);

        zs_valid = true;
    }

    // Releases the inflate state if the stream has not already been killed.
    ~this()
    {
        if( zs_valid )
            kill_zs();
    }

    /***************************************************************************

        Decompresses data from the underlying conduit into a target array.

        Returns the number of bytes stored into dst, which may be less than
        requested.  If the underlying stream reports end-of-flow when more
        compressed input is needed, IConduit.Eof is returned instead.

        Throws ZlibStreamClosedException if the zlib stream has already ended,
        and ZlibException if zlib reports Z_NEED_DICT, Z_DATA_ERROR or
        Z_MEM_ERROR.

    ***************************************************************************/

    uint read(void[] dst)
    {
        check_valid();

        // Check to see if we've run out of input data.  If we have, get some
        // more.
        if( zs.avail_in == 0 )
        {
            auto len = next.read(in_chunk);

            // The end-of-flow sentinel is not a byte count; storing it in
            // avail_in would tell zlib that in_chunk holds far more data than
            // it really does.  Pass it on to the caller instead.
            if( len == IConduit.Eof )
                return IConduit.Eof;

            zs.avail_in = len;
            zs.next_in = in_chunk.ptr;
        }

        // We'll tell zlib to inflate straight into the target array.
        zs.avail_out = dst.length;
        zs.next_out = cast(ubyte*)dst.ptr;

        auto ret = inflate(&zs, Z_NO_FLUSH);

        switch( ret )
        {
            case Z_NEED_DICT:
                // Whilst not technically an error, this should never happen
                // for general-use code, so treat it as an error.
            case Z_DATA_ERROR:
            case Z_MEM_ERROR:
                kill_zs();
                throw new ZlibException(ret);

            case Z_STREAM_END:
                // zlib stream is finished; kill the stream so we don't try to
                // read from it again.
                kill_zs();
                break;

            default:
                break;
        }

        // Whatever zlib did not leave free in dst is what it produced.
        return dst.length - zs.avail_out;
    }

    // This function kills the stream: it deallocates the internal state, and
    // unsets the zs_valid flag.
    private void kill_zs()
    {
        check_valid();

        inflateEnd(&zs);
        zs_valid = false;
    }

    // Asserts that the stream is still valid and usable (except that this
    // check doesn't get elided with -release).
    private void check_valid()
    {
        if( !zs_valid )
            throw new ZlibStreamClosedException;
    }
}

/*******************************************************************************

    This output filter can be used to perform compression of data into a zlib
    stream.

*******************************************************************************/

class ZlibCompressionFilter : OutputFilter
{
    /***************************************************************************

        This enumeration represents several pre-defined compression levels.

        None instructs zlib to perform no compression whatsoever, and simply
        store the data stream.  Note that this actually expands the stream
        slightly to accommodate the zlib stream metadata.

        Fast instructs zlib to perform a minimal amount of compression, Best
        indicates that you want the maximum level of compression and Normal
        (the default level) is a compromise between the two.  The exact
        compression level Normal represents is determined by the underlying
        zlib library, but is typically level 6.

        Any integer between -1 and 9 inclusive may be used as a level,
        although the symbols in this enumeration should suffice for most
        use-cases.

    ***************************************************************************/

    enum Level : int
    {
        Normal = -1,
        None = 0,
        Fast = 1,
        Best = 9
    }

    private
    {
        // Guards against operating on a stream that has been ended.
        bool zs_valid = false;

        // zlib's deflate state for this filter.
        z_stream zs;

        // Receives compressed bytes from zlib before they are written to the
        // downstream.
        ubyte[CHUNKSIZE] out_chunk;
    }

    /***************************************************************************

        Constructs a new zlib compression filter.  You need to pass in the
        stream that the compression filter will write to.  If you are using
        this filter with a conduit, the idiom to use is:

        ---
        myConduit.attach(new ZlibCompressionFilter(myConduit.output));
        myConduit.output.write(myBuffer);
        ---

        Throws ZlibException if zlib fails to initialise the deflate state.

    ***************************************************************************/

    this(OutputStream stream, Level level = Level.Normal)
    {
        super(stream);

        // Allocate deflate state
        with( zs )
        {
            zalloc = null;
            zfree = null;
            opaque = null;
        }

        auto ret = deflateInit(&zs, level);
        if( ret != Z_OK )
            throw new ZlibException(ret);

        zs_valid = true;
    }

    // Releases the deflate state if the stream has not already been killed.
    ~this()
    {
        if( zs_valid )
            kill_zs();
    }

    /***************************************************************************

        Compresses the given data to the underlying conduit.

        Returns the number of bytes from src that were compressed, which may
        be less than given.

        Throws ZlibStreamClosedException if the stream has already been
        flushed, and ZlibException if zlib reports Z_STREAM_ERROR.  Any
        exception leaving this method also kills the zlib stream.

    ***************************************************************************/

    uint write(void[] src)
    {
        check_valid();
        scope(failure) kill_zs();

        zs.avail_in = src.length;
        zs.next_in = cast(ubyte*)src.ptr;

        do
        {
            zs.avail_out = out_chunk.length;
            zs.next_out = out_chunk.ptr;

            auto ret = deflate(&zs, Z_NO_FLUSH);
            if( ret == Z_STREAM_ERROR )
                throw new ZlibException(ret);

            // Only bother the downstream when zlib actually produced output
            // (matching what flush does); zlib may have produced nothing
            // at all for this call.
            auto have = out_chunk.length - zs.avail_out;
            if( have > 0 )
                next.write(out_chunk[0..have]);
        }
        // Loop while we are still using up the whole output buffer
        while( zs.avail_out == 0 );

        assert( zs.avail_in == 0, "failed to compress all provided data" );

        return src.length;
    }

    /***************************************************************************

        Purge any buffered content.  Calling this will implicitly end the zlib
        stream, so it should not be called until you are finished compressing
        data.  Any calls to either write or flush after a compression filter
        has been flushed will throw an exception.

        Throws ZlibException if zlib reports anything other than Z_OK or
        Z_STREAM_END while finishing the stream.

    ***************************************************************************/

    void flush()
    {
        check_valid();
        scope(failure) kill_zs();

        // There is no new input; we only want zlib to drain what it holds.
        zs.avail_in = 0;
        zs.next_in = null;

        bool finished = false;

        do
        {
            zs.avail_out = out_chunk.length;
            zs.next_out = out_chunk.ptr;

            auto ret = deflate(&zs, Z_FINISH);
            switch( ret )
            {
                case Z_OK:
                    // Keep going
                    break;

                case Z_STREAM_END:
                    // We're done!
                    finished = true;
                    break;

                default:
                    throw new ZlibException(ret);
            }

            auto have = out_chunk.length - zs.avail_out;
            if( have > 0 )
                next.write(out_chunk[0..have]);
        }
        while( !finished );

        next.flush();
        kill_zs();
    }

    // This function kills the stream: it deallocates the internal state, and
    // unsets the zs_valid flag.
    private void kill_zs()
    {
        check_valid();

        deflateEnd(&zs);
        zs_valid = false;
    }

    // Asserts that the stream is still valid and usable (except that this
    // check doesn't get elided with -release).
    private void check_valid()
    {
        if( !zs_valid )
            throw new ZlibStreamClosedException;
    }
}

/*******************************************************************************

    This exception is thrown if you attempt to perform a read, write or flush
    operation on a closed zlib filter stream.  This can occur if the input
    stream has finished, or an output stream was flushed.

*******************************************************************************/

class ZlibStreamClosedException : TracedException
{
    this()
    {
        super("cannot operate on closed zlib stream");
    }
}

/*******************************************************************************

    This exception is thrown when an error occurs in the underlying zlib
    library.  Where possible, it will indicate both the name of the error, and
    any textual message zlib has provided.

*******************************************************************************/

class ZlibException : TracedException
{
    // Builds the exception message from the symbolic name of a zlib code.
    this(int code)
    {
        super(codeName(code));
    }

    // As above, but appends the zero-terminated message supplied by zlib.
    this(int code, char* msg)
    {
        super(codeName(code)~": "~fromUtf8z(msg));
    }

    // Maps a zlib return code to its symbolic name; codes not listed here
    // are reported as "Z_UNKNOWN".
    protected char[] codeName(int code)
    {
        char[] name;

        switch( code )
        {
            case Z_OK:              name = "Z_OK";              break;
            case Z_STREAM_END:      name = "Z_STREAM_END";      break;
            case Z_NEED_DICT:       name = "Z_NEED_DICT";       break;
            case Z_ERRNO:           name = "Z_ERRNO";           break;
            case Z_STREAM_ERROR:    name = "Z_STREAM_ERROR";    break;
            case Z_DATA_ERROR:      name = "Z_DATA_ERROR";      break;
            case Z_MEM_ERROR:       name = "Z_MEM_ERROR";       break;
            case Z_BUF_ERROR:       name = "Z_BUF_ERROR";       break;
            case Z_VERSION_ERROR:   name = "Z_VERSION_ERROR";   break;
            default:                name = "Z_UNKNOWN";
        }

        return name;
    }
}

/* *****************************************************************************

    This next section contains the binding to the zlib C api.  Should this be
    moved out to a separate module (and if so, where?)

***************************************************************************** */

private
{
    // This is used as a "minimum version" check.
    const ZLIB_VERSION = "1.2.3";

    alias void* function(void* opaque, uint items, uint size) alloc_func;
    alias void function(void* opaque, void* address) free_func;

    // Binding of zlib's z_stream structure.
    //
    // NOTE(review): zlib.h declares total_in, total_out, adler and reserved
    // as uLong (C unsigned long).  They are bound here as uint, which
    // presumably only matches targets where unsigned long is 32 bits wide --
    // confirm before building for LP64 targets.
    struct z_stream
    {
        ubyte*      next_in;
        uint        avail_in;
        uint        total_in;

        ubyte*      next_out;
        uint        avail_out;
        uint        total_out;

        char*       msg;
        void*       state;

        alloc_func  zalloc;
        free_func   zfree;
        void*       opaque;

        int         data_type;
        uint        adler;
        uint        reserved;
    }

    // Flush modes accepted by deflate/inflate.
    enum
    {
        Z_NO_FLUSH      = 0,
        Z_PARTIAL_FLUSH = 1,
        Z_SYNC_FLUSH    = 2,
        Z_FULL_FLUSH    = 3,
        Z_FINISH        = 4,
        Z_BLOCK         = 5
    }

    // Return codes.
    enum
    {
        Z_OK            = 0,
        Z_STREAM_END    = 1,
        Z_NEED_DICT     = 2,
        Z_ERRNO         = -1,
        Z_STREAM_ERROR  = -2,
        Z_DATA_ERROR    = -3,
        Z_MEM_ERROR     = -4,
        Z_BUF_ERROR     = -5,
        Z_VERSION_ERROR = -6
    }

    // Compression levels.  zlib.h defines Z_BEST_COMPRESSION as 9, which is
    // also the value used by ZlibCompressionFilter.Level.Best.
    enum
    {
        Z_NO_COMPRESSION        = 0,
        Z_BEST_SPEED            = 1,
        Z_BEST_COMPRESSION      = 9,
        Z_DEFAULT_COMPRESSION   = -1
    }

    // Evil, evil hacks.  Blame the zlib peoples...
    // These wrappers forward to the underscore-suffixed entry points,
    // supplying the version string and structure size.
    int deflateInit(z_stream* strm, int level)
    {
        return deflateInit_(strm, level, ZLIB_VERSION.ptr, z_stream.sizeof);
    }

    int inflateInit(z_stream* strm)
    {
        return inflateInit_(strm, ZLIB_VERSION.ptr, z_stream.sizeof);
    }

    extern(C)
    {
        char* zlibVersion();

        int deflateInit_(z_stream*, int, char*, int);
        int deflate(z_stream*, int);
        int deflateEnd(z_stream*);

        int inflateInit_(z_stream*, char*, int);
        int inflate(z_stream*, int);
        int inflateEnd(z_stream*);
    }
}

/* *****************************************************************************

    This section contains a simple unit test for this module.  It is hidden
    behind a version statement because it introduces additional dependencies.

***************************************************************************** */

version(Unittest):

import tango.io.MemoryConduit : MemoryConduit;

unittest
{
    // One ring to rule them all, one ring to find them,
    // One ring to bring them all and in the darkness bind them.
    const char[] message =
        "Ash nazg durbatulûk, ash nazg gimbatul, "
        "ash nazg thrakatulûk, agh burzum-ishi krimpatul.";

    // This compressed data was created using Python 2.5's built in zlib
    // module, with the default compression level.
    const ubyte[] message_z = [
        0x78,0x9c,0x73,0x2c,0xce,0x50,0xc8,0x4b,
        0xac,0x4a,0x57,0x48,0x29,0x2d,0x4a,0x4a,
        0x2c,0x29,0xcd,0x39,0xbc,0x3b,0x5b,0x47,
        0x21,0x11,0x26,0x9a,0x9e,0x99,0x0b,0x16,
        0x45,0x12,0x2a,0xc9,0x28,0x4a,0xcc,0x46,
        0xa8,0x4c,0xcf,0x50,0x48,0x2a,0x2d,0xaa,
        0x2a,0xcd,0xd5,0xcd,0x2c,0xce,0xc8,0x54,
        0xc8,0x2e,0xca,0xcc,0x2d,0x00,0xc9,0xea,
        0x01,0x00,0x1f,0xe3,0x22,0x99];

    // Compress the message and check it against the reference stream.
    scope cond_z = new MemoryConduit;
    scope comp = new ZlibCompressionFilter(cond_z.output);
    cond_z.attach(comp);
    cond_z.output.write(message);
    cond_z.output.flush();

    assert( message_z == cast(ubyte[])(cond_z.slice) );

    // Decompress it again and check that we get the original text back.
    scope decomp = new ZlibDecompressionFilter(cond_z.input);
    cond_z.attach(decomp);
    auto buffer = new ubyte[256];
    buffer = buffer[0..cond_z.input.read(buffer)];

    assert( cast(ubyte[])message == buffer );
}