One of my projects allowed the user to request a ZIP archive of files off the server, instead of downloading them individually. To avoid requiring (potentially) double the disk space, I wanted to stream the archive as I created it. Additionally, the total size of the uncompressed data could easily exceed 2^32 bytes, so I needed to use the Zip64 extensions.
In addition to providing the compression primitives, the excellent zlib package ships with a contributed ZIP generator, `minizip`. Unfortunately, this generator uses `seek` to move back and forth within the generated file, so it wasn't suitable for my needs.
I eventually wrote my own utility to do this streaming. It took quite a few tries, as the spec is confusing and ambiguous. Once I figured out the spec, I wrote the code to always assume the worst (file size, number of files, etc); this costs a few extra bytes, but saves tons of logic.
The most interesting bits, at least regarding the ambiguous wording of the specification, are when we add a single file to the archive in `Zip64Streamer::addFile`:
bool Zip64Streamer::addFile( const string & file ) { DEBUG( "af: adding file " << QS( file ) ); FileInfo fi; fi.path = m_sDir + "/" + file; fi.name = file; fi.offset = m_offset; fillDateTime( fi ); CharBuffer z64; write2( z64, Z64_EXTRA_FIELD_TAG ); write2( z64, LENGTH_PLACEHOLDER ); write8( z64, DEFER_UNCOMPRESSED_SIZE ); write8( z64, DEFER_COMPRESSED_SIZE ); fixupExtraFieldLength( z64 ); CharBuffer unix; write2( unix, UNIX_EXTRA_FIELD_TAG ); write2( unix, LENGTH_PLACEHOLDER ); write4( unix, fi.stat_atime ); write4( unix, fi.stat_mtime ); write2( unix, UNIX_ZIP_UID ); write2( unix, UNIX_ZIP_GID ); fixupExtraFieldLength( unix ); CharBuffer lh; // local header write4( lh, LOCAL_FILE_HEADER_SIG ); write2( lh, VERSION_NEEDED_TO_EXTRACT_4_5 ); write2( lh, GPB_DATA_DESC_FOLLOWS_DATA ); write2( lh, COMPRESSION_METHOD_DEFLATE ); write2( lh, fi.msdos_time ); write2( lh, fi.msdos_date ); write4( lh, DEFER_CRC32 ); write4( lh, FORCE_Z64_COMPRESSED_SIZE ); write4( lh, FORCE_Z64_UNCOMPRESSED_SIZE ); write2( lh, static_cast< uint16_t >( fi.name.size() ) ); write2( lh, static_cast< uint16_t >( z64.size() + unix.size() ) ); FINE( "af: " << file << ": writing header" ); emit( lh ); emitCopy( fi.name ); emit( z64 ); emit( unix ); emitCompressedData( fi ); FINE( "af: " << file << ": writing descriptor" ); CharBuffer dd; // data descriptor write4( dd, DATA_DESC_SIG ); write4( dd, fi.crc32 ); write8( dd, fi.compressed ); write8( dd, fi.uncompressed ); emit( dd ); // save info for eventual use in central directory m_fileInfo.push_back( fi ); return true; }
After we've added all the files, we finalize the archive in the destructor:
/// Finalizes the archive.
///
/// Emits, in order: one central-directory record per entry saved in
/// m_fileInfo, the Zip64 end-of-central-directory record, the Zip64
/// end-of-central-directory locator, and the classic end-of-central-directory
/// record; then calls deflateEnd() on m_zs.
///
/// The fixed-width fields of the classic structures are always filled with
/// the FORCE_Z64_* constants, and the real 64-bit values are always written
/// to the Zip64 structures, regardless of archive size.
Zip64Streamer::~Zip64Streamer()
{
    DEBUG( "dtor: finishing zip file" );

    // save start of central directory
    const uint64_t centralDirOffset( m_offset );

    // emit a central directory record for each file
    for ( const FileInfo & fi : m_fileInfo )
    {
        FINE( "dtor: adding central dir record for " << QS( fi.name ) );

        // Zip64 extra field: real sizes plus the local header offset.
        CharBuffer z64Extra;
        write2( z64Extra, Z64_EXTRA_FIELD_TAG );
        write2( z64Extra, LENGTH_PLACEHOLDER );
        write8( z64Extra, fi.uncompressed );
        write8( z64Extra, fi.compressed );
        write8( z64Extra, fi.offset );
        fixupExtraFieldLength( z64Extra );

        // Unix extra field.  Deliberately not named `unix`: GCC predefines
        // `unix` as a macro under GNU dialects (-std=gnu++NN), which would
        // break the declaration.
        CharBuffer unixExtra;
        write2( unixExtra, UNIX_EXTRA_FIELD_TAG );
        write2( unixExtra, LENGTH_PLACEHOLDER );
        write4( unixExtra, fi.stat_atime );
        write4( unixExtra, fi.stat_mtime );
        write2( unixExtra, UNIX_ZIP_UID );
        write2( unixExtra, UNIX_ZIP_GID );
        fixupExtraFieldLength( unixExtra );

        CharBuffer cd; // central directory file header
        write4( cd, CDIR_FILE_HEADER_SIG );
        write2( cd, VERSION_CREATED_BY_4_5_UNIX );
        write2( cd, VERSION_NEEDED_TO_EXTRACT_4_5 );
        write2( cd, GPB_DATA_DESC_FOLLOWS_DATA );
        write2( cd, COMPRESSION_METHOD_DEFLATE );
        write2( cd, fi.msdos_time );
        write2( cd, fi.msdos_date );
        write4( cd, fi.crc32 );
        write4( cd, FORCE_Z64_COMPRESSED_SIZE );
        write4( cd, FORCE_Z64_UNCOMPRESSED_SIZE );
        write2( cd, static_cast< uint16_t >( fi.name.size() ) );
        write2( cd, static_cast< uint16_t >( z64Extra.size() + unixExtra.size() ) );
        write2( cd, ZERO_COMMENT_LENGTH );
        write2( cd, DISK_START_ZERO );
        write2( cd, ZERO_INTERNAL_FILE_ATTR );
        write4( cd, UNIX_EXTERNAL_FILE_ATTR );
        write4( cd, FORCE_Z64_OFFSET );

        emit( cd );
        emitCopy( fi.name );
        emit( z64Extra );
        emit( unixExtra );
    }

    // how many bytes did that use?
    const uint64_t centralDirBytes( m_offset - centralDirOffset );

    // and where are we now?
    const uint64_t z64EndOfCentralDirLoc( m_offset );

    FINE( "dtor: central dir: "
          "bytes=" << centralDirBytes << ", "
          "offset=" << centralDirOffset );

    DEBUG( "dtor: adding z64 end of central directory record @ " << m_offset );
    CharBuffer z64;
    write4( z64, Z64_END_OF_CENTRAL_DIR_REC_SIG );
    write8( z64, LENGTH_PLACEHOLDER );
    write2( z64, VERSION_CREATED_BY_4_5_UNIX );
    write2( z64, VERSION_NEEDED_TO_EXTRACT_4_5 );
    write4( z64, DISK_NUMBER_ZERO );
    write4( z64, DISK_START_ZERO );
    write8( z64, m_fileInfo.size() ); // # dir entries on this disk
    write8( z64, m_fileInfo.size() ); // # dir entries total
    write8( z64, centralDirBytes );
    write8( z64, centralDirOffset );
    fixupRecordLength64( z64 );
    emit( z64 );

    DEBUG( "dtor: adding z64 end of central directory locator @ " << m_offset );
    CharBuffer loc;
    write4( loc, Z64_END_OF_CENTRAL_DIR_LOC_SIG );
    write4( loc, DISK_NUMBER_ZERO );
    write8( loc, z64EndOfCentralDirLoc );
    write4( loc, DISK_TOTAL_ONE );
    emit( loc );

    DEBUG( "dtor: adding end of central directory record @ " << m_offset);
    CharBuffer end;
    write4( end, END_OF_CENTRAL_DIR_SIG );
    write2( end, DISK_NUMBER_ZERO );
    write2( end, DISK_START_ZERO );
    write2( end, FORCE_Z64_LOCAL_ENTRIES );
    write2( end, FORCE_Z64_TOTAL_ENTRIES );
    write4( end, FORCE_Z64_CDIR_SIZE );
    write4( end, FORCE_Z64_CDIR_OFFSET );
    write2( end, ZERO_COMMENT_LENGTH );
    emit( end );

    DEBUG( "dtor: finalizing zlib" );
    deflateEnd( &m_zs );

    DEBUG( "dtor: done" );
}
The full implementation can be found on github at https://github.com/tkil/ajf-prog/tree/master/Zip64Streamer. Hopefully others will find it useful.
Happy hacking!