I'm trying to understand how raw deflate behaves when a dictionary is used. I'm aware of the following:
1. When we use a dictionary, the application should provide the same dictionary for deflate() and inflate().
2. When doing raw deflate, deflateSetDictionary() must be called either before any call of deflate(), or immediately after the completion of a deflate block, i.e. after all input has been consumed and all output has been delivered when using any of the flush options Z_BLOCK, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, or Z_FULL_FLUSH (from the zlib docs).
However, the following application fails to round-trip data that it compressed earlier itself. Both compression and decompression complete without reporting an error, but the decompressed file does not match the original input.
deflate:
/* Compression loop: read the source in CHUNK-sized pieces, compress each
   piece with deflate(), and write the compressed bytes to dest. */
do {
/* The dictionary is set again at the top of every outer iteration,
   i.e. once per CHUNK of input read from source, not once per stream. */
ret = deflateSetDictionary(&strm, dictionary, sizeof(dictionary));
if(ret != Z_OK) {
fprintf(stderr, "Failed to set deflate dictionary\n");
/* NOTE(review): unlike the other error returns below, this path does
   not call deflateEnd() before returning. */
return Z_STREAM_ERROR;
}
strm.avail_in = fread(in, 1, CHUNK, source);
if (ferror(source)) {
(void)deflateEnd(&strm);
return Z_ERRNO;
}
/* the piece read at end-of-file is compressed with Z_FINISH; every
   earlier piece is compressed with Z_FULL_FLUSH */
flush = feof(source) ? Z_FINISH : Z_FULL_FLUSH;
strm.next_in = in;
/* run deflate() on input until output buffer not full, finish
compression if all of source has been read in */
do {
strm.avail_out = CHUNK;
strm.next_out = out;
ret = deflate(&strm, flush); /* no bad return value */
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
/* number of compressed bytes produced by this deflate() call */
have = CHUNK - strm.avail_out;
if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
(void)deflateEnd(&strm);
return Z_ERRNO;
}
/* repeat while the output buffer was filled completely */
} while (strm.avail_out == 0);
assert(strm.avail_in == 0); /* all input will be used */
/* done when last data in file processed */
} while (flush != Z_FINISH);
assert(ret == Z_STREAM_END);
inflate:
/* Decompression loop: read the compressed source in CHUNK-sized pieces,
   decompress each piece with inflate(), and write the result to dest. */
do {
/* The dictionary is set again at the top of every outer iteration,
   i.e. once per CHUNK of bytes read from source.
   NOTE(review): the compressor set its dictionary once per CHUNK of its
   own input; presumably those positions do not coincide with CHUNK-byte
   boundaries of the compressed stream read here - confirm. */
ret = inflateSetDictionary(&strm, dictionary, sizeof(dictionary));
if(ret != Z_OK) {
fprintf(stderr, "Failed to set inflate dictionary\n");
/* NOTE(review): unlike the other error returns below, this path does
   not call inflateEnd() before returning. */
return Z_STREAM_ERROR;
}
strm.avail_in = fread(in, 1, CHUNK, source);
if (ferror(source)) {
(void)inflateEnd(&strm);
return Z_ERRNO;
}
/* nothing was read: leave the outer loop */
if (strm.avail_in == 0)
break;
strm.next_in = in;
/* run inflate() on input until output buffer not full */
do {
strm.avail_out = CHUNK;
strm.next_out = out;
/* NOTE(review): Z_FULL_FLUSH is the flush value this program also
   passes to deflate(); confirm in the zlib manual that it is a
   meaningful flush argument for inflate(). */
ret = inflate(&strm, Z_FULL_FLUSH);
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
/* Z_NEED_DICT is reported to the caller as Z_DATA_ERROR; all three
   cases release the stream and return */
switch (ret) {
case Z_NEED_DICT:
ret = Z_DATA_ERROR; /* and fall through */
case Z_DATA_ERROR:
case Z_MEM_ERROR:
(void)inflateEnd(&strm);
return ret;
}
/* number of decompressed bytes produced by this inflate() call */
have = CHUNK - strm.avail_out;
if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
(void)inflateEnd(&strm);
return Z_ERRNO;
}
/* repeat while the output buffer was filled completely */
} while (strm.avail_out == 0);
/* done when inflate() says it's done */
} while (ret != Z_STREAM_END);
When deflating, you are setting the same dictionary every CHUNK input bytes. Why? You should use deflateSetDictionary() once, right after deflateInit2(). From there on, the input data itself should serve as a better source of matching strings than a dictionary you might provide.
On the inflating side, you would have to know where the compressed blocks end so that you can do the inflateSetDictionary() in exactly the same places it occurred when compressing. This would require some sort of marking, counting, or searching for the full flush pattern.