I want to compress and decompress network data transmitted through Python.
I couldn't find an LZ4 library for Python 2.7 that can be used to compress and decompress streaming data, so I tried to write one myself.
#include <Python.h>
#include "lz4.h"
/* Larger of two values. Being a macro, each argument may be evaluated twice. */
#define MAX(a, b) \
    (((a) > (b)) ? (a) : (b))

/*
 * srcSize + srcSize/256, plus an extra margin for inputs below 128 KiB that
 * shrinks from 64 down to 0 as srcSize approaches 128 KiB.
 */
#define ZSTD_COMPRESSBOUND(srcSize)                 \
    ((srcSize) + ((srcSize) >> 8) +                 \
     (((srcSize) < (128 << 10))                     \
          ? (((128 << 10) - (srcSize)) >> 11)       \
          : 0))

enum {
    /* Nominal size of one uncompressed block: 64 KiB. */
    BufSize = 64 * 1024,
    /* Output capacity large enough for either bound computed on BufSize. */
    LZ4Size = MAX(LZ4_COMPRESSBOUND(BufSize), ZSTD_COMPRESSBOUND(BufSize)),
    /* Capacity of the buffer a block is decompressed into: 256 KiB. */
    TargetFrameSize = 256 * 1024
};
static PyObject* createStream(PyObject* self, PyObject* args) {
LZ4_stream_t* context = LZ4_createStream();
LZ4_resetStream( context );
return Py_BuildValue("l", context);
}
static PyObject* createStreamDecode(PyObject* self, PyObject* args) {
LZ4_streamDecode_t* context = LZ4_createStreamDecode();
LZ4_setStreamDecode( context, NULL, 0 );
return Py_BuildValue("l", context);
}
static PyObject* compress_fast_continue(PyObject* self, PyObject* args) {
LZ4_stream_t* context;
Py_buffer lz4buf;
char buf[LZ4Size];
if (!PyArg_ParseTuple(args, "ls*", &context, &lz4buf))
return NULL;
const int sz = LZ4_compress_fast_continue( context, lz4buf.buf, buf, lz4buf.len, LZ4Size, 1 );
PyBuffer_Release(&lz4buf);
if (sz < 0) {
return PyErr_Format(PyExc_ValueError, "LZ4 decompression error: %d", sz);
}
return PyByteArray_FromStringAndSize(buf, sz);
}
static PyObject* decompress_safe_continue(PyObject* self, PyObject* args) {
LZ4_streamDecode_t* context;
Py_buffer lz4buf;
char buf[TargetFrameSize];
if (!PyArg_ParseTuple(args, "ls*", &context, &lz4buf))
return NULL;
const int sz = LZ4_decompress_safe_continue( context, lz4buf.buf, buf, lz4buf.len, TargetFrameSize );
PyBuffer_Release(&lz4buf);
if (sz < 0) {
return PyErr_Format(PyExc_ValueError, "LZ4 decompression error: %d", sz);
}
return PyByteArray_FromStringAndSize(buf, sz);
}
static PyObject* freeStreamDecode(PyObject* self, PyObject* args) {
LZ4_streamDecode_t* context;
if (!PyArg_ParseTuple(args, "l", &context))
Py_RETURN_FALSE;
LZ4_freeStreamDecode( context );
Py_RETURN_TRUE;
}
static PyObject* freeStream(PyObject* self, PyObject* args) {
LZ4_stream_t* context;
if (!PyArg_ParseTuple(args, "l", &context))
Py_RETURN_FALSE;
LZ4_freeStream( context );
Py_RETURN_TRUE;
}
/*
 * Functions exported by the module. Each row is: Python-visible name,
 * C implementation, calling convention, docstring.
 */
static PyMethodDef methods[] = {
    /* stream creation */
    { "createStream",             createStream,             METH_VARARGS,
      "LZ4_createStream." },
    { "createStreamDecode",       createStreamDecode,       METH_VARARGS,
      "LZ4_createStreamDecode." },

    /* per-block work */
    { "compress_fast_continue",   compress_fast_continue,   METH_VARARGS,
      "LZ4_compress_fast_continue." },
    { "decompress_safe_continue", decompress_safe_continue, METH_VARARGS,
      "LZ4_decompress_safe_continue." },

    /* stream destruction */
    { "freeStreamDecode",         freeStreamDecode,         METH_VARARGS,
      "LZ4_freeStreamDecode." },
    { "freeStream",               freeStream,               METH_VARARGS,
      "LZ4_freeStream." },

    { NULL, NULL, 0, NULL }  /* end-of-table marker */
};
/* Module initializer: registers the `lz4_stream` module with its method table. */
PyMODINIT_FUNC initlz4_stream(void)
{
    Py_InitModule("lz4_stream", methods);
}
I wrote a simple script to test the C extension.
import lz4_stream
encode_context = lz4_stream.createStream()
decode_context = lz4_stream.createStreamDecode()
for i in range(3):
text = random_string(2048)
text = 'aaghjjaaaabss12sssc'
en_data = lz4_stream.compress_fast_continue(encode_context, bytes(text))
print 'code', len(en_data)
de_data = lz4_stream.decompress_safe_continue(decode_context, bytes(en_data))
print 'text', de_data == text
print type(de_data), de_data
lz4_stream.freeStream(encode_context)
lz4_stream.freeStreamDecode(decode_context)
The script prints the following output:
code 21
text True
<type 'bytearray'> aaghjjaaaabss12sssc
code 9
text False
<type 'bytearray'> P2ssscaabss12sssc
code 9
text False
<type 'bytearray'> P2ssscaabss12sssc
After compressing and decompressing, the data is not the same as the original data. What's wrong?
Can anyone help me fix this problem, please?