How to compress and decompress streaming binary data in Python using LZ4

94 views Asked by At

I want to compress and decompress network data transmitted through Python.

I couldn't find an LZ4 library for Python 2.7 that can be used to compress and decompress streaming data, so I tried to write one myself.

#include <Python.h>
#include "lz4.h"

/* Larger of two values. Each argument may be evaluated twice, so pass only
 * side-effect-free expressions. */
#define MAX(a, b) (((a) > (b)) ? (a) : (b))

/* Size bound computed from srcSize: srcSize, plus srcSize/256, plus a margin
 * term that shrinks from 64 to 0 as srcSize grows towards 128 KiB. */
#define ZSTD_COMPRESSBOUND(srcSize)                                   \
    ((srcSize) + ((srcSize)>>8) +                                     \
     (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) : 0))

enum {
    /* Block size used to dimension the compression output buffer. */
    BufSize = 64 * 1024,
    /* Output capacity: the larger of the two bounds for a BufSize block. */
    LZ4Size = MAX( LZ4_COMPRESSBOUND( BufSize ), ZSTD_COMPRESSBOUND( BufSize ) ),
    /* Maximum number of bytes a single block may decompress to. */
    TargetFrameSize = 256 * 1024
};

static PyObject* createStream(PyObject* self, PyObject* args) {
    LZ4_stream_t* context = LZ4_createStream();
    LZ4_resetStream( context );
    return Py_BuildValue("l", context);
}

static PyObject* createStreamDecode(PyObject* self, PyObject* args) {
    LZ4_streamDecode_t* context = LZ4_createStreamDecode();
    LZ4_setStreamDecode( context, NULL, 0 );
    return Py_BuildValue("l", context);
}

static PyObject* compress_fast_continue(PyObject* self, PyObject* args) {
    LZ4_stream_t* context;
    Py_buffer lz4buf;
    char buf[LZ4Size];
    if (!PyArg_ParseTuple(args, "ls*", &context, &lz4buf))
        return NULL;
    const int sz = LZ4_compress_fast_continue( context, lz4buf.buf, buf, lz4buf.len, LZ4Size, 1 );
    PyBuffer_Release(&lz4buf);
    if (sz < 0) {
        return PyErr_Format(PyExc_ValueError, "LZ4 decompression error: %d", sz);
    }
    return PyByteArray_FromStringAndSize(buf, sz);
}

static PyObject* decompress_safe_continue(PyObject* self, PyObject* args) {
    LZ4_streamDecode_t* context;
    Py_buffer lz4buf;
    char buf[TargetFrameSize];
    if (!PyArg_ParseTuple(args, "ls*", &context, &lz4buf))
        return NULL;
    const int sz = LZ4_decompress_safe_continue( context, lz4buf.buf, buf, lz4buf.len, TargetFrameSize );
    PyBuffer_Release(&lz4buf);
    if (sz < 0) {
        return PyErr_Format(PyExc_ValueError, "LZ4 decompression error: %d", sz);
    }
    return PyByteArray_FromStringAndSize(buf, sz);
}

static PyObject* freeStreamDecode(PyObject* self, PyObject* args) {
    LZ4_streamDecode_t* context;
    if (!PyArg_ParseTuple(args, "l", &context))
        Py_RETURN_FALSE;
    LZ4_freeStreamDecode( context );
    Py_RETURN_TRUE;
}

static PyObject* freeStream(PyObject* self, PyObject* args) {
    LZ4_stream_t* context;
    if (!PyArg_ParseTuple(args, "l", &context))
        Py_RETURN_FALSE;
    LZ4_freeStream( context );
    Py_RETURN_TRUE;
}

static PyMethodDef methods[] = {
    {"createStream", createStream, METH_VARARGS, "LZ4_createStream."},
    {"createStreamDecode", createStreamDecode, METH_VARARGS, "LZ4_createStreamDecode."},
    {"compress_fast_continue", compress_fast_continue, METH_VARARGS, "LZ4_compress_fast_continue."},
    {"decompress_safe_continue", decompress_safe_continue, METH_VARARGS, "LZ4_decompress_safe_continue."},
    {"freeStreamDecode", freeStreamDecode, METH_VARARGS, "LZ4_freeStreamDecode."},
    {"freeStream", freeStream, METH_VARARGS, "LZ4_freeStream."},
    {NULL, NULL, 0, NULL}
};

/* Python 2 module initializer: registers the `methods` table under the module
 * name "lz4_stream". The module object returned by Py_InitModule is not
 * needed here and is deliberately discarded. */
PyMODINIT_FUNC
initlz4_stream(void)
{
    (void) Py_InitModule("lz4_stream", methods);
}

I wrote a simple script to test the C extension.

import lz4_stream
encode_context = lz4_stream.createStream()
decode_context = lz4_stream.createStreamDecode()

for i in range(3):
    text = random_string(2048)
    text = 'aaghjjaaaabss12sssc'
    en_data = lz4_stream.compress_fast_continue(encode_context, bytes(text))
    print 'code', len(en_data)
    de_data = lz4_stream.decompress_safe_continue(decode_context, bytes(en_data))
    print 'text', de_data == text
    print type(de_data), de_data

lz4_stream.freeStream(encode_context)
lz4_stream.freeStreamDecode(decode_context)

The script produces the following output.

code 21
text True
<type 'bytearray'> aaghjjaaaabss12sssc
code 9
text False
<type 'bytearray'> P2ssscaabss12sssc
code 9
text False
<type 'bytearray'> P2ssscaabss12sssc

After compressing and decompressing, the data is not the same as the original data (only the first block round-trips correctly). What's wrong?

Can anyone help me fix this problem, please?

0

There are 0 answers