Files
entropy/pylzma/pylzma_decompressobj.c
T
2009-04-02 13:45:25 +02:00

363 lines
13 KiB
C

/*
* Python Bindings for LZMA
*
* Copyright (c) 2004-2006 by Joachim Bauch, mail@joachim-bauch.de
* 7-Zip Copyright (C) 1999-2005 Igor Pavlov
* LZMA SDK Copyright (C) 1999-2005 Igor Pavlov
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* $Id: pylzma_decompressobj.c 116 2006-09-28 21:46:20Z jojo $
*
*/
#include <Python.h>
#include <7zip/LzmaStateDecode.h>
#include "pylzma.h"
#include "pylzma_decompress.h"
#include "pylzma_decompressobj.h"
/*
 * tp_init handler for the decompression object.
 *
 * Accepts one optional argument, "maxlength" (int, default -1).  The value
 * must be -1 or greater than zero; 0 and anything below -1 raise ValueError.
 * On success the object is put into its initial state: no buffered input,
 * stream properties still to be read, zero bytes produced.
 *
 * Returns 0 on success, -1 with a Python exception set on failure.  When
 * argument parsing or validation fails the object is left untouched.
 */
int pylzma_decomp_init(CDecompressionObject *self, PyObject *args, PyObject *kwargs)
{
    int max_length = -1;
    // possible keywords for this function
    static char *kwlist[] = {"maxlength", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &max_length))
        return -1;

    if (max_length == 0 || max_length < -1) {
        PyErr_SetString(PyExc_ValueError, "the decompressed size must be greater than zero");
        return -1;
    }

    // __init__ can be invoked again on an object that already holds buffers;
    // release them first so a second call does not leak.
    // NOTE(review): on the very first call this relies on the instance memory
    // being zero-filled by the allocator, the same assumption the dealloc
    // handler already makes when __init__ never ran -- confirm.
    free_lzma_state(&self->state);
    FREE_AND_NULL(self->unconsumed_tail);

    self->unconsumed_tail = NULL;
    self->unconsumed_length = 0;
    self->need_properties = 1;
    self->max_length = max_length;
    self->total_out = 0;
    memset(&self->state, 0, sizeof(self->state));
    return 0;
}
static const char doc_decomp_decompress[] = \
"decompress(data[, bufsize]) -- Returns a string containing the up to bufsize decompressed bytes of the data.\n" \
"After calling, some of the input data may be available in internal buffers for later processing.";
static PyObject *pylzma_decomp_decompress(CDecompressionObject *self, PyObject *args)
{
PyObject *result=NULL;
unsigned char *data, *next_in, *next_out;
int length, start_total_out, res, max_length=BLOCK_SIZE;
SizeT avail_in, avail_out;
unsigned char properties[LZMA_PROPERTIES_SIZE];
SizeT inProcessed, outProcessed;
if (!PyArg_ParseTuple(args, "s#|l", &data, &length, &max_length))
return NULL;
if (max_length <= 0)
{
PyErr_SetString(PyExc_ValueError, "bufsize must be greater than zero");
return NULL;
}
start_total_out = self->total_out;
if (self->unconsumed_length > 0) {
self->unconsumed_tail = (unsigned char *)realloc(self->unconsumed_tail, self->unconsumed_length + length);
next_in = (unsigned char *)self->unconsumed_tail;
memcpy(next_in + self->unconsumed_length, data, length);
} else
next_in = data;
avail_in = self->unconsumed_length + length;
if (self->need_properties && avail_in < sizeof(properties)) {
// we need enough bytes to read the properties
if (!self->unconsumed_length) {
self->unconsumed_tail = (unsigned char *)malloc(length);
memcpy(self->unconsumed_tail, data, length);
}
self->unconsumed_length += length;
return PyString_FromString("");
}
if (self->need_properties) {
self->need_properties = 0;
memcpy(&properties, next_in, sizeof(properties));
avail_in -= sizeof(properties);
next_in += sizeof(properties);
if (self->unconsumed_length >= sizeof(properties)-length) {
self->unconsumed_length -= sizeof(properties)-length;
if (self->unconsumed_length > 0) {
memcpy(self->unconsumed_tail, self->unconsumed_tail+sizeof(properties), self->unconsumed_length);
self->unconsumed_tail = (unsigned char *)realloc(self->unconsumed_tail, self->unconsumed_length);
} else
FREE_AND_NULL(self->unconsumed_tail);
}
if (LzmaDecodeProperties(&self->state.Properties, properties, LZMA_PROPERTIES_SIZE) != LZMA_RESULT_OK)
{
PyErr_SetString(PyExc_TypeError, "Incorrect stream properties");
goto exit;
}
self->state.Probs = (CProb *)malloc(LzmaGetNumProbs(&self->state.Properties) * sizeof(CProb));
if (self->state.Probs == 0) {
PyErr_NoMemory();
goto exit;
}
if (self->state.Properties.DictionarySize == 0)
self->state.Dictionary = 0;
else {
self->state.Dictionary = (unsigned char *)malloc(self->state.Properties.DictionarySize);
if (self->state.Dictionary == 0) {
free(self->state.Probs);
self->state.Probs = NULL;
PyErr_NoMemory();
goto exit;
}
}
LzmaDecoderInit(&self->state);
}
if (avail_in == 0)
// no more bytes to decompress
return PyString_FromString("");
if (!(result = PyString_FromStringAndSize(NULL, max_length)))
return NULL;
next_out = (unsigned char *)PyString_AS_STRING(result);
avail_out = max_length;
Py_BEGIN_ALLOW_THREADS
// Decompress until EOS marker is reached
res = LzmaDecode(&self->state, next_in, avail_in, &inProcessed,
next_out, avail_out, &outProcessed, 0);
Py_END_ALLOW_THREADS
self->total_out += outProcessed;
next_in += inProcessed;
avail_in -= inProcessed;
next_out += outProcessed;
avail_out -= outProcessed;
if (res != LZMA_RESULT_OK) {
PyErr_SetString(PyExc_ValueError, "data error during decompression");
DEC_AND_NULL(result);
goto exit;
}
/* Not all of the compressed data could be accomodated in the output buffer
of specified size. Return the unconsumed tail in an attribute.*/
if (avail_in > 0)
{
if (avail_in != self->unconsumed_length) {
if (avail_in > self->unconsumed_length) {
self->unconsumed_tail = (unsigned char *)realloc(self->unconsumed_tail, avail_in);
memcpy(self->unconsumed_tail, next_in, avail_in);
}
if (avail_in < self->unconsumed_length) {
memcpy(self->unconsumed_tail, next_in, avail_in);
self->unconsumed_tail = (unsigned char *)realloc(self->unconsumed_tail, avail_in);
}
}
if (!self->unconsumed_tail) {
PyErr_NoMemory();
DEC_AND_NULL(result);
goto exit;
}
} else
FREE_AND_NULL(self->unconsumed_tail);
self->unconsumed_length = avail_in;
_PyString_Resize(&result, self->total_out - start_total_out);
exit:
return result;
}
static const char doc_decomp_flush[] = \
    "flush() -- Return remaining data.";

/*
 * flush() method.
 *
 * Runs the decoder in "finish" mode over the input still buffered in the
 * object and returns the produced bytes as a new string.
 *
 * When the object was created with a maxlength other than -1, at most
 * maxlength - total_out bytes are produced in a single pass; an empty string
 * is returned when that budget is already used up.  With maxlength == -1 the
 * output string grows in BLOCK_SIZE steps until the decoder stops filling it.
 *
 * Raises ValueError when the decoder reports an error.
 */
static PyObject *pylzma_decomp_flush(CDecompressionObject *self, PyObject *args)
{
    PyObject *result = NULL;
    int res;
    SizeT avail_out, outsize;
    unsigned char *tmp;
    SizeT inProcessed, outProcessed;

    if (!PyArg_ParseTuple(args, ""))
        return NULL;

    if (self->max_length != -1) {
        // decompress() does not cap its output at max_length, so the total
        // may already have reached or passed it; the subtraction below would
        // then wrap around to a huge unsigned size
        if (self->total_out >= self->max_length)
            // no more remaining data
            return PyString_FromString("");
        avail_out = self->max_length - self->total_out;
    } else
        avail_out = BLOCK_SIZE;

    result = PyString_FromStringAndSize(NULL, avail_out);
    if (result == NULL)
        return NULL;

    tmp = (unsigned char *)PyString_AS_STRING(result);
    outsize = 0;
    while (1) {
        // decide once, before the interpreter lock is released
        int had_input = (self->unconsumed_length != 0);

        inProcessed = 0;
        outProcessed = 0;

        Py_BEGIN_ALLOW_THREADS
        if (!had_input)
            // No remaining data
            res = LzmaDecode(&self->state, (unsigned char *)"", 0, &inProcessed,
                             tmp, avail_out, &outProcessed, 1);
        else
            // Decompress remaining data
            res = LzmaDecode(&self->state, self->unconsumed_tail, self->unconsumed_length, &inProcessed,
                             tmp, avail_out, &outProcessed, 1);
        Py_END_ALLOW_THREADS

        if (had_input) {
            // drop the consumed prefix; the ranges overlap inside one
            // buffer, hence memmove rather than memcpy
            self->unconsumed_length -= inProcessed;
            if (self->unconsumed_length > 0)
                memmove(self->unconsumed_tail, self->unconsumed_tail + inProcessed, self->unconsumed_length);
            else {
                FREE_AND_NULL(self->unconsumed_tail);
            }
        }

        if (res != LZMA_RESULT_OK) {
            PyErr_SetString(PyExc_ValueError, "data error during decompression");
            DEC_AND_NULL(result);
            goto exit;
        }

        self->total_out += outProcessed;
        outsize += outProcessed;
        if (outProcessed < avail_out || (outProcessed == avail_out && self->max_length != -1))
            break;

        // Defensive only: given the test above, this can trigger solely if the
        // decoder reported more output than the space it was given.
        if (self->max_length != -1) {
            PyErr_SetString(PyExc_ValueError, "not enough input data for decompression");
            DEC_AND_NULL(result);
            goto exit;
        }

        avail_out -= outProcessed;

        // Output buffer is full, might be more data for decompression
        if (_PyString_Resize(&result, outsize+BLOCK_SIZE) != 0)
            goto exit;

        avail_out += BLOCK_SIZE;
        // the resize may have moved the string, so recompute the write position
        tmp = (unsigned char *)PyString_AS_STRING(result) + outsize;
    }

    if (outsize != PyString_GET_SIZE(result))
        _PyString_Resize(&result, outsize);

exit:
    return result;
}
static const char doc_decomp_reset[] = \
    "reset([maxlength]) -- Resets the decompression object.";

/*
 * reset([maxlength]) method.
 *
 * Releases the decoder state and any buffered input and returns the object
 * to its initial state (stream properties still to be read, zero bytes
 * produced).  "maxlength" (int, default -1) follows the same rule as in the
 * constructor: it must be -1 or greater than zero, otherwise ValueError is
 * raised and the object is left unchanged.
 *
 * Returns None.
 */
static PyObject *pylzma_decomp_reset(CDecompressionObject *self, PyObject *args, PyObject *kwargs)
{
    int max_length = -1;
    // possible keywords for this function
    static char *kwlist[] = {"maxlength", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &max_length))
        return NULL;

    // same check as the constructor; flush() subtracts from max_length, so a
    // zero or negative limit must not be stored
    if (max_length == 0 || max_length < -1) {
        PyErr_SetString(PyExc_ValueError, "the decompressed size must be greater than zero");
        return NULL;
    }

    free_lzma_state(&self->state);
    memset(&self->state, 0, sizeof(self->state));
    FREE_AND_NULL(self->unconsumed_tail);
    self->unconsumed_length = 0;
    self->need_properties = 1;
    self->total_out = 0;
    self->max_length = max_length;

    Py_INCREF(Py_None);
    return Py_None;
}
/* Method table of the decompression object; the all-NULL entry terminates it. */
static PyMethodDef pylzma_decomp_methods[] = {
    {
        "decompress",
        (PyCFunction)pylzma_decomp_decompress,
        METH_VARARGS,
        (char *)doc_decomp_decompress
    },
    {
        "flush",
        (PyCFunction)pylzma_decomp_flush,
        METH_VARARGS,
        (char *)doc_decomp_flush
    },
    {
        /* the only method that also accepts keyword arguments */
        "reset",
        (PyCFunction)pylzma_decomp_reset,
        METH_VARARGS | METH_KEYWORDS,
        (char *)doc_decomp_reset
    },
    {NULL, NULL, 0, NULL},
};
/*
 * tp_dealloc handler: releases the decoder state and the buffered input,
 * then returns the object's memory through its type's tp_free slot.
 */
static void pylzma_decomp_dealloc(CDecompressionObject *self)
{
    PyObject *object = (PyObject *)self;

    /* native buffers owned by the object */
    free_lzma_state(&self->state);
    FREE_AND_NULL(self->unconsumed_tail);

    /* the object itself */
    object->ob_type->tp_free(object);
}
/*
 * Type object of the streaming decompression class.
 *
 * The type is subclassable (Py_TPFLAGS_BASETYPE).  It does not take part in
 * cyclic garbage collection: the methods in this file store only raw C
 * buffers in the object, and a type that sets Py_TPFLAGS_HAVE_GC has to
 * supply tp_traverse, which this one does not.
 *
 * ob_type and tp_new are left empty here.
 * NOTE(review): presumably both are filled in by the module initialisation
 * code before the type is used -- not visible in this file, confirm.
 */
PyTypeObject CDecompressionObject_Type = {
    PyObject_HEAD_INIT(NULL)
    0,
    "LZMADecompress",                    /* char *tp_name; */
    sizeof(CDecompressionObject),        /* int tp_basicsize; */
    0,                                   /* int tp_itemsize;       // not used much */
    (destructor)pylzma_decomp_dealloc,   /* destructor tp_dealloc; */
    NULL,                                /* printfunc  tp_print;   */
    NULL,                                /* getattrfunc  tp_getattr; // __getattr__ */
    NULL,                                /* setattrfunc  tp_setattr; // __setattr__ */
    NULL,                                /* cmpfunc  tp_compare;   // __cmp__ */
    NULL,                                /* reprfunc  tp_repr;     // __repr__ */
    NULL,                                /* PyNumberMethods *tp_as_number; */
    NULL,                                /* PySequenceMethods *tp_as_sequence; */
    NULL,                                /* PyMappingMethods *tp_as_mapping; */
    NULL,                                /* hashfunc tp_hash;      // __hash__ */
    NULL,                                /* ternaryfunc tp_call;   // __call__ */
    NULL,                                /* reprfunc tp_str;       // __str__ */
    0,                                   /* tp_getattro*/
    0,                                   /* tp_setattro*/
    0,                                   /* tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    "Decompression class",               /* tp_doc */
    0,                                   /* tp_traverse */
    0,                                   /* tp_clear */
    0,                                   /* tp_richcompare */
    0,                                   /* tp_weaklistoffset */
    0,                                   /* tp_iter */
    0,                                   /* tp_iternext */
    pylzma_decomp_methods,               /* tp_methods */
    0,                                   /* tp_members */
    0,                                   /* tp_getset */
    0,                                   /* tp_base */
    0,                                   /* tp_dict */
    0,                                   /* tp_descr_get */
    0,                                   /* tp_descr_set */
    0,                                   /* tp_dictoffset */
    (initproc)pylzma_decomp_init,        /* tp_init */
    0,                                   /* tp_alloc */
    0,                                   /* tp_new */
};