Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(1382)

Side by Side Diff: Modules/_lzmamodule.c

Issue 6715: xz compressor support
Patch Set: Created 1 year, 9 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « Lib/test/test_lzma.py ('k') | setup.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /* _lzma - Low-level Python interface to liblzma. */
2
3 #define PY_SSIZE_T_CLEAN
4
5 #include "Python.h"
6 #include "structmember.h"
7
8 #include <stdarg.h>
9 #include <string.h>
10
11 #include <lzma.h>
12
13
14 #ifndef PY_LONG_LONG
15 #error "This module requires PY_LONG_LONG to be defined"
16 #endif
17
18
/* Identifiers for the container formats understood by this module.
   The values are exported to Python as module-level integer constants. */
enum {
    FORMAT_AUTO = 0,    /* autodetect; only accepted by the decompressor */
    FORMAT_XZ = 1,      /* .xz container */
    FORMAT_ALONE = 2,   /* legacy "alone" (.lzma) container */
    FORMAT_RAW = 3      /* raw stream described by an explicit filter chain */
};
26
27 #define LZMA_CHECK_UNKNOWN (LZMA_CHECK_ID_MAX + 1)
28
29
30 typedef struct {
31 PyObject_HEAD
32 lzma_stream lzs;
33 int flushed;
34 } Compressor;
35
36 typedef struct {
37 PyObject_HEAD
38 lzma_stream lzs;
39 int check;
40 char eof;
41 PyObject *unused_data;
42 } Decompressor;
43
44 /* LZMAError class object. */
45 static PyObject *Error;
46
47 /* An empty tuple, used by the filter specifier parsing code. */
48 static PyObject *empty_tuple;
49
50
51 /* Helper functions. */
52
53 static int
54 catch_lzma_error(lzma_ret lzret)
55 {
56 switch (lzret) {
57 case LZMA_OK:
58 case LZMA_GET_CHECK:
59 case LZMA_NO_CHECK:
60 case LZMA_STREAM_END:
61 return 0;
62 case LZMA_UNSUPPORTED_CHECK:
63 PyErr_SetString(Error,
64 "lzma_code() unexpectedly returned "
65 "LZMA_UNSUPPORTED_CHECK");
66 return 1;
67 case LZMA_MEM_ERROR:
68 PyErr_NoMemory();
69 return 1;
70 case LZMA_MEMLIMIT_ERROR:
71 PyErr_SetString(Error, "Memory usage limit exceeded");
72 return 1;
73 case LZMA_FORMAT_ERROR:
74 PyErr_SetString(Error, "Input format not supported by decoder");
75 return 1;
76 case LZMA_OPTIONS_ERROR:
77 PyErr_SetString(Error, "Invalid or unsupported options");
78 return 1;
79 case LZMA_DATA_ERROR:
80 PyErr_SetString(Error, "Corrupt input data");
81 return 1;
82 case LZMA_BUF_ERROR:
83 PyErr_SetString(Error, "Insufficient buffer space");
84 return 1;
85 case LZMA_PROG_ERROR:
86 PyErr_SetString(Error, "Internal error");
87 return 1;
88 default:
89 PyErr_Format(Error, "Unrecognized error from liblzma: %d", lzret);
90 return 1;
91 }
92 }
93
94 #if BUFSIZ < 8192
95 #define SMALLCHUNK 8192
96 #else
97 #define SMALLCHUNK BUFSIZ
98 #endif
99
100 #if SIZEOF_INT < 4
101 #define BIGCHUNK (512 * 32)
102 #else
103 #define BIGCHUNK (512 * 1024)
104 #endif
105
106 static int
107 grow_buffer(PyObject **buf)
108 {
109 size_t size = PyBytes_GET_SIZE(*buf);
110 if (size <= SMALLCHUNK)
111 return _PyBytes_Resize(buf, size + SMALLCHUNK);
112 else if (size <= BIGCHUNK)
113 return _PyBytes_Resize(buf, size * 2);
114 else
115 return _PyBytes_Resize(buf, size + BIGCHUNK);
loewis 2011/10/09 16:48:58 This has quadratic performance.
116 }
117
118
119 /* Some custom type conversions for PyArg_ParseTupleAndKeywords(),
120 since the predefined conversion specifiers do not suit our needs:
121
122 uint32_t - the "I" (unsigned int) specifier is the right size, but
123 silently ignores overflows on conversion.
124
125 lzma_mode and lzma_match_finder - these are enumeration types, and
126 so the size of each is implementation-defined. Worse, different
127 enum types can be of different sizes within the same program, so
128 to be strictly correct, we need to define two separate converters.
129 */
130
131 #define INT_TYPE_CONVERTER_FUNC(TYPE, FUNCNAME) \
132 static int \
133 FUNCNAME(PyObject *obj, void *ptr) \
134 { \
135 unsigned long val; \
136 \
137 val = PyLong_AsUnsignedLong(obj); \
138 if (PyErr_Occurred()) \
139 return 0; \
140 if ((unsigned long)(TYPE)val != val) { \
141 PyErr_SetString(PyExc_OverflowError, \
142 "Value too large for " #TYPE "type"); \
amaury.forgeotdarc 2011/09/16 01:18:14 a space is missing in " type"
143 return 0; \
144 } \
145 *(TYPE *)ptr = val; \
146 return 1; \
147 }
148
149 INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter)
150 INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter)
151 INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter)
152
153 #undef INT_TYPE_CONVERTER_FUNC
154
155
156 /* Filter specifier parsing functions. */
157
158 static void *
159 parse_filter_spec_lzma(PyObject *spec)
160 {
161 static char *optnames[] = {"id", "preset", "dict_size", "lc", "lp",
162 "pb", "mode", "nice_len", "mf", "depth", NULL};
163 PyObject *id;
164 PyObject *preset_obj;
165 uint32_t preset = LZMA_PRESET_DEFAULT;
166 lzma_options_lzma *options;
167
168 /* First, fill in default values for all the options using a preset.
169 Then, override the defaults with any values given by the caller. */
170
171 preset_obj = PyMapping_GetItemString(spec, "preset");
172 if (preset_obj == NULL) {
173 if (PyErr_ExceptionMatches(PyExc_KeyError))
174 PyErr_Clear();
175 else
176 return NULL;
177 } else {
178 int ok = uint32_converter(preset_obj, &preset);
179 Py_DECREF(preset_obj);
180 if (!ok)
181 return NULL;
182 }
183
184 options = (lzma_options_lzma *)PyMem_Malloc(sizeof *options);
185 if (options == NULL)
186 return PyErr_NoMemory();
187 memset(options, 0, sizeof *options);
188
189 if (lzma_lzma_preset(options, preset)) {
190 PyMem_Free(options);
191 PyErr_Format(Error, "lzma_lzma_preset() failed for preset %#x", preset);
192 return NULL;
193 }
194
195 if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec,
196 "|OOO&O&O&O&O&O&O&O&", optnames,
197 &id, &preset_obj,
198 uint32_converter, &options->dict_size,
199 uint32_converter, &options->lc,
200 uint32_converter, &options->lp,
201 uint32_converter, &options->pb,
202 lzma_mode_converter, &options->mode,
203 uint32_converter, &options->nice_len,
204 lzma_mf_converter, &options->mf,
205 uint32_converter, &options->depth)) {
206 PyErr_SetString(PyExc_ValueError,
207 "Invalid filter specifier for LZMA filter");
208 PyMem_Free(options);
209 options = NULL;
210 }
211 return options;
212 }
213
214 static void *
215 parse_filter_spec_delta(PyObject *spec)
216 {
217 static char *optnames[] = {"id", "dist", NULL};
218 PyObject *id;
219 uint32_t dist = 1;
220 lzma_options_delta *options;
221
222 if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec, "|OO&", optnames,
223 &id, uint32_converter, &dist)) {
224 PyErr_SetString(PyExc_ValueError,
225 "Invalid filter specifier for delta filter");
226 return NULL;
227 }
228
229 options = (lzma_options_delta *)PyMem_Malloc(sizeof *options);
230 if (options == NULL)
231 return PyErr_NoMemory();
232 memset(options, 0, sizeof *options);
233 options->type = LZMA_DELTA_TYPE_BYTE;
234 options->dist = dist;
235 return options;
236 }
237
238 static void *
239 parse_filter_spec_bcj(PyObject *spec)
240 {
241 static char *optnames[] = {"id", "start_offset", NULL};
242 PyObject *id;
243 uint32_t start_offset = 0;
244 lzma_options_bcj *options;
245
246 if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec, "|OO&", optnames,
247 &id, uint32_converter, &start_offset)) {
248 PyErr_SetString(PyExc_ValueError,
249 "Invalid filter specifier for BCJ filter");
250 return NULL;
251 }
252
253 options = (lzma_options_bcj *)PyMem_Malloc(sizeof *options);
254 if (options == NULL)
255 return PyErr_NoMemory();
256 memset(options, 0, sizeof *options);
257 options->start_offset = start_offset;
258 return options;
259 }
260
261 static void *
262 parse_filter_spec(lzma_filter *f, PyObject *spec)
263 {
264 PyObject *id_obj;
265
266 if (!PyMapping_Check(spec)) {
267 PyErr_SetString(PyExc_TypeError,
268 "Filter specifier must be a dict or dict-like object");
269 return NULL;
270 }
271 id_obj = PyMapping_GetItemString(spec, "id");
272 if (id_obj == NULL) {
273 if (PyErr_ExceptionMatches(PyExc_KeyError))
274 PyErr_SetString(PyExc_ValueError,
275 "Filter specifier must have an \"id\" entry");
276 return NULL;
277 }
278 f->id = PyLong_AsUnsignedLongLong(id_obj);
279 Py_DECREF(id_obj);
280 if (PyErr_Occurred())
281 return NULL;
282
283 switch (f->id) {
284 case LZMA_FILTER_LZMA1:
285 case LZMA_FILTER_LZMA2:
286 f->options = parse_filter_spec_lzma(spec);
287 return f->options;
288 case LZMA_FILTER_DELTA:
289 f->options = parse_filter_spec_delta(spec);
290 return f->options;
291 case LZMA_FILTER_X86:
292 case LZMA_FILTER_POWERPC:
293 case LZMA_FILTER_IA64:
294 case LZMA_FILTER_ARM:
295 case LZMA_FILTER_ARMTHUMB:
296 case LZMA_FILTER_SPARC:
297 f->options = parse_filter_spec_bcj(spec);
298 return f->options;
299 default:
300 PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
301 return NULL;
302 }
303 }
304
305 static void
306 free_filter_chain(lzma_filter filters[])
307 {
308 int i;
309
310 for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; i++)
311 PyMem_Free(filters[i].options);
312 }
313
314 static int
315 parse_filter_chain_spec(lzma_filter filters[], PyObject *filterspecs)
316 {
317 Py_ssize_t i, num_filters;
318
319 num_filters = PySequence_Length(filterspecs);
320 if (num_filters == -1)
321 return -1;
322 if (num_filters > LZMA_FILTERS_MAX) {
323 PyErr_Format(PyExc_ValueError,
324 "Too many filters - liblzma supports a maximum of %d",
325 LZMA_FILTERS_MAX);
326 return -1;
327 }
328
329 for (i = 0; i < num_filters; i++) {
330 int ok = 1;
331 PyObject *spec = PySequence_GetItem(filterspecs, i);
332 if (spec == NULL || parse_filter_spec(&filters[i], spec) == NULL)
333 ok = 0;
334 Py_XDECREF(spec);
335 if (!ok) {
336 filters[i].id = LZMA_VLI_UNKNOWN;
337 free_filter_chain(filters);
338 return -1;
339 }
340 }
341 filters[num_filters].id = LZMA_VLI_UNKNOWN;
342 return 0;
343 }
344
345
346 /* LZMACompressor class. */
347
348 static PyObject *
349 compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
350 {
351 size_t data_size = 0;
352 PyObject *result;
353
354 result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
355 if (result == NULL)
356 return NULL;
357 c->lzs.next_in = data;
358 c->lzs.avail_in = len;
359 c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result);
360 c->lzs.avail_out = PyBytes_GET_SIZE(result);
361 for (;;) {
362 lzma_ret lzret;
363
364 Py_BEGIN_ALLOW_THREADS
loewis 2011/10/09 16:48:58 It seems that the Windows version at least is not
365 lzret = lzma_code(&c->lzs, action);
366 data_size = (char *)c->lzs.next_out - PyBytes_AS_STRING(result);
367 Py_END_ALLOW_THREADS
368 if (catch_lzma_error(lzret))
369 goto error;
370 if ((action == LZMA_RUN && c->lzs.avail_in == 0) ||
371 (action == LZMA_FINISH && lzret == LZMA_STREAM_END)) {
372 break;
373 } else if (c->lzs.avail_out == 0) {
374 if (grow_buffer(&result) == -1)
375 goto error;
376 c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
377 c->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
378 }
379 }
380 if (data_size != PyBytes_GET_SIZE(result))
381 if (_PyBytes_Resize(&result, data_size) == -1)
382 goto error;
383 return result;
384
385 error:
386 Py_XDECREF(result);
387 return NULL;
388 }
389
390 PyDoc_STRVAR(Compressor_compress_doc,
391 "compress(data) -> bytes\n"
392 "\n"
393 "Provide data to the compressor object. Returns a chunk of\n"
394 "compressed data if possible, or b\"\" otherwise.\n"
395 "\n"
396 "When you have finished providing data to the compressor, call the\n"
397 "flush() method to finish the conversion process.\n");
398
399 static PyObject *
400 Compressor_compress(Compressor *self, PyObject *args)
401 {
402 Py_buffer buffer;
403 PyObject *result = NULL;
404
405 if (!PyArg_ParseTuple(args, "y*:compress", &buffer))
406 return NULL;
407
408 if (self->flushed)
409 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
410 else
411 result = compress(self, buffer.buf, buffer.len, LZMA_RUN);
412 PyBuffer_Release(&buffer);
413 return result;
414 }
415
416 PyDoc_STRVAR(Compressor_flush_doc,
417 "flush() -> bytes\n"
418 "\n"
419 "Finish the compression process. Returns the compressed data left\n"
420 "in internal buffers.\n"
421 "\n"
422 "The compressor object may not be used after this method is called.\n");
423
424 static PyObject *
425 Compressor_flush(Compressor *self, PyObject *noargs)
426 {
427 PyObject *result = NULL;
428
429 if (self->flushed) {
430 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
431 } else {
432 self->flushed = 1;
433 result = compress(self, NULL, 0, LZMA_FINISH);
434 }
435 return result;
436 }
437
438 static int
439 Compressor_init_xz(lzma_stream *lzs, int check, uint32_t preset,
440 PyObject *filterspecs)
441 {
442 lzma_ret lzret;
443
444 if (filterspecs == Py_None) {
445 lzret = lzma_easy_encoder(lzs, preset, check);
446 } else {
447 lzma_filter filters[LZMA_FILTERS_MAX + 1];
448
449 if (parse_filter_chain_spec(filters, filterspecs) == -1)
450 return -1;
451 lzret = lzma_stream_encoder(lzs, filters, check);
452 free_filter_chain(filters);
453 }
454 if (catch_lzma_error(lzret))
455 return -1;
456 else
457 return 0;
458 }
459
460 static int
461 Compressor_init_alone(lzma_stream *lzs, uint32_t preset, PyObject *filterspecs)
462 {
463 lzma_ret lzret;
464
465 if (filterspecs == Py_None) {
466 lzma_options_lzma options;
467
468 if (lzma_lzma_preset(&options, preset)) {
469 PyErr_Format(Error, "Invalid compression preset: %#x", preset);
470 return -1;
471 }
472 lzret = lzma_alone_encoder(lzs, &options);
473 } else {
474 lzma_filter filters[LZMA_FILTERS_MAX + 1];
475
476 if (parse_filter_chain_spec(filters, filterspecs) == -1)
477 return -1;
478 if (filters[0].id == LZMA_FILTER_LZMA1 &&
479 filters[1].id == LZMA_VLI_UNKNOWN) {
480 lzret = lzma_alone_encoder(lzs, filters[0].options);
481 } else {
482 PyErr_SetString(PyExc_ValueError,
483 "Invalid filter chain for FORMAT_ALONE - "
484 "must be a single LZMA1 filter");
485 lzret = LZMA_PROG_ERROR;
486 }
487 free_filter_chain(filters);
488 }
489 if (PyErr_Occurred() || catch_lzma_error(lzret))
490 return -1;
491 else
492 return 0;
493 }
494
495 static int
496 Compressor_init_raw(lzma_stream *lzs, PyObject *filterspecs)
497 {
498 lzma_filter filters[LZMA_FILTERS_MAX + 1];
499 lzma_ret lzret;
500
501 if (filterspecs == Py_None) {
502 PyErr_SetString(PyExc_ValueError,
503 "Must specify filters for FORMAT_RAW");
504 return -1;
505 }
506 if (parse_filter_chain_spec(filters, filterspecs) == -1)
507 return -1;
508 lzret = lzma_raw_encoder(lzs, filters);
509 free_filter_chain(filters);
510 if (catch_lzma_error(lzret))
511 return -1;
512 else
513 return 0;
514 }
515
516 static int
517 Compressor_init(Compressor *self, PyObject *args, PyObject *kwargs)
518 {
519 static char *arg_names[] = {"format", "check", "preset", "filters", NULL};
520 int format = FORMAT_XZ;
521 int check = -1;
522 uint32_t preset = LZMA_PRESET_DEFAULT;
523 PyObject *preset_obj = Py_None;
524 PyObject *filterspecs = Py_None;
525
526 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
527 "|iiOO:LZMACompressor", arg_names,
528 &format, &check, &preset_obj,
529 &filterspecs))
530 return -1;
531
532 if (format != FORMAT_XZ && check != -1 && check != LZMA_CHECK_NONE) {
533 PyErr_SetString(PyExc_ValueError,
534 "Integrity checks are only supported by FORMAT_XZ");
535 return -1;
536 }
537
538 if (preset_obj != Py_None && filterspecs != Py_None) {
539 PyErr_SetString(PyExc_ValueError,
540 "Cannot specify both preset and filter chain");
541 return -1;
542 }
543
544 if (preset_obj != Py_None)
545 if (!uint32_converter(preset_obj, &preset))
546 return -1;
547
548 self->flushed = 0;
549 switch (format) {
550 case FORMAT_XZ:
551 if (check == -1)
552 check = LZMA_CHECK_CRC64;
553 return Compressor_init_xz(&self->lzs, check, preset, filterspecs);
554
555 case FORMAT_ALONE:
556 return Compressor_init_alone(&self->lzs, preset, filterspecs);
557
558 case FORMAT_RAW:
559 return Compressor_init_raw(&self->lzs, filterspecs);
560
561 default:
562 PyErr_SetString(PyExc_ValueError, "Invalid container format");
563 return -1;
564 }
565 }
566
567 static void
568 Compressor_dealloc(Compressor *self)
569 {
570 lzma_end(&self->lzs);
571 Py_TYPE(self)->tp_free((PyObject *)self);
572 }
573
574 static PyMethodDef Compressor_methods[] = {
575 {"compress", (PyCFunction)Compressor_compress, METH_VARARGS,
576 Compressor_compress_doc},
577 {"flush", (PyCFunction)Compressor_flush, METH_NOARGS,
578 Compressor_flush_doc},
579 {NULL}
580 };
581
582 PyDoc_STRVAR(Compressor_doc,
583 "LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None)\n"
584 "\n"
585 "Create a compressor object for compressing data incrementally.\n"
586 "\n"
587 "format specifies the container format to use for the output. This can\n"
588 "be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.\n"
589 "\n"
590 "check specifies the integrity check to use. For FORMAT_XZ, the default\n"
591 "is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not suport integrity\n"
592 "checks; for these formats, check must be omitted, or be CHECK_NONE.\n"
593 "\n"
594 "The settings used by the compressor can be specified either as a\n"
595 "preset compression level (with the 'preset' argument), or in detail\n"
596 "as a custom filter chain (with the 'filters' argument). For FORMAT_XZ\n"
597 "and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset\n"
598 "level. For FORMAT_RAW, the caller must always specify a filter chain;\n"
599 "the raw compressor does not support preset compression levels.\n"
600 "\n"
601 "preset (if provided) should be an integer in the range 0-9, optionally\n"
602 "OR-ed with the constant PRESET_EXTREME.\n"
603 "\n"
604 "filters (if provided) should be a sequence of dicts. Each dict should\n"
605 "have an entry for \"id\" indicating ID of the filter, plus additional\n"
loewis 2011/10/09 16:48:58 "indicating the ID" ?
606 "entries for options to the filter.\n"
607 "\n"
608 "For one-shot compression, use the compress() function instead.\n");
609
610 static PyTypeObject Compressor_type = {
611 PyVarObject_HEAD_INIT(NULL, 0)
612 "_lzma.LZMACompressor", /* tp_name */
613 sizeof(Compressor), /* tp_basicsize */
614 0, /* tp_itemsize */
615 (destructor)Compressor_dealloc, /* tp_dealloc */
616 0, /* tp_print */
617 0, /* tp_getattr */
618 0, /* tp_setattr */
619 0, /* tp_reserved */
620 0, /* tp_repr */
621 0, /* tp_as_number */
622 0, /* tp_as_sequence */
623 0, /* tp_as_mapping */
624 0, /* tp_hash */
625 0, /* tp_call */
626 0, /* tp_str */
627 0, /* tp_getattro */
628 0, /* tp_setattro */
629 0, /* tp_as_buffer */
630 Py_TPFLAGS_DEFAULT, /* tp_flags */
631 Compressor_doc, /* tp_doc */
632 0, /* tp_traverse */
633 0, /* tp_clear */
634 0, /* tp_richcompare */
635 0, /* tp_weaklistoffset */
636 0, /* tp_iter */
637 0, /* tp_iternext */
638 Compressor_methods, /* tp_methods */
639 0, /* tp_members */
640 0, /* tp_getset */
641 0, /* tp_base */
642 0, /* tp_dict */
643 0, /* tp_descr_get */
644 0, /* tp_descr_set */
645 0, /* tp_dictoffset */
646 (initproc)Compressor_init, /* tp_init */
647 0, /* tp_alloc */
648 PyType_GenericNew, /* tp_new */
649 };
650
651
652 /* LZMADecompressor class. */
653
654 PyObject *
655 decompress(Decompressor *d, uint8_t *data, size_t len)
656 {
657 size_t data_size = 0;
658 PyObject *result;
659
660 result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
661 if (result == NULL)
662 return NULL;
663 d->lzs.next_in = data;
664 d->lzs.avail_in = len;
665 d->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result);
666 d->lzs.avail_out = PyBytes_GET_SIZE(result);
667 for (;;) {
668 lzma_ret lzret;
669
670 Py_BEGIN_ALLOW_THREADS
671 lzret = lzma_code(&d->lzs, LZMA_RUN);
672 data_size = (char *)d->lzs.next_out - PyBytes_AS_STRING(result);
673 Py_END_ALLOW_THREADS
674 if (catch_lzma_error(lzret))
675 goto error;
676 if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK)
677 d->check = lzma_get_check(&d->lzs);
678 if (lzret == LZMA_STREAM_END) {
679 d->eof = 1;
680 if (d->lzs.avail_in > 0) {
681 Py_CLEAR(d->unused_data);
682 d->unused_data = PyBytes_FromStringAndSize(
683 (char *)d->lzs.next_in, d->lzs.avail_in);
684 if (d->unused_data == NULL)
685 goto error;
686 }
687 break;
688 } else if (d->lzs.avail_in == 0) {
689 break;
690 } else if (d->lzs.avail_out == 0) {
691 if (grow_buffer(&result) == -1)
692 goto error;
693 d->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
694 d->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
695 }
696 }
697 if (data_size != PyBytes_GET_SIZE(result))
698 if (_PyBytes_Resize(&result, data_size) == -1)
699 goto error;
700 return result;
701
702 error:
703 Py_XDECREF(result);
704 return NULL;
705 }
706
707 PyDoc_STRVAR(Decompressor_decompress_doc,
708 "decompress(data) -> bytes\n"
709 "\n"
710 "Provide data to the decompressor object. Returns a chunk of\n"
711 "decompressed data if possible, or b\"\" otherwise.\n"
712 "\n"
713 "Attempting to decompress data after the end of the stream is\n"
714 "reached raises an EOFError. Any data found after the end of the\n"
715 "stream is ignored, and saved in the unused_data attribute.\n");
716
717 static PyObject *
718 Decompressor_decompress(Decompressor *self, PyObject *args)
719 {
720 Py_buffer buffer;
721 PyObject *result = NULL;
722
723 if (!PyArg_ParseTuple(args, "y*:decompress", &buffer))
724 return NULL;
725
726 if (self->eof)
727 PyErr_SetString(PyExc_EOFError, "Already at end of stream");
728 else
729 result = decompress(self, buffer.buf, buffer.len);
730 PyBuffer_Release(&buffer);
731 return result;
732 }
733
734 static int
735 Decompressor_init_raw(lzma_stream *lzs, PyObject *filterspecs)
736 {
737 lzma_filter filters[LZMA_FILTERS_MAX + 1];
738 lzma_ret lzret;
739
740 if (parse_filter_chain_spec(filters, filterspecs) == -1)
741 return -1;
742 lzret = lzma_raw_decoder(lzs, filters);
743 free_filter_chain(filters);
744 if (catch_lzma_error(lzret))
745 return -1;
746 else
747 return 0;
748 }
749
750 static int
751 Decompressor_init(Decompressor *self, PyObject *args, PyObject *kwargs)
752 {
753 static char *arg_names[] = {"format", "memlimit", "filters", NULL};
754 const uint32_t decoder_flags = LZMA_TELL_ANY_CHECK | LZMA_TELL_NO_CHECK;
755 int format = FORMAT_AUTO;
756 uint64_t memlimit = UINT64_MAX;
757 PyObject *memlimit_obj = Py_None;
758 PyObject *filterspecs = Py_None;
759 lzma_ret lzret;
760
761 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
762 "|iOO:LZMADecompressor", arg_names,
763 &format, &memlimit_obj, &filterspecs))
764 return -1;
765
766 if (memlimit_obj != Py_None) {
767 if (format == FORMAT_RAW) {
768 PyErr_SetString(PyExc_ValueError,
769 "Cannot specify memory limit with FORMAT_RAW");
770 return -1;
771 }
772 memlimit = PyLong_AsUnsignedLongLong(memlimit_obj);
773 if (PyErr_Occurred())
774 return -1;
775 }
776
777 if (format == FORMAT_RAW && filterspecs == Py_None) {
778 PyErr_SetString(PyExc_ValueError,
779 "Must specify filters for FORMAT_RAW");
780 return -1;
781 } else if (format != FORMAT_RAW && filterspecs != Py_None) {
782 PyErr_SetString(PyExc_ValueError,
783 "Cannot specify filters except with FORMAT_RAW");
784 return -1;
785 }
786
787 self->check = LZMA_CHECK_UNKNOWN;
788 self->unused_data = PyBytes_FromStringAndSize(NULL, 0);
789 if (self->unused_data == NULL)
790 return -1;
791
792 switch (format) {
793 case FORMAT_AUTO:
794 lzret = lzma_auto_decoder(&self->lzs, memlimit, decoder_flags);
795 if (catch_lzma_error(lzret))
796 break;
797 return 0;
798
799 case FORMAT_XZ:
800 lzret = lzma_stream_decoder(&self->lzs, memlimit, decoder_flags);
801 if (catch_lzma_error(lzret))
802 break;
803 return 0;
804
805 case FORMAT_ALONE:
806 self->check = LZMA_CHECK_NONE;
807 lzret = lzma_alone_decoder(&self->lzs, memlimit);
808 if (catch_lzma_error(lzret))
809 break;
810 return 0;
811
812 case FORMAT_RAW:
813 self->check = LZMA_CHECK_NONE;
814 if (Decompressor_init_raw(&self->lzs, filterspecs) == -1)
815 break;
816 return 0;
817
818 default:
819 PyErr_Format(PyExc_ValueError,
820 "Invalid container format: %d", format);
821 break;
822 }
823
824 Py_CLEAR(self->unused_data);
825 return -1;
826 }
827
828 static void
829 Decompressor_dealloc(Decompressor *self)
830 {
831 lzma_end(&self->lzs);
832 Py_CLEAR(self->unused_data);
833 Py_TYPE(self)->tp_free((PyObject *)self);
834 }
835
836 static PyMethodDef Decompressor_methods[] = {
837 {"decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS,
838 Decompressor_decompress_doc},
839 {NULL}
840 };
841
842 PyDoc_STRVAR(Decompressor_check_doc,
843 "ID of the integrity check used by the input stream.");
844
845 PyDoc_STRVAR(Decompressor_eof_doc,
846 "True if the end-of-stream marker has been reached.");
847
848 PyDoc_STRVAR(Decompressor_unused_data_doc,
849 "Data found after the end of the compressed stream.");
850
851 static PyMemberDef Decompressor_members[] = {
852 {"check", T_INT, offsetof(Decompressor, check), READONLY,
853 Decompressor_check_doc},
854 {"eof", T_BOOL, offsetof(Decompressor, eof), READONLY,
855 Decompressor_eof_doc},
856 {"unused_data", T_OBJECT_EX, offsetof(Decompressor, unused_data), READONLY,
857 Decompressor_unused_data_doc},
858 {NULL}
859 };
860
861 PyDoc_STRVAR(Decompressor_doc,
862 "LZMADecompressor(format=FORMAT_AUTO, memlimit=None, filters=None)\n"
863 "\n"
864 "Create a decompressor object for decompressing data incrementally.\n"
865 "\n"
866 "format specifies the container format of the input stream. If this is\n"
867 "FORMAT_AUTO (the default), the decompressor will automatically detect\n"
868 "whether the input is FORMAT_XZ or FORMAT_ALONE. Streams created with\n"
869 "FORMAT_RAW cannot be autodetected.\n"
870 "\n"
871 "memlimit can be specified to limit the amount of memory used by the\n"
872 "decompressor. This will cause decompression to fail if the input\n"
873 "cannot be decompressed within the given limit.\n"
874 "\n"
875 "filters specifies a custom filter chain. This argument is required for\n"
876 "FORMAT_RAW, and not accepted with any other format. When provided,\n"
877 "this should be a sequence of dicts, each indicating the ID and options\n"
878 "for a single filter.\n"
879 "\n"
880 "For one-shot decompression, use the decompress() function instead.\n");
881
882 static PyTypeObject Decompressor_type = {
883 PyVarObject_HEAD_INIT(NULL, 0)
884 "_lzma.LZMADecompressor", /* tp_name */
885 sizeof(Decompressor), /* tp_basicsize */
886 0, /* tp_itemsize */
887 (destructor)Decompressor_dealloc, /* tp_dealloc */
888 0, /* tp_print */
889 0, /* tp_getattr */
890 0, /* tp_setattr */
891 0, /* tp_reserved */
892 0, /* tp_repr */
893 0, /* tp_as_number */
894 0, /* tp_as_sequence */
895 0, /* tp_as_mapping */
896 0, /* tp_hash */
897 0, /* tp_call */
898 0, /* tp_str */
899 0, /* tp_getattro */
900 0, /* tp_setattro */
901 0, /* tp_as_buffer */
902 Py_TPFLAGS_DEFAULT, /* tp_flags */
903 Decompressor_doc, /* tp_doc */
904 0, /* tp_traverse */
905 0, /* tp_clear */
906 0, /* tp_richcompare */
907 0, /* tp_weaklistoffset */
908 0, /* tp_iter */
909 0, /* tp_iternext */
910 Decompressor_methods, /* tp_methods */
911 Decompressor_members, /* tp_members */
912 0, /* tp_getset */
913 0, /* tp_base */
914 0, /* tp_dict */
915 0, /* tp_descr_get */
916 0, /* tp_descr_set */
917 0, /* tp_dictoffset */
918 (initproc)Decompressor_init, /* tp_init */
919 0, /* tp_alloc */
920 PyType_GenericNew, /* tp_new */
921 };
922
923
924 /* Module-level functions. */
925
926 PyDoc_STRVAR(check_is_supported_doc,
927 "check_is_supported(check_id) -> bool\n"
928 "\n"
929 "Test whether the given integrity check is supported.\n"
930 "\n"
931 "Always returns True for CHECK_NONE and CHECK_CRC32.\n");
932
933 static PyObject *
934 check_is_supported(PyObject *self, PyObject *args)
935 {
936 int check_id;
937
938 if (!PyArg_ParseTuple(args, "i:check_is_supported", &check_id))
939 return NULL;
940
941 if (lzma_check_is_supported(check_id))
942 Py_RETURN_TRUE;
943 else
944 Py_RETURN_FALSE;
945 }
946
947
948 /* Module initialization. */
949
950 static PyMethodDef module_methods[] = {
951 {"check_is_supported", (PyCFunction)check_is_supported,
952 METH_VARARGS, check_is_supported_doc},
953 {NULL}
954 };
955
956 static PyModuleDef _lzmamodule = {
957 PyModuleDef_HEAD_INIT,
958 "_lzma",
959 NULL,
960 -1,
961 module_methods,
962 NULL,
963 NULL,
964 NULL,
965 NULL,
966 };
967
968 /* Some of our constants are more than 32 bits wide, so PyModule_AddIntConstant
969 would not work correctly on platforms with 32-bit longs. */
970 static int
971 module_add_int_constant(PyObject *m, const char *name, PY_LONG_LONG value)
972 {
973 PyObject *o = PyLong_FromLongLong(value);
974 if (o == NULL)
975 return -1;
976 if (PyModule_AddObject(m, name, o) == 0)
977 return 0;
978 Py_DECREF(o);
979 return -1;
980 }
981
982 #define ADD_INT_PREFIX_MACRO(m, macro) \
983 module_add_int_constant(m, #macro, LZMA_ ## macro)
984
985 PyMODINIT_FUNC
986 PyInit__lzma(void)
987 {
988 PyObject *m;
989
990 empty_tuple = PyTuple_New(0);
991 if (empty_tuple == NULL)
992 return NULL;
993
994 m = PyModule_Create(&_lzmamodule);
995 if (m == NULL)
996 return NULL;
997
998 if (PyModule_AddIntMacro(m, FORMAT_AUTO) == -1 ||
999 PyModule_AddIntMacro(m, FORMAT_XZ) == -1 ||
1000 PyModule_AddIntMacro(m, FORMAT_ALONE) == -1 ||
1001 PyModule_AddIntMacro(m, FORMAT_RAW) == -1 ||
1002 ADD_INT_PREFIX_MACRO(m, CHECK_NONE) == -1 ||
1003 ADD_INT_PREFIX_MACRO(m, CHECK_CRC32) == -1 ||
1004 ADD_INT_PREFIX_MACRO(m, CHECK_CRC64) == -1 ||
1005 ADD_INT_PREFIX_MACRO(m, CHECK_SHA256) == -1 ||
1006 ADD_INT_PREFIX_MACRO(m, CHECK_ID_MAX) == -1 ||
1007 ADD_INT_PREFIX_MACRO(m, CHECK_UNKNOWN) == -1 ||
1008 ADD_INT_PREFIX_MACRO(m, FILTER_LZMA1) == -1 ||
1009 ADD_INT_PREFIX_MACRO(m, FILTER_LZMA2) == -1 ||
1010 ADD_INT_PREFIX_MACRO(m, FILTER_DELTA) == -1 ||
1011 ADD_INT_PREFIX_MACRO(m, FILTER_X86) == -1 ||
1012 ADD_INT_PREFIX_MACRO(m, FILTER_IA64) == -1 ||
1013 ADD_INT_PREFIX_MACRO(m, FILTER_ARM) == -1 ||
1014 ADD_INT_PREFIX_MACRO(m, FILTER_ARMTHUMB) == -1 ||
1015 ADD_INT_PREFIX_MACRO(m, FILTER_SPARC) == -1 ||
1016 ADD_INT_PREFIX_MACRO(m, FILTER_POWERPC) == -1 ||
1017 ADD_INT_PREFIX_MACRO(m, MF_HC3) == -1 ||
1018 ADD_INT_PREFIX_MACRO(m, MF_HC4) == -1 ||
1019 ADD_INT_PREFIX_MACRO(m, MF_BT2) == -1 ||
1020 ADD_INT_PREFIX_MACRO(m, MF_BT3) == -1 ||
1021 ADD_INT_PREFIX_MACRO(m, MF_BT4) == -1 ||
1022 ADD_INT_PREFIX_MACRO(m, MODE_FAST) == -1 ||
1023 ADD_INT_PREFIX_MACRO(m, MODE_NORMAL) == -1 ||
1024 ADD_INT_PREFIX_MACRO(m, PRESET_DEFAULT) == -1 ||
1025 ADD_INT_PREFIX_MACRO(m, PRESET_EXTREME) == -1)
1026 return NULL;
1027
1028 Error = PyErr_NewExceptionWithDoc(
1029 "_lzma.LZMAError", "Call to liblzma failed.", NULL, NULL);
1030 if (Error == NULL)
1031 return NULL;
1032 Py_INCREF(Error);
1033 if (PyModule_AddObject(m, "LZMAError", Error) == -1)
1034 return NULL;
1035
1036 if (PyType_Ready(&Compressor_type) == -1)
1037 return NULL;
1038 Py_INCREF(&Compressor_type);
1039 if (PyModule_AddObject(m, "LZMACompressor",
1040 (PyObject *)&Compressor_type) == -1)
1041 return NULL;
1042
1043 if (PyType_Ready(&Decompressor_type) == -1)
1044 return NULL;
1045 Py_INCREF(&Decompressor_type);
1046 if (PyModule_AddObject(m, "LZMADecompressor",
1047 (PyObject *)&Decompressor_type) == -1)
1048 return NULL;
1049
1050 return m;
1051 }
OLDNEW
« no previous file with comments | « Lib/test/test_lzma.py ('k') | setup.py » ('j') | no next file with comments »

RSS Feeds Recent Issues | This issue
This is Rietveld cbc36f91f3f7