# HG changeset patch # Parent a262ad6536fc8979b4acfb3e72f5c887b1bb6789 Add lzma.{encode,decode}_filter_properties(). diff --git a/Doc/library/lzma.rst b/Doc/library/lzma.rst --- a/Doc/library/lzma.rst +++ b/Doc/library/lzma.rst @@ -235,6 +235,32 @@ feature set. +.. function:: encode_filter_properties(filter) + + Return a :class:`bytes` object encoding the options (properties) of the + filter specified by *filter* (a dictionary). + + *filter* is interpreted as a filter specifier, as described in + :ref:`filter-chain-specs`. + + The returned data does not include the filter ID itself, only the options. + + This function is primarily of interest to users implementing custom file + formats. + + +.. function:: decode_filter_properties(filter_id, encoded_props) + + Return a dictionary describing a filter with ID *filter_id*, and options + (properties) decoded from the :class:`bytes` object *encoded_props*. + + The returned dictionary is a filter specifier, as described in + :ref:`filter-chain-specs`. + + This function is primarily of interest to users implementing custom file + formats. + + .. _filter-chain-specs: Specifying custom filter chains diff --git a/Lib/lzma.py b/Lib/lzma.py --- a/Lib/lzma.py +++ b/Lib/lzma.py @@ -19,6 +19,7 @@ "LZMACompressor", "LZMADecompressor", "LZMAFile", "LZMAError", "compress", "decompress", "check_is_supported", + "encode_filter_properties", "decode_filter_properties", ] import io diff --git a/Lib/test/test_lzma.py b/Lib/test/test_lzma.py --- a/Lib/test/test_lzma.py +++ b/Lib/test/test_lzma.py @@ -944,6 +944,49 @@ # This value should not be a valid check ID. 
self.assertFalse(lzma.check_is_supported(lzma.CHECK_UNKNOWN))

+    def test_encode_filter_properties(self):
+        with self.assertRaises(TypeError):  # spec is bytes, not a dict
+            lzma.encode_filter_properties(b"not a dict")
+        with self.assertRaises(ValueError):  # presumably an unknown filter ID
+            lzma.encode_filter_properties({"id": 0x100})
+        with self.assertRaises(ValueError):  # presumably an unknown option key
+            lzma.encode_filter_properties({"id": lzma.FILTER_LZMA2, "junk": 12})
+        with self.assertRaises(lzma.LZMAError):  # presumably "dist" too large
+            lzma.encode_filter_properties({"id": lzma.FILTER_DELTA,
+                                           "dist": 9001})
+
+        # Test with parameters used by zipfile module; dict_size is encoded LE.
+        props = lzma.encode_filter_properties({
+                "id": lzma.FILTER_LZMA1,
+                "pb": 2,
+                "lp": 0,
+                "lc": 3,
+                "dict_size": 8 << 20,
+            })
+        self.assertEqual(props, b"]\x00\x00\x80\x00")  # 0x5D = (pb*5+lp)*9+lc
+
+    def test_decode_filter_properties(self):
+        with self.assertRaises(TypeError):  # props must be bytes-like, not a dict
+            lzma.decode_filter_properties(lzma.FILTER_X86, {"should be": bytes})
+        with self.assertRaises(lzma.LZMAError):  # presumably a bad props length
+            lzma.decode_filter_properties(lzma.FILTER_DELTA, b"too long")
+
+        # Test with parameters used by zipfile module (pb=2, lp=0, lc=3, 8 MiB).
+        filterspec = lzma.decode_filter_properties(
+                lzma.FILTER_LZMA1, b"]\x00\x00\x80\x00")
+        self.assertEqual(filterspec["id"], lzma.FILTER_LZMA1)
+        self.assertEqual(filterspec["pb"], 2)
+        self.assertEqual(filterspec["lp"], 0)
+        self.assertEqual(filterspec["lc"], 3)
+        self.assertEqual(filterspec["dict_size"], 8 << 20)
+
+    def test_filter_properties_roundtrip(self):  # decode -> encode -> decode
+        spec1 = lzma.decode_filter_properties(
+                lzma.FILTER_LZMA1, b"]\x00\x00\x80\x00")
+        reencoded = lzma.encode_filter_properties(spec1)
+        spec2 = lzma.decode_filter_properties(lzma.FILTER_LZMA1, reencoded)
+        self.assertEqual(spec1, spec2)  # re-encoding must not change the spec
+

 # Test data:

diff --git a/Modules/_lzmamodule.c b/Modules/_lzmamodule.c
--- a/Modules/_lzmamodule.c
+++ b/Modules/_lzmamodule.c
@@ -137,6 +137,9 @@
     uint32_t - the "I" (unsigned int) specifier is the right size, but
         silently ignores overflows on conversion.
+    lzma_vli - the "K" (unsigned PY_LONG_LONG) specifier is the right
+        size, but like "I" it silently ignores overflows on conversion.
+
     lzma_mode and lzma_match_finder - these are enumeration types, and
         so the size of each is implementation-defined. Worse, different
         enum types can be of different sizes within the same program, so
@@ -147,12 +150,12 @@
     static int \
     FUNCNAME(PyObject *obj, void *ptr) \
     { \
-        unsigned long val; \
+        unsigned PY_LONG_LONG val; \
     \
-        val = PyLong_AsUnsignedLong(obj); \
+        val = PyLong_AsUnsignedLongLong(obj); \
         if (PyErr_Occurred()) \
             return 0; \
-        if ((unsigned long)(TYPE)val != val) { \
+        if ((unsigned PY_LONG_LONG)(TYPE)val != val) { \
             PyErr_SetString(PyExc_OverflowError, \
                             "Value too large for " #TYPE " type"); \
             return 0; \
@@ -162,13 +165,17 @@
     }

 INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter)
+INT_TYPE_CONVERTER_FUNC(lzma_vli, lzma_vli_converter)
 INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter)
 INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter)

 #undef INT_TYPE_CONVERTER_FUNC


-/* Filter specifier parsing functions. */
+/* Filter specifier parsing.
+
+   This code handles converting filter specifiers (Python dicts) into
+   the C lzma_filter structs expected by liblzma. */

 static void *
 parse_filter_spec_lzma(PyObject *spec)
@@ -358,6 +365,108 @@
 }


+/* Filter specifier construction.
+
+   This code handles converting C lzma_filter structs into
+   Python-level filter specifiers (represented as dicts). */
+
+/* Store VALUE in the dict SPEC under KEY as a Python int.
+   Returns 0 on success, or -1 with an exception set on failure. */
+static int
+spec_add_field(PyObject *spec, _Py_Identifier *key, unsigned PY_LONG_LONG value)
+{
+    int status;
+    PyObject *value_object;
+
+    value_object = PyLong_FromUnsignedLongLong(value);
+    if (value_object == NULL)
+        return -1;
+
+    /* Our reference is released whether or not the insertion succeeded. */
+    status = _PyDict_SetItemId(spec, key, value_object);
+    Py_DECREF(value_object);
+    return status;
+}
+
+/* Build a new filter specifier dict describing the filter F.
+
+   The dict always gets an "id" entry; the remaining entries depend on the
+   filter type. Only option fields that lzma_properties_decode() actually
+   sets are copied. Returns a new reference, or NULL with an exception set
+   (ValueError if the filter ID is not recognized). */
+static PyObject *
+build_filter_spec(const lzma_filter *f)
+{
+    PyObject *spec;
+
+    spec = PyDict_New();
+    if (spec == NULL)
+        return NULL;
+
+    /* Copy SOURCE->FIELD into spec under the key "FIELD"; jump to the error
+       label if that fails. */
+#define ADD_FIELD(SOURCE, FIELD) \
+    do { \
+        _Py_IDENTIFIER(FIELD); \
+        if (spec_add_field(spec, &PyId_##FIELD, SOURCE->FIELD) == -1) \
+            goto error;\
+    } while (0)
+
+    ADD_FIELD(f, id);
+
+    switch (f->id) {
+        /* For LZMA1 filters, lzma_properties_decode() only sets the lc, lp,
+           pb and dict_size fields; for LZMA2 filters it only sets dict_size.
+           The remaining lzma_options_lzma fields are left uninitialized, so
+           they must not be copied into the spec. */
+        case LZMA_FILTER_LZMA1: {
+            lzma_options_lzma *options = f->options;
+            ADD_FIELD(options, lc);
+            ADD_FIELD(options, lp);
+            ADD_FIELD(options, pb);
+            ADD_FIELD(options, dict_size);
+            break;
+        }
+        case LZMA_FILTER_LZMA2: {
+            lzma_options_lzma *options = f->options;
+            ADD_FIELD(options, dict_size);
+            break;
+        }
+        case LZMA_FILTER_DELTA: {
+            lzma_options_delta *options = f->options;
+            ADD_FIELD(options, dist);
+            break;
+        }
+        case LZMA_FILTER_X86:
+        case LZMA_FILTER_POWERPC:
+        case LZMA_FILTER_IA64:
+        case LZMA_FILTER_ARM:
+        case LZMA_FILTER_ARMTHUMB:
+        case LZMA_FILTER_SPARC: {
+            lzma_options_bcj *options = f->options;
+            /* lzma_properties_decode() leaves options NULL when the encoded
+               properties are empty or the start offset is 0. */
+            if (options != NULL) {
+                ADD_FIELD(options, start_offset);
+            }
+            break;
+        }
+        default:
+            PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu",
+                         (unsigned PY_LONG_LONG)f->id);
+            goto error;
+    }
+
+#undef ADD_FIELD
+
+    return spec;
+
+error:
+    Py_DECREF(spec);
+    return NULL;
+}
+
+
 /* LZMACompressor class.
*/
+
 static PyObject *
@@ -1005,11 +1094,101 @@
 }


+PyDoc_STRVAR(encode_filter_properties_doc,
+"encode_filter_properties(filter) -> bytes\n"
+"\n"
+"Return a bytes object encoding the options (properties) of the filter\n"
+"specified by *filter* (a dict).\n"
+"\n"
+"The result does not include the filter ID itself, only the options.\n"
+"\n"
+"This function is primarily of interest to users implementing custom\n"
+"file formats.\n");
+
+/* Implementation of lzma.encode_filter_properties(): parse the filter
+   specifier, ask liblzma how many bytes the encoded options need, and
+   encode them directly into a freshly allocated bytes object. */
+static PyObject *
+encode_filter_properties(PyObject *self, PyObject *args)
+{
+    PyObject *spec_obj;
+    PyObject *encoded = NULL;
+    lzma_filter filter;
+    uint32_t nbytes;
+
+    if (!PyArg_ParseTuple(args, "O:encode_filter_properties", &spec_obj))
+        return NULL;
+    if (parse_filter_spec(&filter, spec_obj) == NULL)
+        return NULL;
+
+    if (!catch_lzma_error(lzma_properties_size(&nbytes, &filter))) {
+        encoded = PyBytes_FromStringAndSize(NULL, nbytes);
+        if (encoded != NULL) {
+            uint8_t *out = (uint8_t *)PyBytes_AS_STRING(encoded);
+
+            /* On an encoding error, discard the partly written buffer. */
+            if (catch_lzma_error(lzma_properties_encode(&filter, out))) {
+                Py_DECREF(encoded);
+                encoded = NULL;
+            }
+        }
+    }
+
+    /* Success and failure share this cleanup; encoded is NULL on failure. */
+    PyMem_Free(filter.options);
+    return encoded;
+}
+
+
+PyDoc_STRVAR(decode_filter_properties_doc,
+"decode_filter_properties(filter_id, encoded_props) -> dict\n"
+"\n"
+"Return a dict describing a filter with ID *filter_id*, and options\n"
+"(properties) decoded from the bytes object *encoded_props*.\n"
+"\n"
+"This function is primarily of interest to users implementing custom\n"
+"file formats.\n");
+
+/* Implementation of lzma.decode_filter_properties(): let liblzma decode
+   the raw properties into an options struct, then convert that struct
+   into a filter specifier dict. */
+static PyObject *
+decode_filter_properties(PyObject *self, PyObject *args)
+{
+    Py_buffer props;
+    lzma_filter filter;
+    lzma_ret status;
+    PyObject *spec;
+
+    if (!PyArg_ParseTuple(args, "O&y*:decode_filter_properties",
+                          lzma_vli_converter, &filter.id, &props))
+        return NULL;
+
+    /* The input buffer is only needed for the duration of the decode call. */
+    status = lzma_properties_decode(&filter, NULL, props.buf, props.len);
+    PyBuffer_Release(&props);
+    if (catch_lzma_error(status))
+        return NULL;
+
+    spec = build_filter_spec(&filter);
+
+    /* filter.options came from lzma_properties_decode(), which was given a
+       NULL allocator and so used the default one; it must therefore be
+       released with plain free() rather than PyMem_Free(). */
+    free(filter.options);
+    return spec;
+}
+
+
 /* Module initialization. */

 static PyMethodDef module_methods[] = {
     {"check_is_supported", (PyCFunction)check_is_supported,
      METH_VARARGS, check_is_supported_doc},
+    {"encode_filter_properties", (PyCFunction)encode_filter_properties,
+     METH_VARARGS, encode_filter_properties_doc},
+    {"decode_filter_properties", (PyCFunction)decode_filter_properties,
+     METH_VARARGS, decode_filter_properties_doc},
     {NULL}
 };