Index: setup.py =================================================================== --- setup.py (revisjon 85890) +++ setup.py (arbeidskopi) @@ -1222,6 +1222,18 @@ else: missing.append('bz2') + # Per Øyvind Karlsen's lzma module. + if (self.compiler.find_library_file(lib_dirs, 'lzma')): + if sys.platform == "darwin": + lzma_extra_link_args = ('-Wl,-search_paths_first',) + else: + lzma_extra_link_args = () + exts.append( Extension('lzma', ['lzmamodule.c'], + libraries = ['lzma'], + extra_link_args = lzma_extra_link_args) ) + else: + missing.append('lzma') + # Interface to the Expat XML parser # # Expat was written by James Clark and is now maintained by a group of Index: Doc/library/lzma.rst =================================================================== --- Doc/library/lzma.rst (revisjon 0) +++ Doc/library/lzma.rst (revisjon 0) @@ -0,0 +1,451 @@ +:mod:`lzma` --- Compression compatible with :program:`xz` +=========================================================== + +.. module:: lzma + :synopsis: Interface to compression and decompression routines + compatible with lzma/xz. +.. moduleauthor:: Per Øyvind Karlsen +.. sectionauthor:: Per Øyvind Karlsen + + +This module provides a comprehensive interface for the lzma compression library. +It implements a complete file interface, one-shot (de)compression functions, and +types for sequential (de)compression. + +For other archive formats, see the :mod:`gzip`, :mod:`bz2`, :mod:`zipfile`, and +:mod:`tarfile` modules. + +Here is a summary of the features offered by the lzma module: + +* :class:`LZMAOptions` class is instantiated by the lzma module only as a + singleton to hold the various compression options and filters, with their + corresponding api documentation. 
+ +* :class:`LZMAFile` class implements a complete file interface, including + :meth:`~LZMAFile.readline`, :meth:`~LZMAFile.readlines`, + :meth:`~LZMAFile.writelines`, :meth:`~LZMAFile.seek`, etc; + +* :class:`LZMAFile` class implements emulated :meth:`~LZMAFile.seek` support; + +* :class:`LZMAFile` class implements universal newline support; + +* :class:`LZMAFile` class offers an optimized line iteration using the readahead + algorithm borrowed from file objects; + +* Sequential (de)compression supported by :class:`LZMACompressor` and + :class:`LZMADecompressor` classes; + +* One-shot (de)compression supported by :func:`compress` and :func:`decompress` + functions; + +* Thread safety uses individual locking mechanism. + +Compression options +------------------------ + +The various compression options and filters are available and +documented in the :class:`LZMAOptions` class. + + +.. class:: LZMAOptions + This class describes the different LZMA compression options and holds the + different min and max value constants for these in the variables. + + .. attribute:: lzma.options.check + Type of integrity check to use (XZ format only): + ``'crc32'``: CRC32 using the polynomial from the IEEE 802.3 standard. + ``'crc64'``: CRC64 using the polynomial from the ECMA-182 standard. (default) + ``'sha256'``: SHA-256. + ``'none'``: Don't use any integrity check. + + .. attribute:: lzma.options.compresslevel + Compression preset level (0 - 9, LZMA) + This will automatically set the values for the various compression + options. + Setting any of the other compression options at the same time as well + will override the specific value set by this preset level.
+ + Preset level settings: + compresslevel lc lp pb mode mf nice_len depth dict_size + 9 3 0 2 normal bt4 64 0 67108864 + 8 3 0 2 normal bt4 64 0 33554432 + 7 3 0 2 normal bt4 64 0 16777216 + 6 3 0 2 normal bt4 64 0 8388608 + 5 3 0 2 normal bt4 32 0 4194304 + 4 3 0 2 normal bt4 32 0 2097152 + 3 3 0 2 normal bt4 32 0 1048576 + 2 3 0 2 fast hc4 32 0 524288 + 1 3 0 2 fast hc3 32 0 65536 + 0 3 0 2 fast hc3 8 0 65536 + + .. attribute:: lzma.options.depth + Depth (also known as match finder cycles, LZMA) + Higher values give slightly better compression ratio but + decrease speed. Use special value 0 to let liblzma use + match-finder-dependent default value. + + .. attribute:: lzma.options.dict_size + Dictionary size in bytes (4096 - 1610612736, LZMA) + Dictionary size indicates how many bytes of the recently processed + uncompressed data is kept in memory. One method to reduce size of + the uncompressed data is to store distance-length pairs, which + indicate what data to repeat from the dictionary buffer. Thus, + the bigger the dictionary, the better compression ratio usually is. + + .. attribute:: lzma.options.dist + Delta distance (1 - 256, Delta) + With the only currently supported type, 'bytes', + the distance is as bytes. + + Examples: + - 16-bit stereo audio: distance = 4 bytes + - 24-bit RGB image data: distance = 3 bytes + + Default: ``1`` + + .. attribute:: lzma.options.format + File format to use for compression: + ``'xz'``: XZ format used by new xz tool. (default) + ``'alone'``: LZMA_Alone format used by older lzma utils. + ``'raw'``: Raw format. + + .. attribute:: lzma.options.id + Filter id + Available filters: + ``'lzma1'``: LZMA1 [LZMA] + ``'lzma2'``: LZMA2 [LZMA] + ``'x86'``: x86 [BCJ] + ``'powerpc'``: PowerPC (big endian only) [BCJ] + ``'ia64'``: IA64 (Itanium) [BCJ] + ``'arm'``: ARM (little endian only) [BCJ] + ``'armthumb'``: ARM-Thumb (little endian only) [BCJ] + ``'sparc'``: SPARC [BCJ] + ``'delta'``: Delta [Delta] + + .. 
attribute:: lzma.options.lc + Number of literal context bits (0 - 4, LZMA) + How many of the highest bits of the previous uncompressed + eight-bit byte (also known as `literal') are taken into + account when predicting the bits of the next literal. + + There is a limit that applies to literal context bits and literal + position bits together: lc + lp <= 4. Without this limit the + decoding could become very slow, which could have security related + results in some cases like email servers doing virus scanning. + + .. attribute:: lzma.options.lp + Number of literal position bits (0 - 4, LZMA) + How many of the lowest bits of the current position (number + of bytes from the beginning of the uncompressed data) in the + uncompressed data are taken into account when predicting the + bits of the next literal (a single eight-bit byte). + + .. attribute:: lzma.options.mf + Match Finder (LZMA) + The match finder has a major effect on both speed and compression ratio. + Usually hash chains are faster than binary trees. + Available match finders: + ``bt2``: Binary Tree with 2 bytes hashing + Memory requirements: 9.5 * dict_size + 4 MiB + ``bt3``: Binary Tree with 3 bytes hashing + Memory requirements: 11.5 * dict_size + 4 MiB + ``bt4``: Binary Tree with 4 bytes hashing + Memory requirements: 11.5 * dict_size + 4 MiB + ``hc3``: Hash Chain with 3 bytes hashing + ``hc4``: Hash Chain with 4 bytes hashing + Memory requirements: 7.5 * dict_size + 4 MiB + + .. attribute:: lzma.options.mode + Available modes: (``'fast'`` or ``'normal'``, LZMA). + Fast mode is usually at its best when combined with a hash chain match + finder. + Normal mode is usually notably slower than fast mode. Use this together with + binary tree match finders to expose the full potential of the LZMA + encoder. + + ..
attribute:: lzma.options.nice_len + Nice length of a match (also known as number of fast bytes) (5 - 273, LZMA) + Nice length of match determines how many bytes the encoder + compares from the match candidates when looking for the best + match. A bigger fast bytes value usually increases both compression + ratio and time. + + .. attribute:: lzma.options.pb + Number of position bits (0 - 4, LZMA) + How many of the lowest bits of the current position in the + uncompressed data are taken into account when estimating + probabilities of matches. A match is a sequence of bytes for + which a matching sequence is found from the dictionary and + thus can be stored as distance-length pair. + + Example: If most of the matches occur at byte positions + of 8 * n + 3, that is, 3, 11, 19, ... set pb to 3, + because 2**3 == 8. + + .. attribute:: lzma.options.start + Start offset for conversions (BCJ) + This setting is useful only when the same filter is used + _separately_ for multiple sections of the same executable file, + and the sections contain cross-section branch/call/jump + instructions. In that case it is beneficial to set the start + offset of the non-first sections so that the relative addresses + of the cross-section branch/call/jump instructions will use the + same absolute addresses as in the first section. + + Default: ``0`` + + .. attribute:: lzma.options.threads + Number of threads used for compression. + A value of 0 means one thread per available CPU. + + Default: ``1`` + + This feature is not implemented yet. + + +(De)compression of files +------------------------ + +Handling of compressed files is offered by the :class:`LZMAFile` class. + + +.. class:: LZMAFile(filename, mode='r', buffering=0, memlimit=-1, compresslevel=6, format='xz', check='crc64', threads=1, filter=({'id':'lzma2', 'extreme':False, 'dict_size':23, 'lc':3, 'lp':0, 'pb':2, 'mode':2, 'nice_len':128, 'mf':'bt4', 'depth':0})) + + Open a lzma file.
Mode can be either ``'r'`` or ``'w'``, for reading (default) + or writing. When opened for writing, the file will be created if it doesn't + exist, and truncated otherwise. If *buffering* is given, ``0`` means + unbuffered, and larger numbers specify the buffer size; the default is + ``0``. If *memlimit* is given, it will set the maximum size of memory to use + for decompression; this can be used to prevent compressed files which have a + really huge dictionary size from using up all memory; the default ``-1`` + means no limit. + If *compresslevel* is given, it must be a number between ``0`` and + ``9``; the default is ``6``. For information about the different levels as + well as for the *format*, *check*, *threads* and *filter* options, refer to + ``Compression options``. + Add a ``'U'`` to mode to open the file for input with universal newline + support. Any line ending in the input file will be seen as a ``'\n'`` in + Python. Also, a file so opened gains the attribute :attr:`newlines`; the + value for this attribute is one of ``None`` (no newline read yet), ``'\r'``, + ``'\n'``, ``'\r\n'`` or a tuple containing all the newline types seen. + Universal newlines are available only when reading. Instances support + iteration in the same way as normal :class:`file` instances. + + :class:`LZMAFile` supports the :keyword:`with` statement. + + .. method:: close() + + Close the file. Sets data attribute :attr:`closed` to true. A closed file + cannot be used for further I/O operations. :meth:`close` may be called + more than once without error. + + + .. method:: read([size]) + + Read at most *size* uncompressed bytes, returned as a byte string. If the + *size* argument is negative or omitted, read until EOF is reached. + + + .. method:: readline([size]) + + Return the next line from the file, as a byte string, retaining newline. + A non-negative *size* argument limits the maximum number of bytes to + return (an incomplete line may be returned then).
Return an empty byte + string at EOF. + + + .. method:: readlines([size]) + + Return a list of lines read. The optional *size* argument, if given, is an + approximate bound on the total number of bytes in the lines returned. + + + .. method:: seek(offset[, whence]) + + Move to new file position. Argument *offset* is a byte count. Optional + argument *whence* defaults to ``os.SEEK_SET`` or ``0`` (offset from start + of file; offset should be ``>= 0``); other values are ``os.SEEK_CUR`` or + ``1`` (move relative to current position; offset can be positive or + negative), and ``os.SEEK_END`` or ``2`` (move relative to end of file; + offset is usually negative, although many platforms allow seeking beyond + the end of a file). + + Note that seeking of lzma files is emulated, and depending on the + parameters the operation may be extremely slow. + + + .. method:: tell() + + Return the current file position, an integer. + + + .. method:: write(data) + + Write the byte string *data* to file. Note that due to buffering, + :meth:`close` may be needed before the file on disk reflects the data + written. + + + .. method:: writelines(sequence_of_byte_strings) + + Write the sequence of byte strings to the file. Note that newlines are not + added. The sequence can be any iterable object producing byte strings. + This is equivalent to calling write() for each byte string. + + +Sequential (de)compression +-------------------------- + +Sequential compression and decompression is done using the classes +:class:`LZMACompressor` and :class:`LZMADecompressor`. + + +.. class:: LZMACompressor(compresslevel=6, format='xz', check='crc64', threads=1, filter=({'id':'lzma2', 'extreme':False, 'dict_size':23, 'lc':3, 'lp':0, 'pb':2, 'mode':2, 'nice_len':128, 'mf':'bt4', 'depth':0})) + + Create a new compressor object. This object may be used to compress data + sequentially. If you want to compress data in one shot, use the + :func:`compress` function instead.
The *compresslevel* parameter, if given, + must be a number between ``0`` and ``9``; the default is ``6``. + For information about the different levels as well as for the *format*, + *check*, *threads* and *filter* options, refer to ``Compression options``. + + .. method:: compress(data) + + Provide more data to the compressor object. It will return chunks of + compressed data whenever possible. When you've finished providing data to + compress, call the :meth:`flush` method to finish the compression process, + and return what is left in internal buffers. + + + .. method:: flush(mode=LZMA_FINISH) + + Returns a string containing any remaining compressed data. + + 'mode' can be one of the constants LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, + LZMA_FINISH; the default value used when mode is not specified is + LZMA_FINISH. If mode == LZMA_FINISH, the compressor object can no longer + be used after calling the flush() method. Otherwise, more data can still + be compressed. + + +.. class:: LZMADecompressor(max_length=0, memlimit=-1) + + Create a new decompressor object. This object may be used to decompress data + sequentially. If you want to decompress data in one shot, use the + :func:`decompress` function instead. + + .. method:: decompress(data) + + Return a string containing the decompressed version of the data. + + After calling this function, some of the input data may still be stored in + internal buffers for later processing. + Call the flush() method to clear these buffers. + If *max_length* is specified then the return value will be + no longer than *max_length*. Unconsumed input data will be stored in + the :attr:`unconsumed_tail` attribute. + + .. method:: flush(mode=LZMA_FINISH [, bufsize] ) + + Return a string containing any remaining decompressed data. + + If *bufsize* is given, it is the initial size of the output buffer. + + The decompressor object cannot be used again after this call. + + + ..
attribute:: unused_data + + A string which contains any bytes past the end of the compressed data. That + is, this remains ``""`` until the last byte that contains compression data + is available. If the whole string turned out to contain compressed data, + this is ``""``, the empty string. + + The only way to determine where a string of compressed data ends is by + actually decompressing it. This means that when compressed data is + contained as part of a larger file, you can only find the end of it by reading + data and feeding it followed by some non-empty string into a decompression + object's :meth:`decompress` method until the :attr:`unused_data` attribute + is no longer the empty string. + + + .. attribute:: unconsumed_tail + + A string that contains any data that was not consumed by the last + :meth:`decompress` call because it exceeded the limit for the uncompressed + data buffer. This data has not yet been seen by the liblzma machinery, so + you must feed it (possibly with further data concatenated to it) back to a + subsequent :meth:`decompress` method call in order to get correct output. + + +One-shot (de)compression +------------------------ + +One-shot compression and decompression is provided through the :func:`compress` +and :func:`decompress` functions. + + +.. function:: compress(data, compresslevel=6, format='xz', check='crc64', threads=1, filter=({'id':'lzma2', 'extreme':False, 'dict_size':23, 'lc':3, 'lp':0, 'pb':2, 'mode':2, 'nice_len':128, 'mf':'bt4', 'depth':0})) + + Compress *data* in one shot. If you want to compress data sequentially, use + an instance of :class:`LZMACompressor` instead. The *compresslevel* parameter, + if given, must be a number between ``0`` and ``9``; the default is ``6``. + For information about the different levels as well as for the *format*, + *check*, *threads* and *filter* options, refer to ``Compression options``. + +.. function:: crc32(data[, value]) + + ..
index:: + single: Cyclic Redundancy Check + single: checksum; Cyclic Redundancy Check + + Computes a CRC (Cyclic Redundancy Check) checksum of *data*. If *value* is + present, it is used as the starting value of the checksum; otherwise, a fixed + default value is used. This allows computing a running checksum over the + concatenation of several inputs. The algorithm is not cryptographically + strong, and should not be used for authentication or digital signatures. + Since the algorithm is designed for use as a checksum algorithm, it is not + suitable for use as a general hash algorithm. + + Always returns an unsigned 32-bit integer. + +.. note:: + To generate the same numeric value across all Python versions and + platforms use crc32(data) & 0xffffffff. If you are only using + the checksum in packed binary format this is not necessary as the + return value is the correct 32bit binary representation + regardless of sign. + +.. function:: crc64(data[, value]) + + .. index:: + single: Cyclic Redundancy Check + single: checksum; Cyclic Redundancy Check + + Computes a CRC (Cyclic Redundancy Check) checksum of *data*. If *value* is + present, it is used as the starting value of the checksum; otherwise, a fixed + default value is used. This allows computing a running checksum over the + concatenation of several inputs. The algorithm is not cryptographically + strong, and should not be used for authentication or digital signatures. + Since the algorithm is designed for use as a checksum algorithm, it is not + suitable for use as a general hash algorithm. + + Always returns an unsigned 64-bit integer. + +.. note:: + To generate the same numeric value across all Python versions and + platforms use crc64(data) & 0xffffffffffffffff. If you are only using + the checksum in packed binary format this is not necessary as the + return value is the correct 64bit binary representation + regardless of sign. + +.. 
function:: decompress(data, bufsize=8192, memlimit=-1, flags=LZMA_CONCATENATED|LZMA_TELL_UNSUPPORTED_CHECK) + + Decompress *data* in one shot. If you want to decompress data sequentially, + use an instance of :class:`LZMADecompressor` instead. + + Optional arg *bufsize* is the initial output buffer size. + Optional arg *memlimit* is the maximum amount of memory the decoder may use, + -1 means no limit. + + Index: Lib/test/test_lzma.py =================================================================== --- Lib/test/test_lzma.py (revisjon 0) +++ Lib/test/test_lzma.py (revisjon 0) @@ -0,0 +1,585 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Python Bindings for LZMA +# +# Copyright (c) 2008 Per Øyvind Karlsen +# liblzma Copyright (C) 2007-2008 Lasse Collin +# LZMA SDK Copyright (C) 1999-2007 Igor Pavlov +# Based much on regression tests for pylzma by Joachim Bauch +# & bz2 by Gustavo Niemeyer +# +import sys, random +import lzma +import unittest +import os +from test import support +from test.support import TESTFN, findfile + +from hashlib import md5 +from io import BytesIO + + +# cache random strings to speed up tests +_random_strings = {} +def generate_random(size, choice=random.choice): + global _random_strings + if size in _random_strings: + return _random_strings[size] + + s = random._urandom(size) + _random_strings[size] = s + return s + +class TestLZMA(unittest.TestCase): + + def setUp(self): + self.plain = b"Her kommer kaptein klem, superhelten som aldri er slem! kos&klem! 
:o)" + self.compressed_xz = b"\xfd7zXZ\x00\x00\x04\xe6\xd6\xb4F\x02\xc0GE!\x01\x16\x00\xc9\xbd\x80l\xe0\x00D\x00?]\x00$\x19JB\x02\xf3\xd3R\x97%\x08$\xd0\xd2\x0f@\x82\x85[\xf6(\x11\xd8\\[1n0\xb9'\xea\xfcV\x81b\xd3?\x18\x011D\xb9\x9a\xe2\x9e g\xe0\x9d`\xc6\x9f\xac\x0b\\\xaa\xf4J\n\x07\x18\xbc\x00\x00\x00L\xbe\x17\xec\x95:\xfa\xdc\x00\x01[E\xab\x84\x7f\x7f\x1f\xb6\xf3}\x01\x00\x00\x00\x00\x04YZ" + self.compressed_stream_xz = b"\xfd7zXZ\x00\x00\x04\xe6\xd6\xb4F\x02\x00!\x01\x16\x00\x00\x00t/\xe5\xa3\xe0\x00D\x00?]\x00$\x19JB\x02\xf3\xd3R\x97%\x08$\xd0\xd2\x0f@\x82\x85[\xf6(\x11\xd8\\[1n0\xb9'\xea\xfcV\x81b\xd3?\x18\x011D\xb9\x9a\xe2\x9e g\xe0\x9d`\xc6\x9f\xac\x0b\\\xaa\xf4J\n\x07\x18\xbc\x00\x00\x00L\xbe\x17\xec\x95:\xfa\xdc\x00\x01[E\xab\x84\x7f\x7f\x1f\xb6\xf3}\x01\x00\x00\x00\x00\x04YZ" + self.compressed_alone = b"]\x00\x00\x80\x00\xff\xff\xff\xff\xff\xff\xff\xff\x00$\x19JB\x02\xf3\xd3R\x97%\x08$\xd0\xd2\x0f@\x82\x85[\xf6(\x11\xd8\\[1n0\xb9'\xea\xfcV\x81b\xd3?\x18\x011D\xb9\x9a\xe2\x9e g\xe0\x9d`\xc6\x9f\xac\x0b\\\xaa\xf4J\n\x0f\xbe\x15c\xd7\xfb\x11X\x00" + self.compressed_stream_alone = b"]\x00\x00\x80\x00\xff\xff\xff\xff\xff\xff\xff\xff\x00$\x19JB\x02\xf3\xd3R\x97%\x08$\xd0\xd2\x0f@\x82\x85[\xf6(\x11\xd8\\[1n0\xb9'\xea\xfcV\x81b\xd3?\x18\x011D\xb9\x9a\xe2\x9e g\xe0\x9d`\xc6\x9f\xac\x0b\\\xaa\xf4J\n\x0f\xbe\x15c\xd7\xfb\x11X\x00" + self.data_large = generate_random(1<<20) + + def test_compression(self): + compressed = lzma.compress(self.plain, format='xz') + self.assertEqual(compressed, self.compressed_xz) + + def test_decompression(self): + decompressed = lzma.decompress(self.compressed_xz) + self.assertEqual(decompressed, self.plain) + + def test_compression_decompression(self, dict_size=1<<23): + # call compression and decompression on random data of various sizes + for i in range(18): + size = 1 << i + original = generate_random(size) + # FIXME: + """ + result = lzma.decompress(lzma.compress(original, format='alone', filter=({'id':'lzma1', 
'dict_size':dict_size}))) + self.assertEqual(len(result), size) + self.assertEqual(md5(original).hexdigest(), md5(result).hexdigest()) + """ + result = lzma.decompress(lzma.compress(original, format='xz', filter=({'id':'lzma2', 'dict_size':dict_size}))) + self.assertEqual(len(result), size) + self.assertEqual(md5(original).hexdigest(), md5(result).hexdigest()) + + def test_multi(self): + # call compression and decompression multiple times to detect memory leaks... + for x in range(4): + self.test_compression_decompression(dict_size=1<<26) + + def test_decompression_stream(self): + # test decompression object in one steps + decompress = lzma.LZMADecompressor() + data = decompress.decompress(self.compressed_xz) + data += decompress.flush() + self.assertEqual(data, self.plain) + + def test_decompression_stream_two(self): + # test decompression in two steps + decompress = lzma.LZMADecompressor() + data = decompress.decompress(self.compressed_stream_xz[:10]) + data += decompress.decompress(self.compressed_stream_xz[10:]) + data += decompress.flush() + self.assertEqual(data, self.plain) + + def test_decompression_stream_props(self): + # test decompression with properties in separate step + decompress = lzma.LZMADecompressor() + data = decompress.decompress(self.compressed_stream_xz[:5]) + data += decompress.decompress(self.compressed_stream_xz[5:]) + data += decompress.flush() + self.assertEqual(data, self.plain) + + def test_decompression_streaming(self): + # test decompressing with one byte at a time... 
+ decompress = lzma.LZMADecompressor() + infile = BytesIO(self.compressed_stream_xz) + outfile = BytesIO() + while 1: + data = infile.read(1) + if not data: break + outfile.write(decompress.decompress(data)) + outfile.write(decompress.flush()) + self.assertEqual(outfile.getvalue(), self.plain) + + def test_compression_stream(self): + # test compression object in one steps + compress = lzma.LZMACompressor(format='alone') + data = compress.compress(self.plain) + data += compress.flush() + self.assertEqual(data, self.compressed_stream_alone) + compress = lzma.LZMACompressor(format='xz') + data = compress.compress(self.plain) + data += compress.flush() + self.assertEqual(data, self.compressed_stream_xz) + + def test_compression_stream_two(self): + # test compression in two steps + compress = lzma.LZMACompressor(format='alone') + data = compress.compress(self.plain[:10]) + data += compress.compress(self.plain[10:]) + data += compress.flush() + self.assertEqual(data, self.compressed_stream_alone) + compress = lzma.LZMACompressor(format='xz') + data = compress.compress(self.plain[:10]) + data += compress.compress(self.plain[10:]) + data += compress.flush() + self.assertEqual(data, self.compressed_stream_xz) + + def test_compression_stream_props(self): + # test compression with properties in separate step + compress = lzma.LZMACompressor(format='alone') + data = compress.compress(self.plain[:5]) + data += compress.compress(self.plain[5:]) + data += compress.flush() + self.assertEqual(data, self.compressed_stream_alone) + compress = lzma.LZMACompressor(format='xz') + data = compress.compress(self.plain[:5]) + data += compress.compress(self.plain[5:]) + data += compress.flush() + self.assertEqual(data, self.compressed_stream_xz) + + def test_compression_streaming(self): + # test compressing with one byte at a time... 
+ compress = lzma.LZMACompressor(format='alone') + infile = BytesIO(self.plain) + outfile = BytesIO() + while 1: + data = infile.read(1) + if not data: break + outfile.write(compress.compress(data)) + outfile.write(compress.flush()) + self.assertEqual(outfile.getvalue(), self.compressed_stream_alone) + compress = lzma.LZMACompressor(format='xz') + infile = BytesIO(self.plain) + outfile = BytesIO() + while 1: + data = infile.read(1) + if not data: break + outfile.write(compress.compress(data)) + outfile.write(compress.flush()) + self.assertEqual(outfile.getvalue(), self.compressed_stream_xz) + + + def test_compress_large_string(self): + # decompress large block of repeating data, string version + compressed = lzma.compress(self.data_large) + self.assertTrue(self.data_large == lzma.decompress(compressed)) + + def test_decompress_large_stream(self): + # decompress large block of repeating data, stream version + decompress = lzma.LZMADecompressor() + infile = BytesIO(lzma.compress(self.data_large, format='alone')) + outfile = BytesIO() + while 1: + tmp = infile.read(1) + if not tmp: break + outfile.write(decompress.decompress(tmp)) + outfile.write(decompress.flush()) + self.assertTrue(self.data_large == outfile.getvalue()) + decompress = lzma.LZMADecompressor() + infile = BytesIO(lzma.compress(self.data_large, format='xz')) + outfile = BytesIO() + while 1: + tmp = infile.read(1) + if not tmp: break + outfile.write(decompress.decompress(tmp)) + outfile.write(decompress.flush()) + self.assertTrue(self.data_large == outfile.getvalue()) + + def test_decompress_large_stream_bigchunks(self): + # decompress large block of repeating data, stream version with big chunks + decompress = lzma.LZMADecompressor() + infile = BytesIO(lzma.compress(self.data_large)) + outfile = BytesIO() + while 1: + tmp = infile.read(1024) + if not tmp: break + outfile.write(decompress.decompress(tmp)) + outfile.write(decompress.flush()) + self.assertTrue(self.data_large == outfile.getvalue()) + + def 
test_compress_large_stream(self): + # compress large block of repeating data, stream version + compress = lzma.LZMACompressor(format='alone') + infile = BytesIO(self.data_large) + outfile = BytesIO() + while 1: + tmp = infile.read(1) + if not tmp: break + outfile.write(compress.compress(tmp)) + outfile.write(compress.flush()) + self.assertTrue(self.data_large == lzma.decompress(outfile.getvalue())) + compress = lzma.LZMACompressor(format='xz') + infile = BytesIO(self.data_large) + outfile = BytesIO() + while 1: + tmp = infile.read(1) + if not tmp: break + outfile.write(compress.compress(tmp)) + outfile.write(compress.flush()) + self.assertTrue(self.data_large == lzma.decompress(outfile.getvalue())) + + def test_compress_large_stream_bigchunks(self): + # compress large block of repeating data, stream version with big chunks + compress = lzma.LZMACompressor(format='alone') + infile = BytesIO(self.data_large) + outfile = BytesIO() + while 1: + tmp = infile.read(1024) + if not tmp: break + outfile.write(compress.compress(tmp)) + outfile.write(compress.flush()) + self.assertTrue(self.data_large == lzma.decompress(outfile.getvalue())) + compress = lzma.LZMACompressor(format='xz') + infile = BytesIO(self.data_large) + outfile = BytesIO() + while 1: + tmp = infile.read(1024) + if not tmp: break + outfile.write(compress.compress(tmp)) + outfile.write(compress.flush()) + self.assertTrue(self.data_large == lzma.decompress(outfile.getvalue())) + + def test_decompress_lzma_alone_format(self): + infile = open(findfile('teststring.lzma'), 'rb') + outstring = lzma.decompress(infile.read()) + infile.close() + self.assertEqual(self.plain, outstring) + + def test_decompress_xz_format(self): + infile = open(findfile('teststring.xz'), 'rb') + outstring = lzma.decompress(infile.read()) + infile.close() + self.assertEqual(self.plain, outstring) + +class TestLZMAOptions(unittest.TestCase): + def setUp(self): + self.data = b'kosogklem'*(1<<10) + + def test_preset_compresslevels(self): + 
for lvl in range(lzma.options.compresslevel[0], lzma.options.compresslevel[1]+1): + result = lzma.compress(self.data, compresslevel=lvl) + self.assertEqual(self.data, lzma.decompress(result)) + self.assertRaises(ValueError, lzma.compress, self.data, compresslevel=lzma.options.compresslevel[1]+1) + self.assertRaises(ValueError, lzma.compress, self.data, compresslevel=lzma.options.compresslevel[0]-1) + + def test_dict_size(self): + dict = lzma.options.dict_size[0] + while dict <= 1<<26: # lzma.options.dict_size[1]: Since using very large dictionaries requires + # very large amount of memory, let's not go beyond 64mb for testing.. + result = lzma.compress(self.data, filter=({'dict_size':dict})) + self.assertEqual(self.data, lzma.decompress(result)) + dict = dict * 2 + self.assertRaises(ValueError, lzma.compress, self.data, filter=({'dict_size':lzma.options.dict_size[1]+1})) + self.assertRaises(ValueError, lzma.compress, self.data, filter=({'dict_size':lzma.options.dict_size[0]-1})) + + def test_nice_len(self): + for nl in range(lzma.options.nice_len[0], lzma.options.nice_len[1]+1): + result = lzma.compress(self.data, filter=({'nice_len':nl})) + self.assertEqual(self.data, lzma.decompress(result)) + self.assertRaises(ValueError, lzma.compress, self.data, filter=({'nice_len':lzma.options.nice_len[1]+1})) + self.assertRaises(ValueError, lzma.compress, self.data, filter=({'nice_len':lzma.options.nice_len[0]-1})) + + def test_lclp(self): + for lcb in range(lzma.options.lc[0], lzma.options.lc[1]+1): + for lpb in range(lzma.options.lc[1]-lcb): + result = lzma.compress(self.data, filter=({'lc':lcb, 'lp':lpb})) + self.assertEqual(self.data, lzma.decompress(result)) + self.assertRaises(ValueError, lzma.compress, self.data, filter=({'lc':lzma.options.lc[0]-1})) + self.assertRaises(ValueError, lzma.compress, self.data, filter=({'lc':lzma.options.lc[1]+1})) + self.assertRaises(ValueError, lzma.compress, self.data, filter=({'lp':lzma.options.lp[0]-1})) + 
self.assertRaises(ValueError, lzma.compress, self.data, filter=({'lp':lzma.options.lp[1]+1})) + + def test_pb(self): + for pb in range(lzma.options.pb[0], lzma.options.pb[1]+1): + result = lzma.compress(self.data, filter=({'pb':pb})) + self.assertEqual(self.data, lzma.decompress(result)) + self.assertRaises(ValueError, lzma.compress, self.data, filter=({'pb':lzma.options.pb[0]-1})) + self.assertRaises(ValueError, lzma.compress, self.data, filter=({'pb':lzma.options.pb[1]+1})) + + def test_mode(self): + for md in lzma.options.mode: + result = lzma.decompress(lzma.compress(self.data, filter=({'mode':md}))) + self.assertEqual(self.data, result) + self.assertRaises(ValueError, lzma.compress, self.data, filter=({'mode':'foo'})) + + def test_mf(self): + for match_finder in lzma.options.mf: + result = lzma.decompress(lzma.compress(self.data, filter=({'mf':match_finder}))) + self.assertEqual(self.data, result) + self.assertRaises(ValueError, lzma.compress, self.data, filter=({'mf':'1234'})) + + def test_depth(self): + for d in range(lzma.options.depth, 20): + result = lzma.decompress(lzma.compress(self.data, filter=({'depth':d}))) + self.assertEqual(self.data, result) + self.assertRaises(ValueError, lzma.compress, self.data, filter=({'depth':-1})) + + def test_format(self): + for format in lzma.options.format: + # As raw format has no header, lzma.decompress won't be able to identify it, so skip it.. + if format == "raw": continue + result = lzma.decompress(lzma.compress(self.data, format=format)) + self.assertEqual(self.data, result) + self.assertRaises(ValueError, lzma.compress, self.data, format='foo') + +class TestLZMAFile(unittest.TestCase): + "Test lzma.LZMAFile type miscellaneous methods." 
+ + TEXT = b"root:x:0:0:root:/root:/bin/bash\nbin:x:1:1:bin:/bin:\ndaemon:x:2:2:daemon:/sbin:\nadm:x:3:4:adm:/var/adm:\nlp:x:4:7:lp:/var/spool/lpd:\nsync:x:5:0:sync:/sbin:/bin/sync\nshutdown:x:6:0:shutdown:/sbin:/sbin/shutdown\nhalt:x:7:0:halt:/sbin:/sbin/halt\nmail:x:8:12:mail:/var/spool/mail:\nnews:x:9:13:news:/var/spool/news:\nuucp:x:10:14:uucp:/var/spool/uucp:\noperator:x:11:0:operator:/root:\ngames:x:12:100:games:/usr/games:\ngopher:x:13:30:gopher:/usr/lib/gopher-data:\nftp:x:14:50:FTP User:/var/ftp:/bin/bash\nnobody:x:65534:65534:Nobody:/home:\npostfix:x:100:101:postfix:/var/spool/postfix:\nniemeyer:x:500:500::/home/niemeyer:/bin/bash\npostgres:x:101:102:PostgreSQL Server:/var/lib/pgsql:/bin/bash\nmysql:x:102:103:MySQL server:/var/lib/mysql:/bin/bash\nwww:x:103:104::/var/www:/bin/false\n" + + DATA = b"]\x00\x00\x80\x00\x02\x03\x00\x00\x00\x00\x00\x00\x009\x1b\xec\xe8:-\x7f\xca\\\xf7\xb4C\xb1\xf1<<\xaf5\x10\x92\xd2\x14,\x13+\xef\xf7\x8cCGl\xb1\x97\x00\x00\xb5j\x9a\xdc\x1e\xf2X\x8b\xd9\xebM8\x8d\\l'c\xd6t\xd5\x861\x8e\xc5W7\xdd\x8c\x8d\x01\xec^\xbc\xdb\xf6\xde\xda\xdc\x93\xc3\x0c|E\x8d\xb2DD\xac1\x84\xfck\xa1_i\x7f\xcb\xd4\x99\n\xe9\x9b\xa86\xces\xb3\xd2f\xd8r^8 \x95\x98\xeb\xdb\\\xd3\xfbY\xef\xcfW]\x13|9\xdb\x92C\xc6\xf2W\xd9h\xe8^i \xd6\x88n\xf1\xcf\x83H\xd8\xfd\x1bz\xce\xe6\xfc\xb4\xa0\xbb\x9c\xde,\x96\x88\xa0\xe7\x80\xa8K\xdb]Wy\xb4\xbc\xfac)!\xfcS\xef\x07\xb8\xfbx\xe7\xe5\x02\xd54;+\xb3m5\xd2\x00V\x8b\x9a\x11H\x8d\xa8e>\xddd\xc4xH\x90\xa4Y\x97\xab\x9d\x9e\x9e$\xa2.#?G\x8d\xfd\xefn\xcb\xb5mjB\t@\xc7H\x07/<=?\x08@\xca\xca\x85\xd0Nb\xd1\xfe\x83s\xbb\x14\xa1\te\xcf\x1d?\x077n\x14%\x02\xc5\xf2c\xfe\x12H\x19\xea@\x0b\xc2\xf2\x8d^\x93w\x9f\x9a\x1bw\xf4\xcb\xc7z\xb7Iep\x17u\xa7\x1d\xaa:0\xdc\xaf\x80h\x93+\xbdg\xbf\x16\xce\x93\xcc\x8b\xecus<\xc6+,%UJ0\x8e\xfb4\x85\x11D\xf5j\xf0{\xa8\x0c\xb0U\\[\x18fJ{\xf6A\xfb\xfd\x19\xe8\xf4\xa3k;\x08\x07\x07\n[\xfd\x7f\x91\xfe\x8a\xaf\x1fC}4x:\x8e\xd1^Sw\x18\x1c}g\xb07" + + DATA_CRLF = 
b"]\x00\x00\x80\x00\xff\xff\xff\xff\xff\xff\xff\xff\x009\x1b\xec\xe8:-\x7f\xca\\\xf7\xb4C\xb1\xf1<<\xaf5\x10\x92\xd2\x14,>\xc4&\xef\x90[\xa1\xa6%\xfaS\x17\xf6/\xd1\xa8\x87\xbb\xb3b\xe7F\x10\nu\xb4\x96\x8bs\xe1\xda\xfdQ~-\xb5T\xa5i-;\x01m$\xebPl[\xe0\x14F\x12\xc2\x88\xbc\xdb\x85\xfc\x90\x1dz\xb7\xfe\xca\xec\xe3\x92\x14\xdb\x88\x16?\x9e\xdaU\x9cJR\x0eF\x0e]u^\xbe\xcbb\xbe\xb7\xbc;\x8b\xfb\xd2\xc1\xd5\xe3\x99\x80\xeb\xcaP\xb8\x9e\xd4\xd7\xd5\x0e\x93$\xff\x81G\xd85\xba\xa1~{\xb3U\x1dV\xf0\x0eA\t=\x95fT/#0\xe1\xfa`\xb64>D\x05=pp\xc6\xd6\xdbnL>\xba\x03\xf6F\x8e W\xdb\x13T\xff+~\x96\xf1-=?\xfc\xe4\xa8PP`\xef\xce\xe9\xb5\xe5O\xff\xaf\xb7$\xc2?m\xdb\x13\x7f[U\x17\x16\nO\xaf\xf2\xa4\xbd)$\x8f\x86\x8b\x0e\x8f\xc7\x96\xba\xf6\xfe\xa4\x06\xd2q\xf5\x03I?\x8f\xf8\xf0$`\xc7\xc8\xcb\xa6\xcd\xea\x8b[k\xb4_c\x1c|H\xcf\x12\x8b\xec\x85s\xde\xa1\xce\xe83W\x87\x03E\x16\x10\xf7\x94\x80\xc3R\xae\xb0\xce\xc5\x05\x9d\x06I\xa9\xbcW\x1f\x8b\xe1\xbc\x83\xea\xfaNJE\t\xa54\xfa`B\xf9\x17;Z4\xfa\xf5\x81f\xbe&\xe6^}<\t^\x9b\xc7\x9a\xa7\x99E\x8e\xbc\xe4\xa2\x04\x91\xf2S\x06#\x9c\x88\xd1\x9c]\xf4\xc3\xa7\x80\x15*#7fTZ\xa0\xe3\x85\xc4k\xac\xf7L\x1b\xc0\xf8\xa7\x0b\xe84\xdbf\x04\xd7\x087Pl\xff\xd2\x9c\x8cl" + + def setUp(self): + self.filename = TESTFN + + def tearDown(self): + if os.path.isfile(self.filename): + os.unlink(self.filename) + + def createTempFile(self, crlf=0): + f = open(self.filename, "wb") + if crlf: + data = self.DATA_CRLF + else: + data = self.DATA + f.write(data) + f.close() + + def testRead(self): + # "Test lzma.LZMAFile.read()" + self.createTempFile() + lzmaf = lzma.LZMAFile(self.filename) + self.assertRaises(TypeError, lzmaf.read, None) + self.assertEqual(lzmaf.read(), self.TEXT) + lzmaf.close() + + def testReadChunk10(self): + # "Test lzma.LZMAFile.read() in chunks of 10 bytes" + self.createTempFile() + lzmaf = lzma.LZMAFile(self.filename) + text = b'' + while 1: + str = lzmaf.read(10) + if not str: + break + text += str + self.assertEqual(text, text) + lzmaf.close() + + def 
testRead100(self): + # "Test lzma.LZMAFile.read(100)" + self.createTempFile() + lzmaf = lzma.LZMAFile(self.filename) + self.assertEqual(lzmaf.read(100), self.TEXT[:100]) + lzmaf.close() + + def testReadLine(self): + # "Test lzma.LZMAFile.readline()" + self.createTempFile() + lzmaf = lzma.LZMAFile(self.filename) + self.assertRaises(TypeError, lzmaf.readline, None) + sio = BytesIO(self.TEXT) + for line in sio.readlines(): + self.assertEqual(lzmaf.readline(), line) + lzmaf.close() + + def testReadLines(self): + # "Test lzma.LZMAFile.readlines()" + self.createTempFile() + lzmaf = lzma.LZMAFile(self.filename) + self.assertRaises(TypeError, lzmaf.readlines, None) + sio = BytesIO(self.TEXT) + self.assertEqual(lzmaf.readlines(), sio.readlines()) + lzmaf.close() + + def testIterator(self): + # "Test iter(lzma.LZMAFile)" + self.createTempFile() + lzmaf = lzma.LZMAFile(self.filename) + sio = BytesIO(self.TEXT) + self.assertEqual(list(iter(lzmaf)), sio.readlines()) + lzmaf.close() + + def testWrite(self): + # "Test lzma.LZMAFile.write()" + lzmaf = lzma.LZMAFile(self.filename, "w") + self.assertRaises(TypeError, lzmaf.write) + lzmaf.write(self.TEXT) + lzmaf.close() + f = open(self.filename, 'rb') + self.assertEqual(lzma.decompress(f.read()), self.TEXT) + f.close() + + def testWriteChunks10(self): + # "Test lzma.LZMAFile.write() with chunks of 10 bytes" + lzmaf = lzma.LZMAFile(self.filename, "w") + n = 0 + while 1: + str = self.TEXT[n*10:(n+1)*10] + if not str: + break + lzmaf.write(str) + n += 1 + lzmaf.close() + f = open(self.filename, 'rb') + self.assertEqual(lzma.decompress(f.read()), self.TEXT) + f.close() + + def testWriteLines(self): + # "Test lzma.LZMAFile.writelines()" + lzmaf = lzma.LZMAFile(self.filename, "w") + self.assertRaises(TypeError, lzmaf.writelines) + sio = BytesIO(self.TEXT) + lzmaf.writelines(sio.readlines()) + lzmaf.close() + # patch #1535500 + self.assertRaises(ValueError, lzmaf.writelines, ["a"]) + f = open(self.filename, 'rb') + 
self.assertEqual(lzma.decompress(f.read()), self.TEXT) + f.close() + + def testWriteMethodsOnReadOnlyFile(self): + lzmaf = lzma.LZMAFile(self.filename, "w") + lzmaf.write(b'abc') + lzmaf.close() + + lzmaf = lzma.LZMAFile(self.filename, "r") + self.assertRaises(IOError, lzmaf.write, b"a") + self.assertRaises(IOError, lzmaf.writelines, [b"a"]) + + def testSeekForward(self): + # "Test lzma.LZMAFile.seek(150, 0)" + self.createTempFile() + lzmaf = lzma.LZMAFile(self.filename) + self.assertRaises(TypeError, lzmaf.seek) + lzmaf.seek(150) + self.assertEqual(lzmaf.read(), self.TEXT[150:]) + lzmaf.close() + + def testSeekBackwards(self): + # "Test lzma.LZMAFile.seek(-150, 1)" + self.createTempFile() + lzmaf = lzma.LZMAFile(self.filename) + lzmaf.read(500) + lzmaf.seek(-150, 1) + self.assertEqual(lzmaf.read(), self.TEXT[500-150:]) + lzmaf.close() + + def testSeekBackwardsFromEnd(self): + # "Test lzma.LZMAFile.seek(-150, 2)" + self.createTempFile() + lzmaf = lzma.LZMAFile(self.filename) + lzmaf.seek(-150, 2) + self.assertEqual(lzmaf.read(), self.TEXT[len(self.TEXT)-150:]) + lzmaf.close() + + def testSeekPostEnd(self): + # "Test lzma.LZMAFile.seek(150000)" + self.createTempFile() + lzmaf = lzma.LZMAFile(self.filename) + lzmaf.seek(150000) + self.assertEqual(lzmaf.tell(), len(self.TEXT)) + self.assertEqual(lzmaf.read(), b'') + lzmaf.close() + + def testSeekPostEndTwice(self): + # "Test lzma.LZMAFile.seek(150000) twice" + self.createTempFile() + lzmaf = lzma.LZMAFile(self.filename) + lzmaf.seek(150000) + lzmaf.seek(150000) + self.assertEqual(lzmaf.tell(), len(self.TEXT)) + self.assertEqual(lzmaf.read(), b'') + lzmaf.close() + + def testSeekPreStart(self): + # "Test lzma.LZMAFile.seek(-150, 0)" + self.createTempFile() + lzmaf = lzma.LZMAFile(self.filename) + lzmaf.seek(-150) + self.assertEqual(lzmaf.tell(), 0) + self.assertEqual(lzmaf.read(), self.TEXT) + lzmaf.close() + + def testOpenDel(self): + # "Test opening and deleting a file many times" + self.createTempFile() + for i in 
class ChecksumTestCase(unittest.TestCase):
    # Checksum test cases for lzma.crc32() and lzma.crc64().

    def test_crc32start(self):
        # Leaving out the start value must equal passing 0 explicitly.
        self.assertEqual(lzma.crc32(""), lzma.crc32("", 0))
        # assertTrue replaces the deprecated assert_ alias.
        self.assertTrue(lzma.crc32("abc", 0xffffffff))

    def test_crc32empty(self):
        # Empty input is expected to hand back the start value unchanged.
        self.assertEqual(lzma.crc32("", 0), 0)
        self.assertEqual(lzma.crc32("", 1), 1)
        self.assertEqual(lzma.crc32("", 432), 432)

    def assertEqual32(self, seen, expected):
        # 32-bit values masked -- checksums on 32- vs 64- bit machines
        # This is important if bit 31 (0x80000000) is set.
        self.assertEqual(seen & 0x0ffffffff, expected & 0x0ffffffff)

    def test_penguins32(self):
        self.assertEqual32(lzma.crc32("penguin", 0), 0x0e5c1a120)
        self.assertEqual32(lzma.crc32("penguin", 1), 0x43b6aa94)

        self.assertEqual(lzma.crc32("penguin"), lzma.crc32("penguin", 0))

    # These crc64 tests need to be reviewed.
    def test_crc64start(self):
        # Leaving out the start value must equal passing 0 explicitly.
        self.assertEqual(lzma.crc64(""), lzma.crc64("", 0))
        self.assertTrue(lzma.crc64("abc", 0xffffffff))

    def test_crc64empty(self):
        # Empty input is expected to hand back the start value unchanged.
        self.assertEqual(lzma.crc64("", 0), 0)
        self.assertEqual(lzma.crc64("", 1), 1)
        self.assertEqual(lzma.crc64("", 432), 432)

    def assertEqual64(self, seen, expected):
        # Compare only the low 64 bits of both values.
        self.assertEqual(seen & 0xffffffffffffffff, expected & 0xffffffffffffffff)

    def test_penguins64(self):
        self.assertEqual64(lzma.crc64("penguin", 0), 0x9285a18e774b3258)
        self.assertEqual64(lzma.crc64("penguin", 1), 0xb06aacd743b256b4)

        self.assertEqual(lzma.crc64("penguin"), lzma.crc64("penguin", 0))
>= 8 +typedef fpos_t Py_off_t; +#else +#error "Large file support, but neither off_t nor fpos_t is large enough." +#endif + +#define BUF(v) PyBytes_AS_STRING(v) + +#define LZMAFileObject_Check(v) (Py_TYPE(v) == &LZMAFile_Type) + +typedef enum file_mode_e { + MODE_CLOSED = 0, + MODE_READ = 1, + MODE_READ_EOF = 2, + MODE_WRITE = 3 +} file_mode; + +#ifdef WITH_THREAD +#define ACQUIRE_LOCK(obj) do { \ + if (!PyThread_acquire_lock(obj->lock, 0)) { \ + Py_BEGIN_ALLOW_THREADS \ + PyThread_acquire_lock(obj->lock, 1); \ + Py_END_ALLOW_THREADS \ + } } while(0) +#define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock) +#else +#define ACQUIRE_LOCK(obj) +#define RELEASE_LOCK(obj) +#endif + +#define INITCHECK if (!self->is_initialised) { PyErr_Format(PyExc_RuntimeError, "%s object not initialised!", Py_TYPE(self)->tp_name); return NULL; } + +/* ===================================================================== */ +/* liblzma has no FILE-like interface, so we need to define our own */ + +typedef struct lzma_file { + unsigned char buf[1<<15]; + lzma_stream strm; + FILE *fp; + int encoding; + int eof; +} lzma_FILE; + +static lzma_FILE * +lzma_open(lzma_ret *lzma_error, lzma_filter *filters, lzma_check check, FILE *fp, unsigned long long memlimit) +{ + lzma_ret *ret = lzma_error; + int encoding = filters[0].options ? 
1 : 0; + lzma_FILE *lzma_file; + lzma_stream tmp = LZMA_STREAM_INIT; + + if (!fp) + return NULL; + + lzma_file = PyMem_Malloc(sizeof(*lzma_file)); + + if (!lzma_file) + return NULL; + + lzma_file->fp = fp; + lzma_file->encoding = encoding; + lzma_file->eof = 0; + lzma_file->strm = tmp; + + if (encoding) { + if(filters[0].id == LZMA_FILTER_LZMA1) + *ret = lzma_alone_encoder(&lzma_file->strm, filters[0].options); + else if(check == (lzma_check)-1) + *ret = lzma_raw_encoder(&lzma_file->strm, filters); + else + *ret = lzma_stream_encoder(&lzma_file->strm, filters, check); + } else + *ret = lzma_auto_decoder(&lzma_file->strm, memlimit, 0); + + if (*ret != LZMA_OK) { + PyMem_Free(lzma_file); + return NULL; + } + return lzma_file; +} + +static int +lzma_close(lzma_ret *lzma_error, lzma_FILE *lzma_file) +{ + lzma_ret *ret = lzma_error; + int retval = 0; + size_t n; + + if (!lzma_file) + return -1; + if (lzma_file->encoding) { + for (;;) { + lzma_file->strm.avail_out = sizeof(lzma_file->buf); + lzma_file->strm.next_out = (unsigned char *)lzma_file->buf; + *ret = lzma_code(&lzma_file->strm, LZMA_FINISH); + if (*ret != LZMA_OK && *ret != LZMA_STREAM_END) + { + retval = -1; + break; + } + n = sizeof(lzma_file->buf) - lzma_file->strm.avail_out; + if (n && fwrite(lzma_file->buf, 1, n, lzma_file->fp) != n) + { + retval = -1; + break; + } + if (*ret == LZMA_STREAM_END) + break; + } + } else + *ret = LZMA_OK; + + lzma_end(&lzma_file->strm); + return retval; +} + +static ssize_t lzma_read(lzma_ret *lzma_error, lzma_FILE *lzma_file, void *buf, size_t len) +{ + lzma_ret *ret = lzma_error; + int eof = 0; + + if (!lzma_file || lzma_file->encoding) + return -1; + if (lzma_file->eof) + return 0; + + lzma_file->strm.next_out = buf; + lzma_file->strm.avail_out = len; + for (;;) { + if (!lzma_file->strm.avail_in) { + lzma_file->strm.next_in = (unsigned char *)lzma_file->buf; + lzma_file->strm.avail_in = fread(lzma_file->buf, 1, sizeof(lzma_file->buf), lzma_file->fp); + if 
(!lzma_file->strm.avail_in) + eof = 1; + } + *ret = lzma_code(&lzma_file->strm, LZMA_RUN); + if (*ret == LZMA_STREAM_END) { + lzma_file->eof = 1; + return len - lzma_file->strm.avail_out; + } + if (*ret != LZMA_OK) + return -1; + if (!lzma_file->strm.avail_out) + return len; + if (eof) + return -1; + } +} + +static ssize_t +lzma_write(lzma_ret *lzma_error, lzma_FILE *lzma_file, void *buf, size_t len) +{ + lzma_ret *ret = lzma_error; + size_t n; + + if (!lzma_file || !lzma_file->encoding) + return -1; + if (!len) + return 0; + + lzma_file->strm.next_in = buf; + lzma_file->strm.avail_in = len; + for (;;) { + lzma_file->strm.next_out = (unsigned char *)lzma_file->buf; + lzma_file->strm.avail_out = sizeof(lzma_file->buf); + *ret = lzma_code(&lzma_file->strm, LZMA_RUN); + if (*ret != LZMA_OK) + return -1; + n = sizeof(lzma_file->buf) - lzma_file->strm.avail_out; + if (n && fwrite(lzma_file->buf, 1, n, lzma_file->fp) != n) + return -1; + if (!lzma_file->strm.avail_in) + return len; + } +} + +/* ===================================================================== */ +/* Structure definitions. 
/* Per-instance state of an LZMAFile object. */
typedef struct {
    PyObject_HEAD
    FILE *rawfp;        /* stdio stream; NOTE(review): presumably the
                         * underlying compressed file -- confirm */

    char* f_buf;        /* Allocated readahead buffer */
    char* f_bufend;     /* Points after last occupied position */
    char* f_bufptr;     /* Current buffer position */

    lzma_FILE *fp;      /* handle the read helpers pass to lzma_read() */
    lzma_filter filters[LZMA_FILTERS_MAX + 1];
    lzma_check check;
    unsigned long long memlimit;

    file_mode mode;     /* one of the MODE_* values of file_mode */
    Py_off_t pos;       /* advanced by Util_GetLine()/Util_ReadAhead()
                         * as data is read */
    Py_off_t size;      /* set to pos by those helpers once
                         * LZMA_STREAM_END is reported */
#ifdef WITH_THREAD
    PyThread_type_lock lock;
#endif
} LZMAFileObject;
*/ +static int +check_iterbuffered(LZMAFileObject *f) +{ + if (f->f_buf != NULL && + (f->f_bufend - f->f_bufptr) > 0 && + f->f_buf[0] != '\0') { + PyErr_SetString(PyExc_ValueError, + "Mixing iteration and read methods would lose data"); + return -1; + } + return 0; +} + +static PyObject *LZMAError = NULL; + +static int +Util_CatchLZMAError(lzma_ret lzuerror, lzma_stream *lzus, int encoding) +{ + int ret = 1; + switch(lzuerror) { + case LZMA_OK: + case LZMA_STREAM_END: + break; + + case LZMA_NO_CHECK: + PyErr_WarnEx(LZMAError, "stream has no integrity check", 1); + break; + + case LZMA_UNSUPPORTED_CHECK: + if(encoding) + { + PyErr_SetString(LZMAError, "Cannot calculate the integrity check"); + ret = 0; + } + else if(!encoding) + { + char warning[50]; + sprintf(warning, "check type '%d' is unsupported, check will not be validated", + lzma_get_check(lzus)); + PyErr_SetString(LZMAError, warning); + } + break; + + case LZMA_GET_CHECK: + /*TODO: ?*/ + break; + + case LZMA_MEM_ERROR: + PyErr_SetString(PyExc_MemoryError, "cannot allocate memory"); + ret = 0; + break; + + case LZMA_MEMLIMIT_ERROR: + PyErr_SetString(PyExc_MemoryError, "memory usage limit was reached"); + ret = 0; + break; + + case LZMA_FORMAT_ERROR: + PyErr_SetString(LZMAError, "unknown file format"); + ret = 0; + break; + + case LZMA_OPTIONS_ERROR: + PyErr_SetString(LZMAError, "invalid or unsupported options"); + ret = 0; + break; + + case LZMA_DATA_ERROR: + PyErr_SetString(PyExc_IOError, "invalid data stream"); + ret = 0; + break; + + case LZMA_BUF_ERROR: + if (lzus != NULL && lzus->avail_out > 0) { + PyErr_SetString(PyExc_IOError, "unknown BUF error"); + ret = 0; + } + break; + + /*case LZMA_HEADER_ERROR: + PyErr_SetString(PyExc_RuntimeError, "invalid or unsupported header"); + ret = 0; + break;*/ + + case LZMA_PROG_ERROR: + /*FIXME: fix more accurate error message..*/ + PyErr_SetString(PyExc_ValueError, + "the lzma library has received wrong " + "options"); + ret = 0; + break; + + default: + ret = 0; + 
PyErr_SetString(LZMAError, "unknown error!"); + break; + + } + return ret; +} + +#if BUFSIZ <= 1024 +#define SMALLCHUNK 8192 +#else +#define SMALLCHUNK BUFSIZ +#endif + +#if SIZEOF_INT < 4 +#define BIGCHUNK (512 * 32) +#else +#define BIGCHUNK (512 * 1024) +#endif + +/* This is a hacked version of Python's fileobject.c:new_buffersize(). */ +static size_t +Util_NewBufferSize(size_t currentsize) +{ + if (currentsize > SMALLCHUNK) { + /* Keep doubling until we reach BIGCHUNK; + then keep adding BIGCHUNK. */ + if (currentsize <= BIGCHUNK) + return currentsize + currentsize; + else + return currentsize + BIGCHUNK; + } + return currentsize + SMALLCHUNK; +} + +/* This is a hacked version of Python's fileobject.c:get_line(). */ +static PyObject * +Util_GetLine(LZMAFileObject *f, int n) +{ + char c; + char *buf, *end; + size_t total_v_size; /* total # of slots in buffer */ + size_t used_v_size = 0; /* # used slots in buffer */ + size_t increment; /* amount to increment the buffer */ + PyObject *v; + lzma_ret lzuerror = LZMA_OK; + int bytes_read; + + total_v_size = n > 0 ? 
n : 100; + v = PyBytes_FromStringAndSize((char *)NULL, total_v_size); + if (v == NULL) + return NULL; + + buf = BUF(v); + end = buf + total_v_size; + + for (;;) { + Py_BEGIN_ALLOW_THREADS + do { + bytes_read = lzma_read(&lzuerror, f->fp, &c, 1); + f->pos++; + if (bytes_read == 0) + break; + *buf++ = c; + } while (lzuerror == LZMA_OK && c != '\n' && buf != end); + Py_END_ALLOW_THREADS + if (lzuerror == LZMA_STREAM_END) { + f->size = f->pos; + break; + } else if (lzuerror != LZMA_OK) { + Util_CatchLZMAError(lzuerror, &f->fp->strm, f->fp->encoding); + Py_DECREF(v); + return NULL; + } + if (c == '\n') + break; + /* Must be because buf == end */ + if (n > 0) + break; + used_v_size = total_v_size; + increment = total_v_size >> 2; /* mild exponential growth */ + total_v_size += increment; + if (total_v_size > INT_MAX) { + PyErr_SetString(PyExc_OverflowError, + "line is longer than a Python string can hold"); + Py_DECREF(v); + return NULL; + } + if (_PyBytes_Resize(&v, total_v_size) < 0) { + return NULL; + } + buf = BUF(v) + used_v_size; + end = BUF(v) + total_v_size; + } + + used_v_size = buf - BUF(v); + if (used_v_size != total_v_size) { + if (_PyBytes_Resize(&v, used_v_size) < 0) { + v = NULL; + } + } + + return v; +} + +/* This is a hacked version of Python's fileobject.c:drop_readahead(). */ +static void +Util_DropReadAhead(LZMAFileObject *f) +{ + if (f->f_buf != NULL) { + PyMem_Free(f->f_buf); + f->f_buf = NULL; + } +} + +/* This is a hacked version of Python's fileobject.c:readahead(). 
*/ +static int +Util_ReadAhead(LZMAFileObject *f, int bufsize) +{ + int chunksize; + lzma_ret lzuerror = LZMA_OK; + + if (f->f_buf != NULL) { + if((f->f_bufend - f->f_bufptr) >= 1) + return 0; + else + Util_DropReadAhead(f); + } + if (f->fp->eof) { + f->f_bufptr = f->f_buf; + f->f_bufend = f->f_buf; + return 0; + } + if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) { + PyErr_NoMemory(); + return -1; + } + Py_BEGIN_ALLOW_THREADS + chunksize = lzma_read(&lzuerror, f->fp, f->f_buf, bufsize); + Py_END_ALLOW_THREADS + f->pos += chunksize; + if (lzuerror == LZMA_STREAM_END) { + f->size = f->pos; + } else if (lzuerror != LZMA_OK) { + Util_CatchLZMAError(lzuerror, &f->fp->strm, f->fp->encoding); + Util_DropReadAhead(f); + return -1; + } + f->f_bufptr = f->f_buf; + f->f_bufend = f->f_buf + chunksize; + return 0; +} + +/* This is a hacked version of Python's + * fileobject.c:readahead_get_line_skip(). */ +static PyObject * +Util_ReadAheadGetLineSkip(LZMAFileObject *f, int skip, int bufsize) +{ + PyObject* s; + char *bufptr; + char *buf; + int len; + + if (f->f_buf == NULL) + if (Util_ReadAhead(f, bufsize) < 0) + return NULL; + + len = f->f_bufend - f->f_bufptr; + if (len == 0) + return PyBytes_FromStringAndSize(NULL, skip); + bufptr = memchr(f->f_bufptr, '\n', len); + if (bufptr != NULL) { + bufptr++; /* Count the '\n' */ + len = bufptr - f->f_bufptr; + s = PyBytes_FromStringAndSize(NULL, skip+len); + if (s == NULL) + return NULL; + memcpy(PyBytes_AS_STRING(s)+skip, f->f_bufptr, len); + f->f_bufptr = bufptr; + if (bufptr == f->f_bufend) + Util_DropReadAhead(f); + } else { + bufptr = f->f_bufptr; + buf = f->f_buf; + f->f_buf = NULL; /* Force new readahead buffer */ + s = Util_ReadAheadGetLineSkip(f, skip+len, + bufsize + (bufsize>>2)); + if (s == NULL) { + PyMem_Free(buf); + return NULL; + } + memcpy(PyBytes_AS_STRING(s)+skip, bufptr, len); + PyMem_Free(buf); + } + return s; +} + +/* ===================================================================== */ +/* Methods of 
LZMAOptions. */ + +#define CHECK_RANGE(x, a, b, msg) if (check_range(x, a, b)) { PyErr_Format(PyExc_ValueError, msg, a, b, (int)x); goto end; } +static int +check_range(unsigned int x, unsigned int a, unsigned int b){ + return (x < a || x > b); +} + +static char* +tostring(const char* text, PyObject *variable) { + PyObject *format = PyUnicode_FromString(text); + PyObject *str = PyUnicode_Format(format, variable); + PyObject *ascii = PyUnicode_AsASCIIString(str); + char *cstr = PyBytes_AsString(ascii); + Py_DECREF(str); + Py_DECREF(format); + return cstr; +} + +static PyMemberDef +memberDef(char *name, int type, Py_ssize_t offset, int flags, char *doc) { + PyMemberDef tmp; + tmp.name = name, tmp.type = type, tmp.offset = offset, tmp.flags = flags, tmp.doc = doc; + return tmp; +} +#define MEMBER_DESCRIPTOR(name, type, variable, text) memberDef(name, type, offsetof(LZMAOptionsObject, variable), READONLY, tostring(text, self->variable)) + +#define LZMA_BEST_SPEED 0 +#define LZMA_BEST_COMPRESSION 9 +#define LZMA_MODE_DEFAULT LZMA_MODE_NORMAL +#define LZMA_MODE_INVALID -1 +#define LZMA_MF_INVALID -1 + +#define LZMA_MF_DEFAULT LZMA_MF_BT4 +#define LZMA_MF_CYCLES_DEFAULT 0 +#define LZMA_DICT_SIZE_MAX (UINT32_C(1) << 30) + (UINT32_C(1) << 29) +#define LZMA_NICE_LEN_MIN 5 +#define LZMA_NICE_LEN_MAX 273 +#define LZMA_NICE_LEN_DEFAULT 128 + +static void +free_lzma_options(lzma_filter *filters) +{ + int i; + for(i = 0; filters[i].id != LZMA_VLI_UNKNOWN && i < LZMA_FILTERS_MAX; i++) + if(filters[i].options != NULL) { + PyMem_Free(filters[i].options); + filters[i].options = NULL; + } +} + +#define DEFAULT_OPTIONS_STRING \ +"compresslevel=6, format='xz', check='crc64', threads=1, filter=({'id':'lzma2',\n"\ +"'extreme':False, 'dict_size':23, 'lc':3 'lp':0, 'pb':2, 'mode':2,\n"\ +"'nice_len':128, 'mf':'bt4', 'depth':0})" + +PyDoc_STRVAR(LZMAOptions__doc__, +"This class describes the different LZMA compression options and holds the\n\ +different min and max value constants for these 
in the variables.\n\ +\n\ +\n"); + +static PyMemberDef LZMAOptions_members[16]; + +static LZMAOptionsObject *Options = NULL; + +static PyObject * +LZMAOptions_repr(LZMAOptionsObject *obj) +{ + return PyUnicode_FromFormat("%s singleton for accessing descriptors", Py_TYPE(obj)->tp_name); +} + +static void +LZMAOptions_dealloc(LZMAOptionsObject* self) +{ + Py_XDECREF(self->format); + Py_XDECREF(self->check); + Py_XDECREF(self->check_dict); + Py_XDECREF(self->filter_dict); + Py_XDECREF(self->filter_dictSwap); + Py_XDECREF(self->filter); + Py_XDECREF(self->compresslevel); + Py_XDECREF(self->dict_size); + Py_XDECREF(self->lc); + Py_XDECREF(self->lp); + Py_XDECREF(self->pb); + Py_XDECREF(self->mode_dict); + Py_XDECREF(self->mode_dictSwap); + Py_XDECREF(self->mode); + Py_XDECREF(self->nice_len); + Py_XDECREF(self->mf_dict); + Py_XDECREF(self->mf_dictSwap); + Py_XDECREF(self->mf); + Py_XDECREF(self->depth); + Py_XDECREF(self->dist); + Py_XDECREF(self->start); + Py_XDECREF(self->threads); + + Py_TYPE(self)->tp_free((PyObject*)self); +} + +static PyObject * +LZMA_options_get(lzma_filter *filters, LZMAOptionsObject *optionsObj) +{ + Py_ssize_t size; + PyObject *filtersTuple; + + for(size = 0; size <= LZMA_FILTERS_MAX; size++) + if(filters[size].id == LZMA_VLI_UNKNOWN) + break; + filtersTuple = PyTuple_New(size); + for(size--; size >= 0; size--) { + PyObject *filter = PyDict_New(); + switch(filters[size].id) { + case LZMA_FILTER_LZMA1: + case LZMA_FILTER_LZMA2: { + lzma_options_lzma lzma_options = *((lzma_options_lzma*)filters[size].options); + PyDict_SetItemString(filter, "dict_size", PyLong_FromLong((long)lzma_options.dict_size)); + PyDict_SetItemString(filter, "lc", PyLong_FromLong((long)lzma_options.lc)); + PyDict_SetItemString(filter, "lp", PyLong_FromLong((long)lzma_options.lp)); + PyDict_SetItemString(filter, "pb", PyLong_FromLong((long)lzma_options.pb)); + PyDict_SetItemString(filter, "mode", PyDict_GetItem(optionsObj->mode_dictSwap, 
PyLong_FromLong((long)lzma_options.mode))); + PyDict_SetItemString(filter, "nice_len", PyLong_FromLong((long)lzma_options.nice_len)); + PyDict_SetItemString(filter, "mf", PyDict_GetItem(optionsObj->mf_dictSwap, PyLong_FromLong((long)lzma_options.mf))); + PyDict_SetItemString(filter, "depth", PyLong_FromLong((long)lzma_options.depth)); + break; + } + case LZMA_FILTER_X86: + case LZMA_FILTER_POWERPC: + case LZMA_FILTER_ARM: + case LZMA_FILTER_ARMTHUMB: + case LZMA_FILTER_SPARC: { + lzma_options_bcj bcj_options = *((lzma_options_bcj*)filters[size].options); + PyDict_SetItemString(filter, "start", PyLong_FromLong((long)bcj_options.start_offset)); + break; + } + case LZMA_FILTER_DELTA: { + lzma_options_delta delta_options = *((lzma_options_delta*)filters[size].options); + if(delta_options.type == LZMA_DELTA_TYPE_BYTE) + PyDict_SetItemString(filter, "type", PyUnicode_FromString("byte")); + PyDict_SetItemString(filter, "dist", PyLong_FromLong((long)delta_options.dist)); + break; + } + /*case LZMA_FILTER_SUBBLOCK:*/ + default: + PyErr_SetObject(PyExc_ValueError, + PyUnicode_FromFormat( + "'%d' unknown filter", filters[size].id)); + } + PyDict_SetItemString(filter, "id", PyDict_GetItem(optionsObj->filter_dictSwap, PyLong_FromLong((long)filters[size].id))); + Py_INCREF(filter); + PyTuple_SetItem(filtersTuple, size, filter); + } + Py_INCREF(filtersTuple); + + return filtersTuple; +} + +static int init_delta_filter(const char *funcName, PyObject *args, PyObject *kwargs, void **optionsPtr) +{ + char argString[64]; + static char *kwlist[] = {"type", "dist"}; + PyObject *type = NULL; + int ret = 0; + lzma_options_delta *options = *optionsPtr = PyMem_Malloc(sizeof(lzma_options_delta)); + + options->type = LZMA_DELTA_TYPE_BYTE; + options->dist = LZMA_DELTA_DIST_MIN; + + PyOS_snprintf(argString, sizeof(argString), "|Ui:%s", funcName); + if (!PyArg_ParseTupleAndKeywords(args, kwargs, argString, kwlist, &type, &options->dist)) + goto end; + CHECK_RANGE(options->dist, 
LZMA_DELTA_DIST_MIN, LZMA_DELTA_DIST_MAX, + "dist must be between %d and %d, got %d"); + if(type){ + /* There's only one type supported for now, but let's add support for setting the type + * anyways for future compatibility in case of support for addtional types gets added + */ + if(PyUnicode_CompareWithASCIIString(type, "byte") == 0) + options->type = LZMA_DELTA_TYPE_BYTE; + else { + PyErr_SetObject(PyExc_ValueError, + PyUnicode_FromString("Only byte-wise delta ('byte') is currently " + "supported for type of delta calculation")); + goto end; + } + } + + ret = 1; +end: + return ret; +} + +static int init_bcj_filter(const char *funcName, PyObject *args, PyObject *kwargs, void **optionsPtr) +{ + char argString[64]; + static char *kwlist[] = {"start"}; + lzma_options_bcj *options = *optionsPtr = PyMem_Malloc(sizeof(lzma_options_bcj)); + + options->start_offset = 0; + + PyOS_snprintf(argString, sizeof(argString), "|i:%s", funcName); + if (!PyArg_ParseTupleAndKeywords(args, kwargs, argString, kwlist, &options->start_offset)) + return 0; + + return 1; +} + +static int init_lzma_filter(const char *funcName, PyObject *args, PyObject *kwargs, int compresslevel, void **optionsPtr) +{ + PyObject *mf_key = NULL, *mode_key = NULL; + char argString[64]; + static char *kwlist[] = {"dict_size", "lc", "lp", "pb", "nice_len", + "depth", "mode", "mf", NULL}; + int ret = 0; + lzma_options_lzma *options = *optionsPtr = PyMem_Malloc(sizeof(lzma_options_lzma)); + + CHECK_RANGE(compresslevel, LZMA_BEST_SPEED, LZMA_BEST_COMPRESSION, "compresslevel must be between %d and %d, got %d"); + + if(kwargs != NULL) { + if(PyMapping_HasKeyString(kwargs, "extreme")){ + if(PyBool_Check(PyDict_GetItemString(kwargs, "extreme"))) + compresslevel |= LZMA_PRESET_EXTREME; + PyDict_DelItemString(kwargs, "extreme"); + } + } + + lzma_lzma_preset(options, compresslevel); + if(kwargs == NULL) + return 1; + + PyOS_snprintf(argString, sizeof(argString), "|iiiiiiUU:%s", funcName); + if 
(!PyArg_ParseTupleAndKeywords(args, kwargs, argString, kwlist, + &options->dict_size, &options->lc, &options->lp, &options->pb, + &options->nice_len, &options->depth, &mode_key, &mf_key)) + goto end; + + CHECK_RANGE(options->dict_size, LZMA_DICT_SIZE_MIN, LZMA_DICT_SIZE_MAX, + "dict_size must be between %d and %d, got %d"); + CHECK_RANGE(options->lc, LZMA_LCLP_MIN, LZMA_LCLP_MAX, + "lc must be between %d and %d, got %d"); + CHECK_RANGE(options->lp, LZMA_LCLP_MIN, LZMA_LCLP_MAX, + "lp must be between %d and %d, got %d"); + CHECK_RANGE(options->pb, LZMA_PB_MIN, LZMA_PB_MAX, + "pb must be between %d and %d, got %d"); + CHECK_RANGE(options->nice_len, LZMA_NICE_LEN_MIN, LZMA_NICE_LEN_MAX, + "nice_len must be between %d and %d, got %d"); + if((int)options->depth < 0){ + PyErr_Format(PyExc_ValueError, "depth must be >= 0"); + goto end; + } + + if(mode_key) { + if(PyDict_Contains(Options->mode_dict, mode_key)) + options->mode = PyLong_AsLong(PyDict_GetItem(Options->mode_dict, mode_key)); + else { + PyErr_SetObject(PyExc_ValueError, + PyUnicode_FromFormat("mode invalid '%S'", mode_key)); + goto end; + } + } + if(mf_key) { + if(PyDict_Contains(Options->mf_dict, mf_key)) + options->mf = PyLong_AsLong(PyDict_GetItem(Options->mf_dict, mf_key)); + else { + PyErr_SetObject(PyExc_ValueError, + PyUnicode_FromFormat("mf invalid '%S'", mf_key)); + goto end; + } + } + + ret = 1; +end: + return ret; +} + +/* This function is for parsing the options given for compression, since we have both a + * one shot compress function and a sequential compressor object class, we'll share + * this code amongst them. 
+ */
+/* Fills `filters` (terminated with LZMA_VLI_UNKNOWN) and `*check` from the
+ * `format`, `myCheck` and `filter` arguments; any of the three may be NULL.
+ * `filter` is either one dict or a tuple of one to four dicts, each with an
+ * optional "id" key naming the filter. The "id" key is removed from the
+ * dict before the remaining keys are handed to the per-filter parser.
+ * `threads` is not used here.
+ *
+ * Returns 1 on success, 0 with a Python exception set on failure.
+ */
+static int
+init_lzma_options(const char *funcName, lzma_filter *filters, lzma_check *check,
+        int compresslevel, PyObject *format, PyObject *myCheck,
+        int threads, PyObject *filter)
+{
+    PyObject *args = NULL;
+    PyObject *formatTuple;
+    int ret = 0;
+    Py_ssize_t filterSize = 1;
+
+    filters[0].options = filters[1].options = filters[2].options = filters[3].options = NULL;
+
+    /* We create an empty tuple since we only want to parse keywords */
+    args = PyTuple_New(0);
+    if(args == NULL)
+        return 0;
+
+    if(format){
+        /* Borrowed reference; NULL means the format name is unknown. */
+        formatTuple = PyDict_GetItem(Options->format_dict, format);
+        if(formatTuple == NULL) {
+            PyErr_Format(PyExc_ValueError, "Unsupported format '%S'", format);
+            goto end;
+        }
+    }
+    else
+        formatTuple = PyDict_GetItemString(Options->format_dict, "xz");
+
+    /* Filter ids are stored with PyLong_FromUnsignedLongLong(), so read
+     * them back at the same width instead of through a C long. */
+    filters[0].id = PyLong_AsUnsignedLongLong(PyTuple_GetItem(formatTuple,0));
+    *check = PyLong_AsLong(PyTuple_GetItem(formatTuple,1));
+
+    if(myCheck){
+        PyObject *checkValue;
+
+        if(filters[0].id == LZMA_FILTER_LZMA1 || *check == LZMA_CHECK_NONE) {
+            PyErr_Format(PyExc_ValueError,
+                    "Integrity checking not supported for '%S' format",
+                    format);
+            Py_DECREF(args);
+            return 0;
+        }
+        checkValue = PyDict_GetItem(Options->check_dict, myCheck);
+        if(checkValue == NULL) {
+            /* %S formats a PyObject; %s would read it as a C string */
+            PyErr_Format(PyExc_ValueError,
+                    "Unsupported integrity check type '%S'",
+                    myCheck);
+            Py_DECREF(args);
+            return 0;
+        }
+        *check = PyLong_AsLong(checkValue);
+    }
+
+    if(filter == NULL)
+        ret = init_lzma_filter(funcName, args, NULL, compresslevel, &(filters[0].options));
+    else {
+        char filterString[64];
+        PyObject *filterDict[LZMA_FILTERS_MAX] = {NULL, NULL, NULL, NULL};
+        PyObject *id = NULL;    /* owned reference to the current "id" value */
+        int i;
+
+        PyOS_snprintf(filterString, sizeof(filterString), "O|OOO:%s", funcName);
+
+        if(PyDict_Check(filter)) {
+            filterDict[0] = filter;
+        }
+        else if(PyTuple_Check(filter)) {
+            if(filters[0].id == LZMA_FILTER_LZMA1) {
+                PyErr_SetString(PyExc_ValueError,
+                        "LZMA_Alone ('alone') format doesn't support multiple filters");
+                goto end;
+            }
+
+            if (!PyArg_ParseTuple(filter, filterString, &filterDict[0], &filterDict[1], &filterDict[2], &filterDict[3]))
+                goto end;
+            filterSize = PyTuple_Size(filter);
+        }
+        /* Any other object leaves filterDict[0] NULL, in which case the
+         * format's default filter is set up from the preset alone. */
+
+        /* Index inside the loop body: stepping with filterDict[++i] would
+         * read one element past the array after the last filter. */
+        for(i = 0; i < filterSize; i++)
+        {
+            PyObject *spec = filterDict[i];
+
+            Py_XDECREF(id);
+            id = NULL;
+            ret = 0;
+
+            if(spec != NULL && !PyDict_Check(spec)) {
+                PyErr_SetString(PyExc_TypeError,
+                        "each filter must be specified as a dict");
+                break;
+            }
+
+            if(spec != NULL && PyMapping_HasKeyString(spec, "id")){
+                /* Take our own reference: deleting the key may drop the
+                 * last reference the dict was holding. */
+                id = PyDict_GetItemString(spec, "id");
+                Py_XINCREF(id);
+                PyDict_DelItemString(spec, "id");
+            }
+
+            if(id){
+                PyObject *idValue = PyDict_GetItem(Options->filter_dict, id);
+
+                if(idValue == NULL) {
+                    PyErr_Format(PyExc_ValueError, "Unsupported filter: '%S'", id);
+                    break;
+                }
+                filters[i].id = PyLong_AsUnsignedLongLong(idValue);
+
+                if(format && PyUnicode_CompareWithASCIIString(format, "alone") == 0) {
+                    if(filters[i].id != LZMA_FILTER_LZMA1) {
+                        PyErr_SetString(PyExc_ValueError,
+                                "LZMA_Alone ('alone') format only supports 'lzma1' filter");
+                        break;
+                    }
+                }
+                else {
+                    if(filters[i].id == LZMA_FILTER_LZMA1) {
+                        PyErr_SetString(PyExc_ValueError,
+                                "Only LZMA_Alone ('alone') format supports 'lzma1' filter");
+                        break;
+                    }
+                }
+                if(i != filterSize-1 && filters[i].id == LZMA_FILTER_LZMA2) {
+                    PyErr_SetString(PyExc_ValueError,
+                            "'lzma2' filter may only be specified as the last filter");
+                    break;
+                }
+            }
+            else if(i > 0) {
+                /* Only filters[0].id has a default (taken from the format);
+                 * later entries would otherwise be switched on garbage. */
+                PyErr_SetString(PyExc_ValueError,
+                        "filter specification is missing its 'id'");
+                break;
+            }
+
+            switch(filters[i].id) {
+                case LZMA_FILTER_LZMA1:
+                case LZMA_FILTER_LZMA2:
+                    ret = init_lzma_filter(funcName, args, spec, compresslevel, &(filters[i].options));
+                    break;
+                case LZMA_FILTER_X86:
+                case LZMA_FILTER_POWERPC:
+                case LZMA_FILTER_ARM:
+                case LZMA_FILTER_ARMTHUMB:
+                case LZMA_FILTER_SPARC:
+                    ret = init_bcj_filter(funcName, args, spec, &(filters[i].options));
+                    break;
+                case LZMA_FILTER_DELTA:
+                    ret = init_delta_filter(funcName, args, spec, &(filters[i].options));
+                    break;
+                /*case LZMA_FILTER_SUBBLOCK:*/
+                default:
+                    PyErr_Format(PyExc_ValueError,
+                            "Support for '%S' filter not implemented yet", id);
+            }
+            if(!ret)
+                break;
+        }
+        Py_XDECREF(id);
+    }
+    filters[filterSize].id = LZMA_VLI_UNKNOWN;
+
+end:
+    Py_DECREF(args);
+    if(!ret)
+        free_lzma_options(filters);
+
+    return ret;
+}
+
+/* Return a new dict mapping each value of `dict` back to its key. The
+ * items are inserted from last to first, so when two keys share a value the
+ * key listed first by PyDict_Keys() ends up in the result.
+ * Returns a new reference, or NULL with an exception set on failure.
+ */
+static PyObject *swapDict(PyObject *dict)
+{
+    PyObject *keys, *values, *newDict;
+    Py_ssize_t i;
+
+    keys = PyDict_Keys(dict);
+    values = PyDict_Values(dict);
+    newDict = PyDict_New();
+    if(keys == NULL || values == NULL || newDict == NULL)
+        goto error;
+
+    for(i = PyDict_Size(dict)-1; i >= 0; i--) {
+        if(PyDict_SetItem(newDict, PyList_GetItem(values, i), PyList_GetItem(keys, i)) < 0)
+            goto error;
+    }
+    Py_DECREF(keys);
+    Py_DECREF(values);
+
+    return newDict;
+
+error:
+    Py_XDECREF(keys);
+    Py_XDECREF(values);
+    Py_XDECREF(newDict);
+    return NULL;
+}
+
+/* Store `value` under `key` in `dict` and append `key` to `*tuple`, so the
+ * tuple lists the keys in insertion order. `*tuple` may be replaced by
+ * _PyTuple_Resize(). The reference to `value` is not consumed:
+ * PyDict_SetItemString() takes its own.
+ */
+static void dictSetItem_Sort(PyObject *dict, const char *key, PyObject *value, PyObject **tuple)
+{
+    Py_ssize_t size = PyTuple_Size(*tuple);
+
+    PyDict_SetItemString(dict, key, value);
+    /* On failure *tuple has been set to NULL; do not touch it again. */
+    if(_PyTuple_Resize(tuple, size+1) < 0)
+        return;
+    PyTuple_SetItem(*tuple, size, PyUnicode_FromString(key));
+}
+
+/* Maybe not the best way, but it will at least prevent new instances..
*/
+/* Allocator installed in the tp_alloc slot of LZMAOptions_Type. Besides
+ * allocating the object it:
+ *   - fills the name -> constant dicts (format, check, filter, mode, mf),
+ *     the matching key tuples and the (min, max) range tuples;
+ *   - builds the "compresslevel" documentation text by expanding every
+ *     preset from LZMA_BEST_COMPRESSION down to LZMA_BEST_SPEED;
+ *   - fills the LZMAOptions_members table, including its sentinel entry.
+ * The object is returned with one extra reference taken on it.
+ * Returns NULL if the underlying allocation fails.
+ */
+static PyObject *
+LZMAOptions_alloc(PyTypeObject *type, Py_ssize_t nitems)
+{
+    LZMAOptionsObject *self = (LZMAOptionsObject*)PyType_GenericAlloc(type, nitems);
+    PyObject *compresslevelopts, *compresslevelString;
+
+    if(self == NULL)
+        return NULL;
+
+    self->format_dict = PyDict_New();
+    self->format = PyTuple_New(0);
+    dictSetItem_Sort(self->format_dict, "xz", PyTuple_Pack(2,
+                PyLong_FromUnsignedLongLong(LZMA_FILTER_LZMA2),
+                PyLong_FromLong(LZMA_CHECK_CRC64)), &self->format);
+    dictSetItem_Sort(self->format_dict, "alone", PyTuple_Pack(2,
+                PyLong_FromUnsignedLongLong(LZMA_FILTER_LZMA1),
+                PyLong_FromLong(LZMA_CHECK_NONE)), &self->format);
+    dictSetItem_Sort(self->format_dict, "raw", PyTuple_Pack(2,
+                PyLong_FromUnsignedLongLong(LZMA_FILTER_LZMA2),
+                PyLong_FromLong((lzma_check)-1)), &self->format);
+
+    self->check_dict = PyDict_New();
+    self->check = PyTuple_New(0);
+    dictSetItem_Sort(self->check_dict, "crc32",PyLong_FromLong(LZMA_CHECK_CRC32), &self->check);
+    dictSetItem_Sort(self->check_dict, "crc64", PyLong_FromLong(LZMA_CHECK_CRC64), &self->check);
+    dictSetItem_Sort(self->check_dict, "sha256", PyLong_FromLong(LZMA_CHECK_SHA256), &self->check);
+    dictSetItem_Sort(self->check_dict, "none", PyLong_FromLong((lzma_check)-1), &self->check);
+
+    self->threads = PyTuple_Pack(2, PyLong_FromLong(1), PyLong_FromLong(1));
+
+    self->filter_dict = PyDict_New();
+    self->filter = PyTuple_New(0);
+    dictSetItem_Sort(self->filter_dict, "lzma1", PyLong_FromUnsignedLongLong(LZMA_FILTER_LZMA1), &self->filter);
+    dictSetItem_Sort(self->filter_dict, "lzma2", PyLong_FromUnsignedLongLong(LZMA_FILTER_LZMA2), &self->filter);
+    dictSetItem_Sort(self->filter_dict, "x86", PyLong_FromUnsignedLongLong(LZMA_FILTER_X86), &self->filter);
+    dictSetItem_Sort(self->filter_dict, "powerpc", PyLong_FromUnsignedLongLong(LZMA_FILTER_POWERPC), &self->filter);
+    dictSetItem_Sort(self->filter_dict, "ia64", PyLong_FromUnsignedLongLong(LZMA_FILTER_IA64), &self->filter);
+    dictSetItem_Sort(self->filter_dict, "arm", PyLong_FromUnsignedLongLong(LZMA_FILTER_ARM), &self->filter);
+    dictSetItem_Sort(self->filter_dict, "armthumb", PyLong_FromUnsignedLongLong(LZMA_FILTER_ARMTHUMB), &self->filter);
+    dictSetItem_Sort(self->filter_dict, "sparc", PyLong_FromUnsignedLongLong(LZMA_FILTER_SPARC), &self->filter);
+    dictSetItem_Sort(self->filter_dict, "delta", PyLong_FromUnsignedLongLong(LZMA_FILTER_DELTA), &self->filter);
+    self->filter_dictSwap = swapDict(self->filter_dict);
+
+    self->compresslevel = PyTuple_Pack(2, PyLong_FromLong(LZMA_BEST_SPEED),
+            PyLong_FromLong(LZMA_BEST_COMPRESSION));
+    self->dict_size = PyTuple_Pack(2, PyLong_FromLong(LZMA_DICT_SIZE_MIN),
+            PyLong_FromLong(LZMA_DICT_SIZE_MAX));
+    self->lc = PyTuple_Pack(2, PyLong_FromLong(LZMA_LCLP_MIN),
+            PyLong_FromLong(LZMA_LCLP_MAX));
+    self->lp = PyTuple_Pack(2, PyLong_FromLong(LZMA_LCLP_MIN),
+            PyLong_FromLong(LZMA_LCLP_MAX));
+    self->pb = PyTuple_Pack(2, PyLong_FromLong(LZMA_PB_MIN),
+            PyLong_FromLong(LZMA_PB_MAX));
+    self->nice_len = PyTuple_Pack(2, PyLong_FromLong(LZMA_NICE_LEN_MIN),
+            PyLong_FromLong(LZMA_NICE_LEN_MAX));
+    self->depth = PyLong_FromLong(0);
+
+    self->mode_dict = PyDict_New();
+    self->mode = PyTuple_New(0);
+    dictSetItem_Sort(self->mode_dict, "fast", PyLong_FromLong(LZMA_MODE_FAST), &self->mode);
+    dictSetItem_Sort(self->mode_dict, "normal", PyLong_FromLong(LZMA_MODE_NORMAL), &self->mode);
+    self->mode_dictSwap = swapDict(self->mode_dict);
+
+    self->mf_dict = PyDict_New();
+    self->mf = PyTuple_New(0);
+    dictSetItem_Sort(self->mf_dict, "hc3", PyLong_FromLong(LZMA_MF_HC3), &self->mf);
+    dictSetItem_Sort(self->mf_dict, "hc4", PyLong_FromLong(LZMA_MF_HC4), &self->mf);
+    dictSetItem_Sort(self->mf_dict, "bt2", PyLong_FromLong(LZMA_MF_BT2), &self->mf);
+    dictSetItem_Sort(self->mf_dict, "bt3", PyLong_FromLong(LZMA_MF_BT3), &self->mf);
+    dictSetItem_Sort(self->mf_dict, "bt4", PyLong_FromLong(LZMA_MF_BT4), &self->mf);
+    self->mf_dictSwap = swapDict(self->mf_dict);
+
+
+    self->dist = PyTuple_Pack(2, PyLong_FromLong(LZMA_DELTA_DIST_MIN),
+            PyLong_FromLong(LZMA_DELTA_DIST_MAX));
+    self->start = PyLong_FromLong(0);
+
+    Py_INCREF(self);
+
+    compresslevelString = PyBytes_FromString(
+            "Compression preset level (%u - %u, LZMA)\n"
+            "This will automatically set the values for the various compression options.\n"
+            "Setting any of the other compression options at the same time as well will\n"
+            "override the specific value set by this preset level.\n"
+            "\n"
+            "Preset level settings:\n"
+            "compresslevel\t lc\t lp\t pb\t mode\t mf\t nice_len\t depth\t dict_size\n");
+    compresslevelopts = PyUnicode_FromString("%d\t\t %u\t %u\t %u\t %s\t %s\t %u\t\t %u\t %u\n");
+
+    {
+        /* Append one table row per preset to compresslevelString. */
+        int compresslevelNum;
+        lzma_options_lzma options;
+        lzma_filter filter[LZMA_FILTERS_MAX + 1];
+        filter[0].id = LZMA_FILTER_LZMA2;
+        filter[0].options = &options;
+        filter[1].id = LZMA_VLI_UNKNOWN;
+        for(compresslevelNum = LZMA_BEST_COMPRESSION; compresslevelNum >= LZMA_BEST_SPEED; compresslevelNum--){
+            PyObject *filters_tuple,
+                     *options_dict,
+                     *options_tuple,
+                     *settingsUnicode,
+                     *settingsAscii;
+
+            lzma_lzma_preset(&options, compresslevelNum);
+            filters_tuple = LZMA_options_get(filter, self);
+            /* Borrowed reference, kept alive by filters_tuple. */
+            options_dict = PyTuple_GetItem(filters_tuple, 0);
+            options_tuple = PyTuple_Pack(9,
+                    PyLong_FromLong(compresslevelNum),
+                    PyMapping_GetItemString(options_dict, "lc"),
+                    PyMapping_GetItemString(options_dict, "lp"),
+                    PyMapping_GetItemString(options_dict, "pb"),
+                    PyMapping_GetItemString(options_dict, "mode"),
+                    PyMapping_GetItemString(options_dict, "mf"),
+                    PyMapping_GetItemString(options_dict, "nice_len"),
+                    PyMapping_GetItemString(options_dict, "depth"),
+                    PyMapping_GetItemString(options_dict, "dict_size"));
+
+            settingsUnicode = PyUnicode_Format(compresslevelopts, options_tuple);
+            settingsAscii = PyUnicode_AsASCIIString(settingsUnicode);
+            PyBytes_ConcatAndDel(&compresslevelString, settingsAscii);
+
+            /* options_dict is deliberately not released here: it was never
+             * owned, and dropping filters_tuple releases the tuple's own
+             * reference to it. */
+            Py_DECREF(options_tuple);
+            Py_DECREF(filters_tuple);
+            Py_DECREF(settingsUnicode);
+        }
+    }
+    Py_DECREF(compresslevelopts);
+    LZMAOptions_members[0] = MEMBER_DESCRIPTOR("format", T_OBJECT, format,
+            "File format to use for compression:\n"
+            "'%s': XZ format used by new xz tool. (default)\n"
+            "'%s': LZMA_Alone format used by older lzma utils.\n"
+            "'%s': Raw format.\n");
+    LZMAOptions_members[1] = MEMBER_DESCRIPTOR("check", T_OBJECT, check,
+            "Type of integrity check to use (XZ format only):\n"
+            "'%s': CRC32 using the polynomial from the IEEE 802.3 standard.\n"
+            "'%s': CRC64 using the polynomial from the ECMA-182 standard. (default)\n"
+            "'%s': SHA-256.\n"
+            "'%s': Don't use any integrity check.\n");
+    LZMAOptions_members[2] = MEMBER_DESCRIPTOR("threads", T_OBJECT, threads,
+            "Number of threads used for compression.\n"
+            "A value of 0 means one thread per available CPU.\n"
+            "\n"
+            "Default: %d\n"
+            "Available CPUs: %d\n"
+            "\n"
+            "This feature is currently not implemented yet.\n");
+    LZMAOptions_members[3] = MEMBER_DESCRIPTOR("id", T_OBJECT, filter,
+            "Filter id\n"
+            "Available filters:\n"
+            "'%s': LZMA1 [LZMA]\n"
+            "'%s': LZMA2 [LZMA]\n"
+            "'%s': x86 [BCJ]\n"
+            "'%s': PowerPC (big endian only) [BCJ]\n"
+            "'%s': IA64 (Itanium) [BCJ]\n"
+            "'%s': ARM (little endian only) [BCJ]\n"
+            "'%s': ARM-Thumb (little endian only) [BCJ]\n"
+            "'%s': SPARC [BCJ]\n"
+            "'%s': Delta [Delta]\n");
+    LZMAOptions_members[4] = MEMBER_DESCRIPTOR("compresslevel", T_OBJECT, compresslevel, PyBytes_AsString(compresslevelString));
+    LZMAOptions_members[5] = MEMBER_DESCRIPTOR("dict_size", T_OBJECT, dict_size,
+            "Dictionary size in bytes (%u - %u, LZMA)\n"
+            "Dictionary size indicates how many bytes of the recently processed\n"
+            "uncompressed data is kept in memory. One method to reduce size of\n"
+            "the uncompressed data is to store distance-length pairs, which\n"
+            "indicate what data to repeat from the dictionary buffer. Thus,\n"
+            "the bigger the dictionary, the better compression ratio usually is.\n");
+    LZMAOptions_members[6] = MEMBER_DESCRIPTOR("lc", T_OBJECT, lc,
+            "Number of literal context bits (%u - %u, LZMA)\n"
+            "How many of the highest bits of the previous uncompressed\n"
+            "eight-bit byte (also known as `literal') are taken into\n"
+            "account when predicting the bits of the next literal.\n"
+            "\n"
+            "There is a limit that applies to literal context bits and literal\n"
+            "position bits together: lc + lp <= 4. Without this limit the\n"
+            "decoding could become very slow, which could have security related\n"
+            "results in some cases like email servers doing virus scanning.");
+    LZMAOptions_members[7] = MEMBER_DESCRIPTOR("lp", T_OBJECT, lp,
+            "Number of literal position bits (%u - %u, LZMA)\n"
+            "How many of the lowest bits of the current position (number\n"
+            "of bytes from the beginning of the uncompressed data) in the\n"
+            "uncompressed data is taken into account when predicting the\n"
+            "bits of the next literal (a single eight-bit byte).\n");
+    LZMAOptions_members[8] = MEMBER_DESCRIPTOR("pb", T_OBJECT, pb,
+            "Number of position bits Position bits (%u - %u, LZMA)\n"
+            "How many of the lowest bits of the current position in the\n"
+            "uncompressed data is taken into account when estimating\n"
+            "probabilities of matches. A match is a sequence of bytes for\n"
+            "which a matching sequence is found from the dictionary and\n"
+            "thus can be stored as distance-length pair.\n"
+            "\n"
+            "Example: If most of the matches occur at byte positions\n"
+            "of 8 * n + 3, that is, 3, 11, 19, ... set pb to 3,\n"
+            "because 2**3 == 8.\n");
+    LZMAOptions_members[9] = MEMBER_DESCRIPTOR("mode", T_OBJECT, mode,
+            "Available modes: ('%s' or '%s', LZMA).\n"
+            "Fast mode is usually at its best when combined with a hash chain match finder.\n"
+            "Best is usually notably slower than fast mode. Use this together with binary\n"
+            "tree match finders to expose the full potential of the LZMA encoder.");
+    LZMAOptions_members[10] = MEMBER_DESCRIPTOR("nice_len", T_OBJECT, nice_len,
+            "Nice length of a match (also known as number of fast bytes) (%u - %u, LZMA)\n"
+            "Nice length of match determines how many bytes the encoder\n"
+            "compares from the match candidates when looking for the best\n"
+            "match. Bigger fast bytes value usually increase both compression\n"
+            "ratio and time.\n");
+    LZMAOptions_members[11] = MEMBER_DESCRIPTOR("mf", T_OBJECT, mf,
+            "Match Finder (LZMA)\n"
+            "Match finder has major effect on both speed and compression ratio.\n"
+            "Usually hash chains are faster than binary trees.\n"
+            "Available match finders:\n"
+            "'%s': Binary Tree with 2 bytes hashing\n"
+            "Memory requirements: 9.5 * dict_size + 4 MiB\n"
+            "'%s': Binary Tree with 3 bytes hashing\n"
+            "Memory requirements: 11.5 * dict_size + 4 MiB\n"
+            "'%s': Binary Tree with 4 bytes hashing\n"
+            "Memory requirements: 11.5 * dict_size + 4 MiB\n"
+            "'%s': Hash Chain with 3 bytes hashing\n"
+            "'%s': Hash Chain with 4 bytes hashing\n"
+            "Memory requirements: 7.5 * dict_size + 4 MiB\n");
+    LZMAOptions_members[12] = MEMBER_DESCRIPTOR("depth", T_OBJECT, depth,
+            "Depth (also known as match finder cycles, LZMA)\n"
+            "Higher values give slightly better compression ratio but\n"
+            "decrease speed. Use special value %u to let liblzma use\n"
+            "match-finder-dependent default value.\n");
+    LZMAOptions_members[13] = MEMBER_DESCRIPTOR("dist", T_OBJECT, dist,
+            "Delta distance (%u - %u, Delta)\n"
+            "With the only currently supported type, 'bytes',\n"
+            "the distance is as bytes.\n"
+            "\n"
+            "Examples:\n"
+            "- 16-bit stereo audio: distance = 4 bytes\n"
+            "- 24-bit RGB image data: distance = 3 bytes\n"
+            "\n"
+            "Default: 1\n");
+    LZMAOptions_members[14] = MEMBER_DESCRIPTOR("start", T_OBJECT, start,
+            "Start offset for conversions (BCJ)\n"
+            "This setting is useful only when the same filter is used\n"
+            "_separately_ for multiple sections of the same executable file,\n"
+            "and the sections contain cross-section branch/call/jump\n"
+            "instructions. In that case it is benefical to set the start\n"
+            "offset of the non-first sections so that the relative addresses\n"
+            "of the cross-section branch/call/jump instructions will use the\n"
+            "same absolute addresses as in the first section.\n"
+            "\n"
+            "Default: %u\n");
+
+    LZMAOptions_members[15] = memberDef(NULL, 0, 0, 0, NULL); /* sentinel */
+
+    Py_DECREF(compresslevelString);
+    return (PyObject*)self;
+}
+
+/* Don't allow messing with this data..
*/
+/* tp_setattr handler: every attribute assignment is refused with a
+ * RuntimeError naming the attribute. Always returns -1.
+ */
+static int
+LZMAOptions_setattr(LZMAOptionsObject *self, const char *name)
+{
+    PyErr_Format(PyExc_RuntimeError, "Read-only attribute: %s\n", name);
+    return -1;
+}
+
+/* Type object for lzma.LZMAOptions. Allocation is routed through
+ * LZMAOptions_alloc and attribute assignment through LZMAOptions_setattr;
+ * the attributes themselves come from the LZMAOptions_members table.
+ */
+static PyTypeObject LZMAOptions_Type = {
+    PyVarObject_HEAD_INIT(NULL, 0)
+    "lzma.LZMAOptions",                     /* tp_name */
+    sizeof(LZMAOptionsObject),              /* tp_basicsize */
+    0,                                      /* tp_itemsize */
+    (destructor)LZMAOptions_dealloc,        /* tp_dealloc */
+    0,                                      /* tp_print */
+    0,                                      /* tp_getattr */
+    (setattrfunc)LZMAOptions_setattr,       /* tp_setattr */
+    0,                                      /* tp_reserved */
+    (reprfunc)LZMAOptions_repr,             /* tp_repr */
+    0,                                      /* tp_as_number */
+    0,                                      /* tp_as_sequence */
+    0,                                      /* tp_as_mapping */
+    0,                                      /* tp_hash */
+    0,                                      /* tp_call */
+    0,                                      /* tp_str */
+    0,                                      /* tp_getattro */
+    0,                                      /* tp_setattro */
+    0,                                      /* tp_as_buffer */
+    0,                                      /* tp_flags */
+    LZMAOptions__doc__,                     /* tp_doc */
+    0,                                      /* tp_traverse */
+    0,                                      /* tp_clear */
+    0,                                      /* tp_richcompare */
+    0,                                      /* tp_weaklistoffset */
+    0,                                      /* tp_iter */
+    0,                                      /* tp_iternext */
+    0,                                      /* tp_methods */
+    LZMAOptions_members,                    /* tp_members */
+    0,                                      /* tp_getset */
+    0,                                      /* tp_base */
+    0,                                      /* tp_dict */
+    0,                                      /* tp_descr_get */
+    0,                                      /* tp_descr_set */
+    0,                                      /* tp_dictoffset */
+    0,                                      /* tp_init */
+    LZMAOptions_alloc,                      /* tp_alloc */
+    0,                                      /* tp_new */
+    0,                                      /* tp_free */
+    0,                                      /* tp_is_gc */
+    0,                                      /* tp_bases */
+    0,                                      /* tp_mro */
+    0,                                      /* tp_cache */
+    0,                                      /* tp_subclasses */
+    0,                                      /* tp_weaklist */
+    0,                                      /* tp_del */
+    0                                       /* tp_version_tag */
+};
+
+/* ===================================================================== */
+/* Methods of LZMAFile. */
+
+PyDoc_STRVAR(LZMAFile_read__doc__,
+"read([size]) -> string\n\
+\n\
+Read at most size uncompressed bytes, returned as a string. If the size\n\
+argument is negative or omitted, read until EOF is reached.\n\
+");
+
+/* This is a hacked version of Python's fileobject.c:file_read().
*/
+/* read([size]): return up to `size` decompressed bytes as a bytes object,
+ * or everything up to end of stream when `size` is negative or omitted.
+ *
+ * Only a file in MODE_READ is actually read from. MODE_READ_EOF yields an
+ * empty bytes object, MODE_CLOSED raises ValueError and every other mode,
+ * MODE_WRITE included, raises IOError, matching readline() and readlines().
+ * Returns NULL with an exception set on failure.
+ */
+static PyObject *
+LZMAFile_read(LZMAFileObject *self, PyObject *args)
+{
+    long bytesrequested = -1;
+    size_t bytesread, buffersize, chunksize;
+    lzma_ret lzuerror = LZMA_OK;
+    PyObject *ret = NULL;
+
+    if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
+        return NULL;
+
+    ACQUIRE_LOCK(self);
+    switch (self->mode) {
+        case MODE_READ:
+            break;
+        case MODE_READ_EOF:
+            ret = PyBytes_FromStringAndSize("", 0);
+            goto cleanup;
+        case MODE_CLOSED:
+            PyErr_SetString(PyExc_ValueError,
+                    "I/O operation on closed file");
+            goto cleanup;
+        case MODE_WRITE:
+            /* a file opened for writing must not be read from */
+        default:
+            PyErr_SetString(PyExc_IOError,
+                    "file is not ready for reading");
+            goto cleanup;
+    }
+
+    /* refuse to mix with f.next() */
+    if (check_iterbuffered(self))
+        goto cleanup;
+
+    if (bytesrequested < 0)
+        buffersize = Util_NewBufferSize((size_t)0);
+    else
+        buffersize = bytesrequested;
+    if (buffersize > INT_MAX) {
+        PyErr_SetString(PyExc_OverflowError,
+                "requested number of bytes is "
+                "more than a Python string can hold");
+        goto cleanup;
+    }
+    ret = PyBytes_FromStringAndSize((char *)NULL, buffersize);
+    if (ret == NULL || buffersize == 0)
+        goto cleanup;
+    bytesread = 0;
+
+    for (;;) {
+        Py_BEGIN_ALLOW_THREADS
+        chunksize = lzma_read(&lzuerror, self->fp,
+                BUF(ret)+bytesread,
+                buffersize-bytesread);
+        self->pos += chunksize;
+        Py_END_ALLOW_THREADS
+        bytesread += chunksize;
+        if (lzuerror == LZMA_STREAM_END) {
+            /* end of stream: the uncompressed size is now known */
+            self->size = self->pos;
+            self->mode = MODE_READ_EOF;
+            break;
+        } else if (lzuerror != LZMA_OK) {
+            Util_CatchLZMAError(lzuerror, &self->fp->strm, self->fp->encoding);
+            Py_DECREF(ret);
+            ret = NULL;
+            goto cleanup;
+        }
+        if (bytesrequested < 0) {
+            /* read-until-EOF: enlarge the result and keep going */
+            buffersize = Util_NewBufferSize(buffersize);
+            if (_PyBytes_Resize(&ret, buffersize) < 0) {
+                ret = NULL;
+                goto cleanup;
+            }
+        } else {
+            break;
+        }
+    }
+    /* trim the result to what was actually read */
+    if (bytesread != buffersize) {
+        if (_PyBytes_Resize(&ret, bytesread) < 0) {
+            ret = NULL;
+        }
+    }
+
+cleanup:
+    RELEASE_LOCK(self);
+    return ret;
+}
+
+
+PyDoc_STRVAR(LZMAFile_readline__doc__,
+"readline([size]) -> string\n\
+\n\
+Return the next line from the file, as a string, retaining newline.\n\
+A non-negative size argument will limit the maximum number of bytes to\n\
+return (an incomplete line may be returned then). Return an empty\n\
+string at EOF.\n\
+");
+
+/* readline([size]): fetch one line through Util_GetLine() while holding the
+ * file lock. A size of 0 and a file already at end of stream both yield an
+ * empty bytes object; a negative size is passed on as 0.
+ * Returns NULL with an exception set on failure.
+ */
+static PyObject *
+LZMAFile_readline(LZMAFileObject *self, PyObject *args)
+{
+    PyObject *ret = NULL;
+    int sizehint = -1;
+
+    if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
+        return NULL;
+
+    ACQUIRE_LOCK(self);
+    if (self->mode == MODE_READ_EOF) {
+        ret = PyBytes_FromStringAndSize("", 0);
+    }
+    else if (self->mode == MODE_CLOSED) {
+        PyErr_SetString(PyExc_ValueError,
+                "I/O operation on closed file");
+    }
+    else if (self->mode != MODE_READ) {
+        /* MODE_WRITE and anything unexpected */
+        PyErr_SetString(PyExc_IOError,
+                "file is not ready for reading");
+    }
+    else if (check_iterbuffered(self)) {
+        /* refuse to mix with f.next(); ret stays NULL */
+    }
+    else if (sizehint == 0) {
+        ret = PyBytes_FromStringAndSize("", 0);
+    }
+    else {
+        ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
+    }
+    RELEASE_LOCK(self);
+
+    return ret;
+}
+
+PyDoc_STRVAR(LZMAFile_readlines__doc__,
+"readlines([size]) -> list\n\
+\n\
+Call readline() repeatedly and return a list of lines read.\n\
+The optional size argument, if given, is an approximate bound on the\n\
+total number of bytes in the lines returned.\n\
+");
+
+/* This is a hacked version of Python's fileobject.c:file_readlines().
*/
+/* readlines([size]): decompress into a buffer, split it at '\n' and return
+ * the pieces as a list of bytes objects. A positive `size` stops reading
+ * once that many bytes have been consumed; the line in progress is then
+ * completed with Util_GetLine(). The buffer starts on the stack and moves
+ * to a bytes object that is doubled whenever a line does not fit.
+ * Returns NULL with an exception set on failure.
+ */
+static PyObject *
+LZMAFile_readlines(LZMAFileObject *self, PyObject *args)
+{
+    long sizehint = 0;
+    PyObject *list = NULL;
+    PyObject *line;
+    char small_buffer[SMALLCHUNK];
+    char *buffer = small_buffer;
+    size_t buffersize = SMALLCHUNK;
+    PyObject *big_buffer = NULL;
+    size_t nfilled = 0;
+    size_t nread;
+    size_t totalread = 0;
+    char *p, *q, *end;
+    int err;
+    int shortread = 0;
+    lzma_ret lzuerror = LZMA_OK;
+
+    if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
+        return NULL;
+
+    ACQUIRE_LOCK(self);
+    switch (self->mode) {
+        case MODE_READ:
+            break;
+        case MODE_READ_EOF:
+            list = PyList_New(0);
+            goto cleanup;
+        case MODE_CLOSED:
+            PyErr_SetString(PyExc_ValueError,
+                    "I/O operation on closed file");
+            goto cleanup;
+        case MODE_WRITE:
+        default:
+            PyErr_SetString(PyExc_IOError,
+                    "file is not ready for reading");
+            goto cleanup;
+    }
+
+    /* refuse to mix with f.next() */
+    if (check_iterbuffered(self))
+        goto cleanup;
+
+    list = PyList_New(0);
+    if (list == NULL)
+        goto cleanup;
+
+    for (;;) {
+        Py_BEGIN_ALLOW_THREADS
+        nread = lzma_read(&lzuerror, self->fp,
+                buffer+nfilled, buffersize-nfilled);
+        self->pos += nread;
+        Py_END_ALLOW_THREADS
+        if (lzuerror == LZMA_STREAM_END) {
+            self->size = self->pos;
+            self->mode = MODE_READ_EOF;
+            if (nread == 0) {
+                sizehint = 0;
+                break;
+            }
+            shortread = 1;
+        } else if (lzuerror != LZMA_OK) {
+            Util_CatchLZMAError(lzuerror, &self->fp->strm, self->fp->encoding);
+            goto error;
+        }
+        totalread += nread;
+        p = memchr(buffer+nfilled, '\n', nread);
+        if (!shortread && p == NULL) {
+            /* Need a larger buffer to fit this line */
+            nfilled += nread;
+            buffersize *= 2;
+            if (buffersize > INT_MAX) {
+                PyErr_SetString(PyExc_OverflowError,
+                        "line is longer than a Python string can hold");
+                goto error;
+            }
+            if (big_buffer == NULL) {
+                /* Create the big buffer */
+                big_buffer = PyBytes_FromStringAndSize(
+                        NULL, buffersize);
+                if (big_buffer == NULL)
+                    goto error;
+                buffer = PyBytes_AS_STRING(big_buffer);
+                memcpy(buffer, small_buffer, nfilled);
+            }
+            else {
+                /* Grow the big buffer */
+                if (_PyBytes_Resize(&big_buffer, buffersize) < 0){
+                    big_buffer = NULL;
+                    goto error;
+                }
+                buffer = PyBytes_AS_STRING(big_buffer);
+            }
+            continue;
+        }
+        end = buffer+nfilled+nread;
+        q = buffer;
+        while (p != NULL) {
+            /* Process complete lines */
+            p++;
+            line = PyBytes_FromStringAndSize(q, p-q);
+            if (line == NULL)
+                goto error;
+            err = PyList_Append(list, line);
+            Py_DECREF(line);
+            if (err != 0)
+                goto error;
+            q = p;
+            p = memchr(q, '\n', end-q);
+        }
+        /* Move the remaining incomplete line to the start */
+        nfilled = end-q;
+        memmove(buffer, q, nfilled);
+        if (sizehint > 0)
+            if (totalread >= (size_t)sizehint)
+                break;
+        if (shortread) {
+            sizehint = 0;
+            break;
+        }
+    }
+    if (nfilled != 0) {
+        /* Partial last line */
+        line = PyBytes_FromStringAndSize(buffer, nfilled);
+        if (line == NULL)
+            goto error;
+        if (sizehint > 0) {
+            /* Need to complete the last line */
+            PyObject *rest = Util_GetLine(self, 0);
+            if (rest == NULL) {
+                Py_DECREF(line);
+                goto error;
+            }
+            PyBytes_Concat(&line, rest);
+            Py_DECREF(rest);
+            if (line == NULL)
+                goto error;
+        }
+        err = PyList_Append(list, line);
+        Py_DECREF(line);
+        if (err != 0)
+            goto error;
+    }
+    goto cleanup;
+
+error:
+    /* only reached after the list has been created */
+    Py_DECREF(list);
+    list = NULL;
+
+cleanup:
+    RELEASE_LOCK(self);
+    if (big_buffer) {
+        Py_DECREF(big_buffer);
+    }
+    return list;
+}
+
+PyDoc_STRVAR(LZMAFile_write__doc__,
+"write(data) -> None\n\
+\n\
+Write the 'data' string to file. Note that due to buffering, close() may\n\
+be needed before the file on disk reflects the data written.\n\
+");
+
+/* This is a hacked version of Python's fileobject.c:file_write().
*/
+/* write(data): compress the contents of a bytes-like object into the file.
+ * Requires MODE_WRITE; MODE_CLOSED raises ValueError and every other mode
+ * raises IOError. self->pos is advanced by the length of `data`.
+ * Returns None, or NULL with an exception set on failure.
+ */
+static PyObject *
+LZMAFile_write(LZMAFileObject *self, PyObject *args)
+{
+    PyObject *ret = NULL;
+    Py_buffer pbuf;
+    char *buf;
+    Py_ssize_t len;
+    lzma_ret lzuerror = LZMA_OK;
+
+    if (!PyArg_ParseTuple(args, "y*:write", &pbuf))
+        return NULL;
+    buf = pbuf.buf;
+    len = pbuf.len;
+
+    ACQUIRE_LOCK(self);
+    if (self->mode == MODE_CLOSED) {
+        PyErr_SetString(PyExc_ValueError,
+                "I/O operation on closed file");
+        goto cleanup;
+    }
+    if (self->mode != MODE_WRITE) {
+        /* MODE_READ, MODE_READ_EOF and anything unexpected */
+        PyErr_SetString(PyExc_IOError,
+                "file is not ready for writing");
+        goto cleanup;
+    }
+
+    Py_BEGIN_ALLOW_THREADS
+    lzma_write (&lzuerror, self->fp, buf, len);
+    self->pos += len;
+    Py_END_ALLOW_THREADS
+
+    if (lzuerror != LZMA_OK) {
+        Util_CatchLZMAError(lzuerror, &self->fp->strm, self->fp->encoding);
+        goto cleanup;
+    }
+
+    ret = Py_None;
+    Py_INCREF(ret);
+
+cleanup:
+    /* the buffer view is released on every path that acquired it */
+    PyBuffer_Release(&pbuf);
+    RELEASE_LOCK(self);
+    return ret;
+}
+
+PyDoc_STRVAR(LZMAFile_writelines__doc__,
+"writelines(sequence_of_strings) -> None\n\
+\n\
+Write the sequence of strings to the file. Note that newlines are not\n\
+added. The sequence can be any iterable object producing strings. This is\n\
+equivalent to calling write() for each string.\n\
+");
+
+/* This is a hacked version of Python's fileobject.c:file_writelines().
*/
+/* writelines(seq): write every item of `seq` (a list or any iterable) to
+ * the file, CHUNKSIZE items at a time. Items that are not bytes objects
+ * are converted through the char-buffer interface first. self->pos is
+ * advanced for every item handed to lzma_write(), the same way write()
+ * does it, so that tell() agrees between the two methods.
+ * Returns None, or NULL with an exception set on failure.
+ */
+static PyObject *
+LZMAFile_writelines(LZMAFileObject *self, PyObject *seq)
+{
+#define CHUNKSIZE 1000
+    PyObject *list = NULL;
+    PyObject *iter = NULL;
+    PyObject *ret = NULL;
+    PyObject *line;
+    int i, j, index, islist;
+    Py_ssize_t len;     /* PyBytes_GET_SIZE() yields a Py_ssize_t */
+    lzma_ret lzuerror = LZMA_OK;
+
+    ACQUIRE_LOCK(self);
+    switch (self->mode) {
+        case MODE_WRITE:
+            break;
+
+        case MODE_CLOSED:
+            PyErr_SetString(PyExc_ValueError,
+                    "I/O operation on closed file");
+            goto error;
+
+        case MODE_READ:
+        case MODE_READ_EOF:
+        default:
+            PyErr_SetString(PyExc_IOError,
+                    "file is not ready for writing");
+            goto error;
+    }
+
+    islist = PyList_Check(seq);
+    if (!islist) {
+        iter = PyObject_GetIter(seq);
+        if (iter == NULL) {
+            PyErr_SetString(PyExc_TypeError,
+                    "writelines() requires an iterable argument");
+            goto error;
+        }
+        list = PyList_New(CHUNKSIZE);
+        if (list == NULL)
+            goto error;
+    }
+
+    /* Strategy: slurp CHUNKSIZE lines into a private list,
+       checking that they are all strings, then write that list
+       without holding the interpreter lock, then come back for more. */
+    for (index = 0; ; index += CHUNKSIZE) {
+        if (islist) {
+            Py_XDECREF(list);
+            list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
+            if (list == NULL)
+                goto error;
+            j = PyList_GET_SIZE(list);
+        }
+        else {
+            for (j = 0; j < CHUNKSIZE; j++) {
+                line = PyIter_Next(iter);
+                if (line == NULL) {
+                    if (PyErr_Occurred())
+                        goto error;
+                    break;
+                }
+                PyList_SetItem(list, j, line);
+            }
+        }
+        if (j == 0)
+            break;
+
+        /* Check that all entries are indeed byte string. If not,
+           apply the same rules as for file.write() and
+           convert the rets to strings. This is slow, but
+           seems to be the only way since all conversion APIs
+           could potentially execute Python code. */
+        for (i = 0; i < j; i++) {
+            PyObject *v = PyList_GET_ITEM(list, i);
+            if (!PyBytes_Check(v)) {
+                const char *buffer;
+                Py_ssize_t buflen;
+                if (PyObject_AsCharBuffer(v, &buffer, &buflen)) {
+                    PyErr_SetString(PyExc_TypeError,
+                            "writelines() "
+                            "argument must be "
+                            "a sequence of "
+                            "strings");
+                    goto error;
+                }
+                line = PyBytes_FromStringAndSize(buffer,
+                        buflen);
+                if (line == NULL)
+                    goto error;
+                Py_DECREF(v);
+                PyList_SET_ITEM(list, i, line);
+            }
+        }
+
+        /* Since we are releasing the global lock, the
+           following code may *not* execute Python code. */
+        Py_BEGIN_ALLOW_THREADS
+        for (i = 0; i < j; i++) {
+            line = PyList_GET_ITEM(list, i);
+            len = PyBytes_GET_SIZE(line);
+            lzma_write (&lzuerror, self->fp,
+                    PyBytes_AS_STRING(line), len);
+            /* keep the position in step with write() */
+            self->pos += len;
+            if (lzuerror != LZMA_OK) {
+                Py_BLOCK_THREADS
+                Util_CatchLZMAError(lzuerror, &self->fp->strm, self->fp->encoding);
+                goto error;
+            }
+        }
+        Py_END_ALLOW_THREADS
+
+        if (j < CHUNKSIZE)
+            break;
+    }
+
+    Py_INCREF(Py_None);
+    ret = Py_None;
+
+error:
+    RELEASE_LOCK(self);
+    Py_XDECREF(list);
+    Py_XDECREF(iter);
+    return ret;
+#undef CHUNKSIZE
+}
+
+PyDoc_STRVAR(LZMAFile_seek__doc__,
+"seek(offset [, whence]) -> None\n\
+\n\
+Move to new file position. Argument offset is a byte count. Optional\n\
+argument whence defaults to 0 (offset from start of file, offset\n\
+should be >= 0); other values are 1 (move relative to current position,\n\
+positive or negative), and 2 (move relative to end of file, usually\n\
+negative, although many platforms allow seeking beyond the end of a file).\n\
+\n\
+Note that seeking of lzma files is emulated, and depending on the parameters\n\
+the operation may be extremely slow.\n\
+");
+
+/* seek(offset[, whence]): emulated seek for files opened for reading.
+ * The target is turned into an absolute position; moving forward means
+ * decompressing and discarding data, moving backward means closing the
+ * decoder, rewinding the raw file and decompressing from the start.
+ * whence == 2 first decompresses to the end of the stream if the
+ * uncompressed size is not known yet (self->size == -1).
+ * Returns None, or NULL with an exception set on failure.
+ */
+static PyObject *
+LZMAFile_seek(LZMAFileObject *self, PyObject *args)
+{
+    int where = 0;
+    PyObject *offobj;
+    Py_off_t offset;
+    char small_buffer[SMALLCHUNK];
+    char *buffer = small_buffer;
+    Py_ssize_t buffersize = SMALLCHUNK;
+    Py_off_t bytesread = 0;
+    size_t readsize;
+    int chunksize;
+    lzma_ret lzuerror = LZMA_OK;
+    PyObject *ret = NULL;
+
+    if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
+        return NULL;
+#if !defined(HAVE_LARGEFILE_SUPPORT)
+    offset = PyLong_AsLong(offobj);
+#else
+    offset = PyLong_Check(offobj) ?
+        PyLong_AsLongLong(offobj) : PyLong_AsLong(offobj);
+#endif
+    if (PyErr_Occurred())
+        return NULL;
+
+    ACQUIRE_LOCK(self);
+    Util_DropReadAhead(self);
+    switch (self->mode) {
+        case MODE_READ:
+        case MODE_READ_EOF:
+            break;
+
+        case MODE_CLOSED:
+            PyErr_SetString(PyExc_ValueError,
+                    "I/O operation on closed file");
+            goto cleanup;
+
+        case MODE_WRITE:
+        default:
+            PyErr_SetString(PyExc_IOError,
+                    "seek works only while reading");
+            goto cleanup;
+    }
+
+    if (where == 2) {
+        if (self->size == -1) {
+            /* size unknown: read to the end of the stream to learn it */
+            assert(self->mode != MODE_READ_EOF);
+            for (;;) {
+                Py_BEGIN_ALLOW_THREADS
+                chunksize = lzma_read(&lzuerror, self->fp,
+                        buffer, buffersize);
+                self->pos += chunksize;
+                Py_END_ALLOW_THREADS
+
+                bytesread += chunksize;
+                if (lzuerror == LZMA_STREAM_END) {
+                    break;
+                } else if (lzuerror != LZMA_OK) {
+                    Util_CatchLZMAError(lzuerror, &self->fp->strm, self->fp->encoding);
+                    goto cleanup;
+                }
+            }
+            self->mode = MODE_READ_EOF;
+            self->size = self->pos;
+            bytesread = 0;
+        }
+        offset = self->size + offset;
+    } else if (where == 1) {
+        offset = self->pos + offset;
+    }
+
+    /* Before getting here, offset must be the absolute position the file
+     * pointer should be set to. */
+
+    if (offset >= self->pos) {
+        /* we can move forward */
+        offset -= self->pos;
+    } else {
+        /* we cannot move back, so rewind the stream */
+        lzma_close(&lzuerror, self->fp);
+        if (lzuerror != LZMA_OK) {
+            Util_CatchLZMAError(lzuerror, &self->fp->strm, self->fp->encoding);
+            goto cleanup;
+        }
+        rewind(self->rawfp);
+        self->pos = 0;
+        self->fp = lzma_open(&lzuerror, self->filters, self->check, self->rawfp, self->memlimit);
+        if (lzuerror != LZMA_OK) {
+            Util_CatchLZMAError(lzuerror, &self->fp->strm, self->fp->encoding);
+            goto cleanup;
+        }
+        self->mode = MODE_READ;
+    }
+
+    if (offset <= 0 || self->mode == MODE_READ_EOF )
+        goto exit;
+
+    /* Before getting here, offset must be set to the number of bytes
+     * to walk forward. */
+    for (;;) {
+        if (offset-bytesread > buffersize)
+            readsize = buffersize;
+        else
+            /* offset might be wider that readsize, but the result
+             * of the subtraction is bound by buffersize (see the
+             * condition above). buffersize is 8192. */
+            readsize = (size_t)(offset-bytesread);
+        Py_BEGIN_ALLOW_THREADS
+        chunksize = lzma_read(&lzuerror, self->fp, buffer, readsize);
+        self->pos += chunksize;
+        Py_END_ALLOW_THREADS
+        bytesread += chunksize;
+        if (lzuerror == LZMA_STREAM_END) {
+            self->size = self->pos;
+            self->mode = MODE_READ_EOF;
+            break;
+        } else if (lzuerror != LZMA_OK) {
+            Util_CatchLZMAError(lzuerror, &self->fp->strm, self->fp->encoding);
+            goto cleanup;
+        }
+        if (bytesread == offset)
+            break;
+    }
+
+exit:
+    Py_INCREF(Py_None);
+    ret = Py_None;
+
+cleanup:
+    RELEASE_LOCK(self);
+    return ret;
+}
+
+PyDoc_STRVAR(LZMAFile_tell__doc__,
+"tell() -> int\n\
+\n\
+Return the current file position, an integer (may be a long integer).\n\
+");
+
+/* tell(): return self->pos as a Python int, or raise ValueError when the
+ * file is closed. The file lock is not taken here.
+ */
+static PyObject *
+LZMAFile_tell(LZMAFileObject *self, PyObject *args)
+{
+    PyObject *ret = NULL;
+
+    if (self->mode == MODE_CLOSED) {
+        PyErr_SetString(PyExc_ValueError,
+                "I/O operation on closed file");
+        goto cleanup;
+    }
+
+#if !defined(HAVE_LARGEFILE_SUPPORT)
+    ret = PyLong_FromLong(self->pos);
+#else
+    ret = PyLong_FromLongLong(self->pos);
+#endif
+
+cleanup:
+    return ret;
+}
+
+PyDoc_STRVAR(LZMAFile_close__doc__,
+"close() -> None or (perhaps) an integer\n\
+\n\
+Close the file. Sets data attribute .closed to true. A closed file\n\
+cannot be used for further I/O operations.
close() may be called more\n\ +than once without error.\n\ +"); + +static PyObject * +LZMAFile_close(LZMAFileObject *self) +{ + PyObject *ret = NULL; + lzma_ret lzuerror = LZMA_OK; + + if (self->mode == MODE_CLOSED) { + Py_RETURN_NONE; + } + + ACQUIRE_LOCK(self); + lzma_close(&lzuerror, self->fp); + self->mode = MODE_CLOSED; + fclose(self->rawfp); + self->rawfp = NULL; + if (lzuerror == LZMA_OK || lzuerror == LZMA_STREAM_END) { + Py_INCREF(Py_None); + ret = Py_None; + } + else { + Util_CatchLZMAError(lzuerror, NULL, self->fp->encoding); + } + + RELEASE_LOCK(self); + return ret; +} + +PyDoc_STRVAR(LZMAFile_enter_doc, +"__enter__() -> self."); + +static PyObject * +LZMAFile_enter(LZMAFileObject *self) +{ + if (self->mode == MODE_CLOSED) { + PyErr_SetString(PyExc_ValueError, + "I/O operation on closed file"); + return NULL; + } + Py_INCREF(self); + return (PyObject *) self; +} + +PyDoc_STRVAR(LZMAFile_exit_doc, +"__exit__(*excinfo) -> None. Closes the file."); + +static PyObject * +LZMAFile_exit(LZMAFileObject *self, PyObject *args) +{ + PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL); + if (!ret) + /* If error occurred, pass through */ + return NULL; + Py_DECREF(ret); + Py_RETURN_NONE; +} + +static PyObject *LZMAFile_getiter(LZMAFileObject *self); + +static PyMethodDef LZMAFile_methods[] = { + {"read", (PyCFunction)LZMAFile_read, METH_VARARGS, LZMAFile_read__doc__}, + {"readline", (PyCFunction)LZMAFile_readline, METH_VARARGS, LZMAFile_readline__doc__}, + {"readlines", (PyCFunction)LZMAFile_readlines, METH_VARARGS, LZMAFile_readlines__doc__}, + {"write", (PyCFunction)LZMAFile_write, METH_VARARGS, LZMAFile_write__doc__}, + {"writelines", (PyCFunction)LZMAFile_writelines, METH_O, LZMAFile_writelines__doc__}, + {"seek", (PyCFunction)LZMAFile_seek, METH_VARARGS, LZMAFile_seek__doc__}, + {"tell", (PyCFunction)LZMAFile_tell, METH_NOARGS, LZMAFile_tell__doc__}, + {"close", (PyCFunction)LZMAFile_close, METH_NOARGS, LZMAFile_close__doc__}, + {"__enter__", 
(PyCFunction)LZMAFile_enter, METH_NOARGS, LZMAFile_enter_doc}, + {"__exit__", (PyCFunction)LZMAFile_exit, METH_VARARGS, LZMAFile_exit_doc}, + {NULL, NULL, 0, NULL} /* sentinel */ +}; + + +/* ===================================================================== */ +/* Getters and setters of LZMAFile. */ + +static PyObject * +LZMAFile_get_closed(LZMAFileObject *self, void *closure) +{ + return PyLong_FromLong(self->mode == MODE_CLOSED); +} + +static PyGetSetDef LZMAFile_getset[] = { + {"closed", (getter)LZMAFile_get_closed, NULL, + "True if the file is closed", NULL}, + {NULL, NULL, NULL, NULL, NULL} /* sentinel */ +}; + + +/* ===================================================================== */ +/* Slot definitions for LZMAFile_Type. */ + +static int +LZMAFile_init(LZMAFileObject *self, PyObject *args, PyObject *kwargs) +{ + PyObject *name_obj = NULL, *myFilters = NULL, + *myFormat = NULL, *myCheck = NULL; + int compresslevel = LZMA_PRESET_DEFAULT, threads = 1; + char *name, *mode = "r"; + int buffering = -1; + lzma_ret lzuerror = LZMA_OK; + + static char *kwlist[] = {"name", "mode", "buffering", "compresslevel", "format", + "check", "threads", "filter", "memlimit", NULL}; + + self->filters[0].options = NULL; + + self->size = -1; + self->memlimit = -1; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O&|sKiUUiO:LZMAFile", kwlist, + PyUnicode_FSConverter, &name_obj, + &mode, &buffering, &compresslevel, &myCheck, &threads, &self->memlimit, &myFilters)) + return -1; + name = PyBytes_AsString(name_obj); + + for (;;) { + int error = 0; + switch (*mode) { + case 'w': + if(!self->filters[0].options) + { + if(!init_lzma_options("LZMAFile", self->filters, &self->check, compresslevel, myFormat, myCheck, threads, myFilters)) { + Py_DECREF(name_obj); + return -1; + } + } + break; + + case 'r': + if(self->filters[0].options) + error = 1; + case 'b': + break; + + default: + error = 1; + break; + } + if (error) { + free_lzma_options(self->filters); + 
PyErr_Format(PyExc_ValueError, + "invalid mode char %c", *mode); + Py_DECREF(name_obj); + return -1; + } + mode++; + if (*mode == '\0') + break; + } + + mode = self->filters[0].options ? "wb" : "rb"; + + self->rawfp = fopen(name, mode); + Py_DECREF(name_obj); + if (self->rawfp == NULL) { + PyErr_SetFromErrno(PyExc_IOError); + return -1; + } + /* XXX Ignore buffering */ + + /* From now on, we have stuff to dealloc, so jump to error label + * instead of returning */ + +#ifdef WITH_THREAD + self->lock = PyThread_allocate_lock(); + if (!self->lock) { + PyErr_SetString(PyExc_MemoryError, "unable to allocate lock"); + goto error; + } +#endif + + self->fp = lzma_open(&lzuerror, self->filters, self->check, self->rawfp, self->memlimit); + + if (lzuerror != LZMA_OK) { + Util_CatchLZMAError(lzuerror, &self->fp->strm, self->fp->encoding); + goto error; + } + + self->mode = self->filters[0].options ? MODE_WRITE : MODE_READ; + + return 0; + +error: + fclose(self->rawfp); + self->rawfp = NULL; +#ifdef WITH_THREAD + if (self->lock) { + PyThread_free_lock(self->lock); + self->lock = NULL; + } +#endif + return -1; +} + +static void +LZMAFile_dealloc(LZMAFileObject *self) +{ + lzma_ret lzuerror = LZMA_OK; +#ifdef WITH_THREAD + if (self->lock) + PyThread_free_lock(self->lock); +#endif + lzma_close(&lzuerror, self->fp); + free_lzma_options(self->filters); + Util_DropReadAhead(self); + if (self->rawfp != NULL) + fclose(self->rawfp); + Py_TYPE(self)->tp_free((PyObject *)self); +} + +/* This is a hacked version of Python's fileobject.c:file_getiter(). */ +static PyObject * +LZMAFile_getiter(LZMAFileObject *self) +{ + if (self->mode == MODE_CLOSED) { + PyErr_SetString(PyExc_ValueError, + "I/O operation on closed file"); + return NULL; + } + Py_INCREF((PyObject*)self); + return (PyObject *)self; +} + +/* This is a hacked version of Python's fileobject.c:file_iternext(). 
*/ +#define READAHEAD_BUFSIZE 8192 +static PyObject * +LZMAFile_iternext(LZMAFileObject *self) +{ + PyObject* ret; + ACQUIRE_LOCK(self); + if (self->mode == MODE_CLOSED) { + RELEASE_LOCK(self); + PyErr_SetString(PyExc_ValueError, + "I/O operation on closed file"); + return NULL; + } + ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE); + RELEASE_LOCK(self); + if (ret == NULL || PyBytes_GET_SIZE(ret) == 0) { + Py_XDECREF(ret); + return NULL; + } + return (PyObject *)ret; +} + +/* ===================================================================== */ +/* LZMAFile_Type definition. */ + +PyDoc_VAR(LZMAFile__doc__) = +PyDoc_STR( +"LZMAFile(name , mode='r', buffering=0, memlimit=-1,\n" +DEFAULT_OPTIONS_STRING") -> file object\n\ +\n\ +Open a lzma file. The mode can be 'r' or 'w', for reading (default) or\n\ +writing. When opened for writing, the file will be created if it doesn't\n\ +exist, and truncated otherwise. If the buffering argument is given, 0 means\n\ +unbuffered, and larger numbers specify the buffer size.\n\ +Data read is always returned in bytes; data written ought to be bytes.\n\ +"); + +static PyTypeObject LZMAFile_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "lzma.LZMAFile", /*tp_name*/ + sizeof(LZMAFileObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)LZMAFile_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + 0, /*tp_str*/ + PyObject_GenericGetAttr, /*tp_getattro*/ + PyObject_GenericSetAttr, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/ + LZMAFile__doc__, /*tp_doc*/ + 0, /*tp_traverse*/ + 0, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + (getiterfunc)LZMAFile_getiter, /*tp_iter*/ + (iternextfunc)LZMAFile_iternext, /*tp_iternext*/ + LZMAFile_methods, /*tp_methods*/ + 0, /*tp_members*/ + 
LZMAFile_getset, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + 0, /*tp_dictoffset*/ + (initproc)LZMAFile_init, /*tp_init*/ + PyType_GenericAlloc, /*tp_alloc*/ + PyType_GenericNew, /*tp_new*/ + PyObject_Free, /*tp_free*/ + 0, /*tp_is_gc*/ + 0, /*tp_bases*/ + 0, /*tp_mro*/ + 0, /*tp_cache*/ + 0, /*tp_subclasses*/ + 0, /*tp_weaklist*/ + 0, /*tp_del*/ + 0 /*tp_version_tag*/ +}; + +/* ===================================================================== */ +/* Methods of LZMAComp. */ + +PyDoc_STRVAR(LZMAComp_compress__doc__, +"compress(data) -> string\n\ +\n\ +Feed the compressor object with data to compress sequently.\n\ +This function will return the header for the compressed string for the first\n\ +input provided, this header will be needed to concatenate with the rest of\n\ +the stream when flushing to have a proper stream able to be decompressed\n\ +again.\n"); + +static PyObject * +LZMAComp_compress(LZMACompObject *self, PyObject *args) +{ + Py_buffer pdata; + Py_ssize_t datasize, bufsize = SMALLCHUNK; + unsigned char *data; + unsigned long long totalout; + PyObject *ret = NULL; + lzma_stream *lzus = &self->lzus; + lzma_ret lzuerror = LZMA_OK; + + INITCHECK + if (!PyArg_ParseTuple(args, "y*:compress", &pdata)) + return NULL; + data = pdata.buf; + datasize = pdata.len; + + if (datasize == 0) { + PyBuffer_Release(&pdata); + return PyBytes_FromStringAndSize("", 0); + } + + ACQUIRE_LOCK(self); + if (!self->running) { + PyErr_SetString(PyExc_ValueError, + "this object was already flushed"); + goto error; + } + + if (!(ret = PyBytes_FromStringAndSize(NULL, bufsize))) + goto error; + + lzus->avail_in = (size_t)datasize; + lzus->next_in = data; + lzus->avail_out = (size_t)bufsize; + lzus->next_out = (unsigned char *)PyBytes_AS_STRING(ret); + + totalout = lzus->total_out; + + for (;;) { + Py_BEGIN_ALLOW_THREADS + lzuerror = lzma_code(lzus, LZMA_RUN); + Py_END_ALLOW_THREADS + if (!Util_CatchLZMAError(lzuerror, lzus, 1)) + 
goto error; + if (lzus->avail_in == 0) + break; /* no more input data */ + if (lzus->avail_out == 0) { + bufsize = Util_NewBufferSize(bufsize); + if (_PyBytes_Resize(&ret, bufsize) < 0) + goto error; + lzus->next_out = (unsigned char *)PyBytes_AS_STRING(ret) + (lzus->total_out - totalout); + lzus->avail_out = (size_t)bufsize - (lzus->next_out - (unsigned char *)PyBytes_AS_STRING(ret)); + } + } + + if (_PyBytes_Resize(&ret, + (Py_ssize_t)((Py_ssize_t)lzus->total_out - (Py_ssize_t)totalout)) < 0) + goto error; + + RELEASE_LOCK(self); + PyBuffer_Release(&pdata); + return ret; + +error: + RELEASE_LOCK(self); + PyBuffer_Release(&pdata); + Py_XDECREF(ret); + return NULL; +} + +PyDoc_STRVAR(LZMAComp_flush__doc__, +"flush(mode=LZMA_FINISH) -> string\n\ +\n\ +Returns a string containing any remaining compressed data.\n\ +\n\ +'mode' can be one of the constants LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH,\n\ +LZMA_FINISH; the default value used when mode is not specified is LZMA_FINISH.\n\ +If mode == LZMA_FINISH, the compressor object can no longer be used after\n\ +calling the flush() method. 
Otherwise, more data can still be compressed.\n"); + +static PyObject * +LZMAComp_flush(LZMACompObject *self, PyObject *args, PyObject *kwargs) +{ + Py_ssize_t bufsize = SMALLCHUNK; + PyObject *ret = NULL; + lzma_action flushmode = LZMA_FINISH; + unsigned long long totalout; + lzma_stream *lzus = &self->lzus; + lzma_ret lzuerror = LZMA_OK; + + static char *kwlist[] = {"mode", NULL}; + + INITCHECK + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:flush", kwlist, + &flushmode)) + return NULL; + + ACQUIRE_LOCK(self); + if (!self->running) { + PyErr_SetString(PyExc_ValueError, "object was already flushed"); + goto error; + } + + switch(flushmode){ + case(LZMA_SYNC_FLUSH): + case(LZMA_FULL_FLUSH): + if(self->filters[0].id == LZMA_FILTER_LZMA1) { + PyErr_Format(LZMAError, "%d is not supported as flush mode for LZMA_Alone format", flushmode); + goto error; + } + /* Flushing with LZMA_RUN is a no-op, so there's no point in + * doing any work at all; just return an empty string. + */ + case(LZMA_RUN): + ret = PyBytes_FromStringAndSize(NULL, 0); + goto error; + case(LZMA_FINISH): + break; + default: + PyErr_Format(LZMAError, "Invalid flush mode: %d", flushmode); + goto error; + } + + self->running = 0; + if (!(ret = PyBytes_FromStringAndSize(NULL, bufsize))) + goto error; + + lzus->avail_in = 0; + lzus->avail_out = (size_t)bufsize; + lzus->next_out = (unsigned char *)PyBytes_AS_STRING(ret); + + totalout = lzus->total_out; + + for (;;) { + Py_BEGIN_ALLOW_THREADS + lzuerror = lzma_code(lzus, flushmode); + Py_END_ALLOW_THREADS + if (!Util_CatchLZMAError(lzuerror, lzus, 1)) + goto error; + if(lzuerror == LZMA_STREAM_END) + break; /* no more input data */ + if (lzus->avail_out == 0) { + bufsize = Util_NewBufferSize(bufsize); + if (_PyBytes_Resize(&ret, bufsize) < 0) + goto error; + lzus->next_out = (unsigned char *)PyBytes_AS_STRING(ret) + (lzus->total_out - totalout);; + lzus->avail_out = (size_t)bufsize - (lzus->next_out - (unsigned char *)PyBytes_AS_STRING(ret)); + } + } + 
+ if (lzus->avail_out != 0) { + if (_PyBytes_Resize(&ret, + (Py_ssize_t)((Py_ssize_t)lzus->total_out - (Py_ssize_t)totalout)) < 0) + goto error; + } + + RELEASE_LOCK(self); + return ret; + +error: + RELEASE_LOCK(self); + Py_XDECREF(ret); + return ret; +} + +static PyMethodDef LZMAComp_methods[] = +{ + {"compress", (PyCFunction)LZMAComp_compress, METH_VARARGS, + LZMAComp_compress__doc__}, + {"flush", (PyCFunction)LZMAComp_flush, METH_VARARGS|METH_KEYWORDS, + LZMAComp_flush__doc__}, + {0, 0, 0, 0} +}; + +static int +LZMAComp_init(LZMACompObject *self, PyObject *args, PyObject *kwargs) +{ + PyObject *myFilters = NULL, *myFormat = NULL, *myCheck = NULL; + int compresslevel = LZMA_PRESET_DEFAULT, threads = 1; + lzma_stream *lzus = &self->lzus; + lzma_ret lzuerror = LZMA_OK; + + static char *kwlist[] = {"compresslevel", "format", "check", "threads", "filter", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iUUiO:LZMACompressor", kwlist, + &compresslevel, &myFormat, &myCheck, &threads, &myFilters)) + return -1; + + if(!init_lzma_options("LZMACompressor", self->filters, &self->check, compresslevel, + myFormat, myCheck, threads, myFilters)) + goto error; + +#ifdef WITH_THREAD + self->lock = PyThread_allocate_lock(); + if (!self->lock) { + PyErr_SetString(PyExc_MemoryError, "unable to allocate lock"); + goto error; + } +#endif + + if(self->filters[0].id == LZMA_FILTER_LZMA1) + lzuerror = lzma_alone_encoder(lzus, self->filters[0].options); + else if(self->check == (lzma_check)-1) + lzuerror = lzma_raw_encoder(lzus, self->filters); + else + lzuerror = lzma_stream_encoder(lzus, self->filters, self->check); + + if(!Util_CatchLZMAError(lzuerror, lzus, 1)) + goto error; + + self->is_initialised = 1; + self->running = 1; + + return 0; + +error: +#ifdef WITH_THREAD + if (self->lock) { + PyThread_free_lock(self->lock); + self->lock = NULL; + } +#endif + return -1; +} + +static PyObject * +LZMACompObject_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + 
LZMACompObject *self; + self = (LZMACompObject *)type->tp_alloc(type, 0); + if (self != NULL){ + lzma_stream tmp = LZMA_STREAM_INIT; + + self->is_initialised = 0; + self->running = 0; + self->lzus = tmp; + self->filters[0].options = NULL; + self->check = LZMA_CHECK_NONE; + } + else + return NULL; + + return (PyObject *)self; +} + +static void +LZMAComp_dealloc(LZMACompObject *self) +{ +#ifdef WITH_THREAD + if (self->lock) + PyThread_free_lock(self->lock); +#endif + if (self->is_initialised) + lzma_end(&self->lzus); + free_lzma_options(self->filters); + Py_TYPE(self)->tp_free((PyObject *)self); +} + +PyDoc_STRVAR(LZMAComp__doc__, +"LZMACompressor("DEFAULT_OPTIONS_STRING") -> compressor object\n\ +Create a new compressor object. This object may be used to compress\n\ +data sequentially. If you want to compress data in one shot, use the\n\ +compress() function instead.\n"); + +static PyTypeObject LZMAComp_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "lzma.LZMACompressor", /*tp_name*/ + sizeof(LZMACompObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)LZMAComp_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_reserved*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + 0, /*tp_str*/ + PyObject_GenericGetAttr, /*tp_getattro*/ + PyObject_GenericSetAttr, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/ + LZMAComp__doc__, /*tp_doc*/ + 0, /*tp_traverse*/ + 0, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + LZMAComp_methods, /*tp_methods*/ + 0, /*tp_members*/ + 0, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + 0, /*tp_dictoffset*/ + (initproc)LZMAComp_init, /*tp_init*/ + PyType_GenericAlloc, /*tp_alloc*/ + LZMACompObject_new, /*tp_new*/ + PyObject_Free, /*tp_free*/ + 0, /*tp_is_gc*/ + 0, /*tp_bases*/ + 0, 
/*tp_mro*/ + 0, /*tp_cache*/ + 0, /*tp_subclasses*/ + 0, /*tp_weaklist*/ + 0, /*tp_del*/ + 0 /*tp_version_tag*/ +}; + +/* ===================================================================== */ +/* Members of LZMADecomp. */ + +PyDoc_STRVAR(LZMADecomp_decompress__doc__, +"decompress(data, max_length=0) -> string\n\ +\n\ +Return a string containing the decompressed version of the data.\n\ +\n\ +After calling this function, some of the input data may still be stored in\n\ +internal buffers for later processing.\n\ +Call the flush() method to clear these buffers.\n\ +If the max_length parameter is specified then the return value will be\n\ +no longer than max_length. Unconsumed input data will be stored in\n\ +the unconsumed_tail data descriptor."); + +static PyObject * +LZMADecomp_decompress(LZMADecompObject *self, PyObject *args, PyObject *kwargs) +{ + Py_buffer pdata; + Py_ssize_t datasize, oldbufsize, bufsize = SMALLCHUNK; + unsigned char *data; + unsigned long long start_total_out; + PyObject *ret = NULL; + lzma_stream *lzus = &self->lzus; + lzma_ret lzuerror = LZMA_OK; + + static char *kwlist[] = {"data", "max_length", NULL}; + + INITCHECK + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|l:decompress", kwlist, + &pdata, &self->max_length)) + return NULL; + data = pdata.buf; + datasize = pdata.len; + + ACQUIRE_LOCK(self); + if (!self->running) { + PyErr_SetString(PyExc_EOFError, + "end of stream was already found"); + goto error; + } + + if (self->max_length < 0) { + PyErr_SetString(PyExc_ValueError, + "max_length must be greater than zero"); + goto error; + } + + /* limit amount of data allocated to max_length */ + if (self->max_length && bufsize > self->max_length) + bufsize = self->max_length; + + if(!(ret = PyBytes_FromStringAndSize(NULL, bufsize))) + goto error; + + start_total_out = lzus->total_out; + lzus->avail_in = (size_t)datasize; + lzus->next_in = data; + lzus->avail_out = (size_t)bufsize; + lzus->next_out = (unsigned char 
*)PyBytes_AS_STRING(ret); + + for (;;) { + Py_BEGIN_ALLOW_THREADS + lzuerror = lzma_code(lzus, LZMA_RUN); + Py_END_ALLOW_THREADS + + if (lzus->avail_in == 0 || lzus->avail_out != 0) + break; /* no more input data */ + + /* If max_length set, don't continue decompressing if we've already + * reached the limit. + */ + if (self->max_length && bufsize >= self->max_length) + break; + + /* otherwise, ... */ + oldbufsize= bufsize; + bufsize = bufsize << 1; + if (self->max_length && bufsize > self->max_length) + bufsize = self->max_length; + + if (_PyBytes_Resize(&ret, bufsize) < 0) + goto error; + lzus->next_out = (unsigned char *)PyBytes_AS_STRING(ret) + oldbufsize; + lzus->avail_out = (size_t)bufsize - (size_t)oldbufsize; + if(!Util_CatchLZMAError(lzuerror, lzus, 0)) + goto error; + } + + /* Not all of the compressed data could be accommodated in the output + * buffer of specified size. Return the unconsumed tail in an attribute. + */ + if(self->max_length) { + Py_DECREF(self->unconsumed_tail); + self->unconsumed_tail = PyBytes_FromStringAndSize((const char *)lzus->next_in, + (Py_ssize_t)lzus->avail_in); + if(!self->unconsumed_tail) { + goto error; + } + } + + /* The end of the compressed data has been reached, so set the + * unused_data attribute to a string containing the remainder of the + * data in the string. Note that this is also a logical place to call + * lzma_end, but the old behaviour of only calling it on flush() is + * preserved. + */ + if (lzuerror == LZMA_STREAM_END) { + Py_XDECREF(self->unused_data); /* Free original empty string */ + self->unused_data = PyBytes_FromStringAndSize( + (const char *)lzus->next_in, (Py_ssize_t)lzus->avail_in); + if (self->unused_data == NULL) { + goto error; + } + /* We will only get LZMA_BUF_ERROR if the output buffer was full + * but there wasn't more output when we tried again, so it is + * not an error condition. 
+ */ + } else if(!Util_CatchLZMAError(lzuerror, lzus, 0)) + goto error; + + if(_PyBytes_Resize(&ret, (Py_ssize_t)lzus->total_out - (Py_ssize_t)start_total_out) < 0) + goto error; + + RELEASE_LOCK(self); + PyBuffer_Release(&pdata); + return ret; + +error: + RELEASE_LOCK(self); + PyBuffer_Release(&pdata); + Py_XDECREF(ret); + return NULL; +} + +PyDoc_STRVAR(LZMADecomp_flush__doc__, +"flush(mode=LZMA_FINISH [, bufsize]) -> string\n\ +\n\ +Return a string containing any remaining decompressed data.\n\ +\n\ +If 'bufsize' is given, is the initial size of the output buffer.\n\ +\n\ +The decompressor object cannot be used again after this call."); + +static PyObject * +LZMADecomp_flush(LZMADecompObject *self, PyObject *args, PyObject *kwargs) +{ + Py_ssize_t bufsize = SMALLCHUNK; + + PyObject *ret = NULL; + lzma_action flushmode = LZMA_FINISH; + unsigned long long start_total_out; + lzma_stream *lzus = &self->lzus; + lzma_ret lzuerror = LZMA_OK; + + static char *kwlist[] = {"mode", "bufsize", NULL}; + + INITCHECK + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii:flush", kwlist, + &flushmode, &bufsize)) + return NULL; + + ACQUIRE_LOCK(self); + if (!self->running) { + PyErr_SetString(PyExc_ValueError, "object was already flushed"); + goto error; + } + + switch(flushmode){ + case(LZMA_SYNC_FLUSH): + case(LZMA_FULL_FLUSH): + PyErr_Format(LZMAError, "%d is not supported as flush mode for decoding", flushmode); + goto error; + case(LZMA_RUN): + case(LZMA_FINISH): + break; + default: + PyErr_Format(LZMAError, "Invalid flush mode: %d", flushmode); + goto error; + } + + if (!(ret = PyBytes_FromStringAndSize(NULL, bufsize))) + goto error; + + + start_total_out = lzus->total_out; + lzus->avail_out = (size_t)bufsize; + lzus->next_out = (unsigned char *)PyBytes_AS_STRING(ret); + + for (;;) { + Py_BEGIN_ALLOW_THREADS + lzuerror = lzma_code(lzus, flushmode); + Py_END_ALLOW_THREADS + + if (lzus->avail_in == 0 || lzus->avail_out != 0) + break; /* no more input data */ + + if 
(_PyBytes_Resize(&ret, bufsize << 1) < 0) + goto error; + lzus->next_out = (unsigned char *)PyBytes_AS_STRING(ret) + bufsize; + lzus->avail_out = (size_t)bufsize; + bufsize = bufsize << 1; + + if(!Util_CatchLZMAError(lzuerror, lzus, 0)) + goto error; + } + + + /* If flushmode is LZMA_FINISH, we also have to call lzma_end() to free + * various data structures. Note we should only get LZMA_STREAM_END when + * flushmode is LZMA_FINISH + */ + if (lzuerror == LZMA_STREAM_END) { + lzma_end(lzus); + self->running = 0; + if(!Util_CatchLZMAError(lzuerror, lzus, 0)) + goto error; + } + _PyBytes_Resize(&ret, (Py_ssize_t)lzus->total_out - (Py_ssize_t)start_total_out); + + RELEASE_LOCK(self); + return ret; + +error: + RELEASE_LOCK(self); + Py_XDECREF(ret); + return ret; +} + +static PyMemberDef LZMADecomp_members[] = { + {"unused_data", T_OBJECT, offsetof(LZMADecompObject, unused_data), + READONLY, NULL}, + {"unconsumed_tail", T_OBJECT, offsetof(LZMADecompObject, + unconsumed_tail), READONLY, NULL}, + {NULL, 0, 0, 0, NULL} /* sentinel */ +}; + +static PyMethodDef LZMADecomp_methods[4] = +{ + {"decompress", (PyCFunction)LZMADecomp_decompress, METH_VARARGS|METH_KEYWORDS, + LZMADecomp_decompress__doc__}, + {"flush", (PyCFunction)LZMADecomp_flush, METH_VARARGS|METH_KEYWORDS, + LZMADecomp_flush__doc__}, + {NULL, NULL, 0, NULL} /* sentinel */ +}; + +static PyObject * +LZMADecompObject_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + LZMADecompObject *self; + self = (LZMADecompObject *)type->tp_alloc(type, 0); + + if (self != NULL){ + lzma_stream tmp = LZMA_STREAM_INIT; + + self->is_initialised = 0; + self->running = 0; + self->max_length = 0; + self->memlimit = -1; + if((self->unused_data = PyBytes_FromString("")) == NULL) + goto error; + if((self->unconsumed_tail = PyBytes_FromString("")) == NULL) + goto error; + self->lzus = tmp; + } + else + return NULL; + + return (PyObject *)self; +error: + Py_DECREF(self); + return NULL; +} + +static void 
+LZMADecomp_dealloc(LZMADecompObject *self) +{ +#ifdef WITH_THREAD + if (self->lock) + PyThread_free_lock(self->lock); +#endif + if (self->is_initialised) + lzma_end(&self->lzus); + Py_XDECREF(self->unused_data); + Py_XDECREF(self->unconsumed_tail); + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static int +LZMADecomp_init(LZMADecompObject *self, PyObject *args, PyObject *kwargs) +{ + lzma_stream *lzus = &self->lzus; + lzma_ret lzuerror = LZMA_OK; + + static char *kwlist[] = {"max_length", "memlimit", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|lK:LZMADecompressor", kwlist, + &self->max_length, &self->memlimit)) + return -1; + +#ifdef WITH_THREAD + self->lock = PyThread_allocate_lock(); + if (!self->lock) { + PyErr_SetString(PyExc_MemoryError, "unable to allocate lock"); + goto error; + } +#endif + + if (self->max_length < 0) { + PyErr_SetString(PyExc_ValueError, + "max_length must be greater than zero"); + goto error; + } + + lzuerror = lzma_auto_decoder(lzus, self->memlimit, LZMA_CONCATENATED); + if(!Util_CatchLZMAError(lzuerror, lzus, 0)) + goto error; + + self->is_initialised = 1; + self->running = 1; + + return 0; + +error: +#ifdef WITH_THREAD + if (self->lock) { + PyThread_free_lock(self->lock); + self->lock = NULL; + } +#endif + Py_CLEAR(self->unused_data); + return -1; +} + +PyDoc_STRVAR(LZMADecomp__doc__, +"LZMADecompressor(max_length=0, memlimit=-1) -> decompressor object\n\ +\n\ +Create a new decompressor object. This object may be used to decompress\n\ +data sequentially. 
If you want to decompress data in one shot, use the\n\ +decompress() function instead.\n"); + +static PyTypeObject LZMADecomp_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "lzma.LZMADecompressor", /*tp_name*/ + sizeof(LZMADecompObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)LZMADecomp_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_reserved*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + 0, /*tp_str*/ + PyObject_GenericGetAttr, /*tp_getattro*/ + PyObject_GenericSetAttr, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/ + LZMADecomp__doc__, /*tp_doc*/ + 0, /*tp_traverse*/ + 0, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + LZMADecomp_methods, /*tp_methods*/ + LZMADecomp_members, /*tp_members*/ + 0, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + 0, /*tp_dictoffset*/ + (initproc)LZMADecomp_init, /*tp_init*/ + PyType_GenericAlloc, /*tp_alloc*/ + LZMADecompObject_new, /*tp_new*/ + PyObject_Free, /*tp_free*/ + 0, /*tp_is_gc*/ + 0, /*tp_bases*/ + 0, /*tp_mro*/ + 0, /*tp_cache*/ + 0, /*tp_subclasses*/ + 0, /*tp_weaklist*/ + 0, /*tp_del*/ + 0 /*tp_version_tag*/ +}; + +/* ===================================================================== */ +/* Module functions. 
*/ + +PyDoc_STRVAR(LZMA_compress__doc__, +"compress(data, "DEFAULT_OPTIONS_STRING") -> string\n\ +\n\ +Compress data using the given parameters, returning a string\n\ +containing the compressed data."); + +static PyObject * +LZMA_compress(PyObject *self, PyObject *args, PyObject *kwargs) +{ + PyObject *ret = NULL, *myFilters = NULL, + *myFormat = NULL, *myCheck = NULL; + Py_buffer pdata; + int compresslevel = LZMA_PRESET_DEFAULT, threads = 1; + const unsigned char *data; + Py_ssize_t datasize, bufsize; + lzma_ret lzuerror = LZMA_OK; + lzma_stream _lzus; + lzma_stream *lzus = &_lzus; + lzma_filter filters[LZMA_FILTERS_MAX + 1]; + lzma_check check = LZMA_CHECK_NONE; + lzma_stream tmp = LZMA_STREAM_INIT; + + static char *kwlist[] = {"data", "compresslevel", "format", "check", "threads", "filter", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|iUUiO:compress", kwlist, + &pdata, &compresslevel, &myFormat, &myCheck, &threads, &myFilters)) + return NULL; + + if(!init_lzma_options("compress", filters, &check, compresslevel, myFormat, myCheck, threads, myFilters)) + return NULL; + + data = pdata.buf; + datasize = pdata.len; + + *lzus = tmp; + bufsize = lzma_stream_buffer_bound(datasize); + /* TODO: if(bufsize == 0) goto error; */ + + if (!(ret = PyBytes_FromStringAndSize(NULL, bufsize))) + return NULL; + + if(filters[0].id == LZMA_FILTER_LZMA1) + { + lzuerror = lzma_alone_encoder(lzus, filters[0].options); + + if(!Util_CatchLZMAError(lzuerror, lzus, 1)) + goto error; + + lzus->avail_in = (size_t)datasize; + lzus->next_in = data; + lzus->next_out = (unsigned char *)PyBytes_AS_STRING(ret); + lzus->avail_out = (size_t)bufsize; + + for (;;) { + Py_BEGIN_ALLOW_THREADS + lzuerror = lzma_code(lzus, LZMA_FINISH); + Py_END_ALLOW_THREADS + if (!Util_CatchLZMAError(lzuerror, lzus, 1)) + goto error; + if(lzuerror == LZMA_STREAM_END) + break; /* no more input data */ + if (lzus->avail_out == 0) { + bufsize = Util_NewBufferSize(bufsize); + if (_PyBytes_Resize(&ret, 
bufsize) < 0) + goto error; + lzus->next_out = (unsigned char *)PyBytes_AS_STRING(ret) + lzus->total_out; + lzus->avail_out = (size_t)bufsize - (lzus->next_out - (unsigned char *)PyBytes_AS_STRING(ret)); + } + } + + lzma_end(lzus); + if (lzuerror == LZMA_STREAM_END) { + if(_PyBytes_Resize(&ret, (Py_ssize_t)lzus->total_out) < 0) { + ret = NULL; + } + } + } + else { + size_t loc = 0; + Py_BEGIN_ALLOW_THREADS + if(check == (lzma_check)-1) + lzuerror = lzma_raw_buffer_encode(filters, NULL, data, (size_t)datasize, + (unsigned char *)PyBytes_AS_STRING(ret), &loc, (size_t)bufsize); + else + lzuerror = lzma_stream_buffer_encode(filters, check, NULL, data, (size_t)datasize, + (unsigned char *)PyBytes_AS_STRING(ret), &loc, (size_t)bufsize); + Py_END_ALLOW_THREADS + _PyBytes_Resize(&ret, (Py_ssize_t)loc); + } + + PyBuffer_Release(&pdata); + free_lzma_options(filters); + + return ret; + +error: + if(lzuerror != LZMA_MEM_ERROR && lzuerror != LZMA_PROG_ERROR) + lzma_end(lzus); + Py_XDECREF(ret); + PyBuffer_Release(&pdata); + free_lzma_options(filters); + + return NULL; +} + +PyDoc_STRVAR(LZMA_decompress__doc__, +"decompress(data, bufsize=8192, memlimit=-1, flags=LZMA_CONCATENATED|LZMA_TELL_UNSUPPORTED_CHECK) -> string\n\ +\n\ +Decompress data in one shot. 
If you want to decompress data sequentially,\n\ +use an instance of LZMADecompressor instead.\n\ +\n\ +Optional arg 'bufsize' is the initial output buffer size.\n\ +Optional arg 'memlimit' is the maximum amount of memory the decoder may use,\n\ +-1 means no limit."); + +static PyObject * +LZMA_decompress(PyObject *self, PyObject *args, PyObject *kwargs) +{ + PyObject *ret = NULL; + Py_buffer pdata; + const unsigned char *data; + Py_ssize_t datasize, bufsize = SMALLCHUNK; + unsigned long long memlimit = -1; + unsigned int flags = LZMA_CONCATENATED|LZMA_TELL_UNSUPPORTED_CHECK; + lzma_ret lzuerror = LZMA_OK; + lzma_stream _lzus; + lzma_stream *lzus = &_lzus; + lzma_stream tmp = LZMA_STREAM_INIT; + + static char *kwlist[] = {"data", "bufsize", "memlimit", "flags", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|lKi:decompress", kwlist, + &pdata, &bufsize, &memlimit, &flags)) + return NULL; + data = pdata.buf; + datasize = pdata.len; + + if (datasize == 0) { + PyBuffer_Release(&pdata); + return PyBytes_FromStringAndSize("", 0); + } + + ret = PyBytes_FromStringAndSize(NULL, bufsize); + if (!ret) { + PyBuffer_Release(&pdata); + return NULL; + } + + *lzus = tmp; + + lzus->avail_in = (size_t)datasize; + lzus->avail_out = (size_t)bufsize; + lzus->next_out = (unsigned char *)PyBytes_AS_STRING(ret); + lzus->next_in = data; + + lzuerror = lzma_auto_decoder(lzus, memlimit, flags); + if(!Util_CatchLZMAError(lzuerror, lzus, 0)) + goto error; + + while (lzuerror != LZMA_STREAM_END){ + Py_BEGIN_ALLOW_THREADS + lzuerror=lzma_code(lzus, LZMA_FINISH); + Py_END_ALLOW_THREADS + + if(!Util_CatchLZMAError(lzuerror, lzus, 0)) + goto error; + if(lzuerror == LZMA_STREAM_END) + break; + if(lzuerror == LZMA_OK){ + if (_PyBytes_Resize(&ret, Util_NewBufferSize(bufsize)) < 0) { + goto error; + } + lzus->next_out = (unsigned char *)PyBytes_AS_STRING(ret) + bufsize; + lzus->avail_out = (size_t)bufsize; + bufsize = Util_NewBufferSize(bufsize); + } + } + + if(_PyBytes_Resize(&ret, 
(Py_ssize_t)lzus->total_out) < 0) + ret = NULL; + lzma_end(lzus); + PyBuffer_Release(&pdata); + + return ret; + +error: + if(lzuerror != LZMA_MEM_ERROR && lzuerror != LZMA_PROG_ERROR) + lzma_end(lzus); + Py_XDECREF(ret); + PyBuffer_Release(&pdata); + + return NULL; +} + +PyDoc_STRVAR(LZMA_crc32__doc__, +"crc32(data[, start]) -> int\n\ +\n\ +Compute a CRC-32 checksum of string.\n\ +\n\ +An optional starting value 'start' can be specified."); + +static PyObject * +LZMA_crc32(PyObject *self, PyObject *args) +{ + unsigned int crc32val = lzma_crc32(NULL, (size_t)0, (unsigned int)0); + const unsigned char *buf; + Py_ssize_t size; + if (!PyArg_ParseTuple(args, "s#|I:crc32", &buf, &size, &crc32val)) + return NULL; + crc32val = lzma_crc32(buf, (size_t)size, crc32val); + return PyLong_FromUnsignedLong((unsigned long)crc32val); +} + +PyDoc_STRVAR(LZMA_crc64__doc__, +"crc64(data[, start]) -> int\n\ +\n\ +Compute a CRC-64 checksum of string.\n\ +\n\ +An optional starting value 'start' can be specified."); + +static PyObject * +LZMA_crc64(PyObject *self, PyObject *args) +{ + unsigned long long crc64val = lzma_crc64(NULL, (size_t)0, (unsigned long long)0); + const unsigned char *buf; + Py_ssize_t size; + if (!PyArg_ParseTuple(args, "s#|K:crc64", &buf, &size, &crc64val)) + return NULL; + crc64val = lzma_crc64(buf, (size_t)size, crc64val); + return PyLong_FromUnsignedLongLong(crc64val); +} + +static PyMethodDef lzma_methods[] = { + {"compress", (PyCFunction)LZMA_compress, + METH_VARARGS|METH_KEYWORDS, LZMA_compress__doc__}, + {"crc32", (PyCFunction)LZMA_crc32, + METH_VARARGS, LZMA_crc32__doc__}, + {"crc64", (PyCFunction)LZMA_crc64, + METH_VARARGS, LZMA_crc64__doc__}, + {"decompress", (PyCFunction)LZMA_decompress, + METH_VARARGS|METH_KEYWORDS, LZMA_decompress__doc__}, + {0, 0, 0, 0} +}; + +PyDoc_STRVAR(lzma_module_documentation, +"The python lzma module provides a comprehensive interface for\n\ +the lzma compression library. 
It implements one shot (de)compression\n\ +functions, CRC-32 & CRC-64 checksum computations, types for sequential\n\ +(de)compression, and advanced options for lzma compression.\n\ +"); + +static struct PyModuleDef lzmamodule = { + PyModuleDef_HEAD_INIT, + "lzma", + lzma_module_documentation, + -1, + lzma_methods, + NULL, + NULL, + NULL, + NULL +}; + +/* ===================================================================== */ +/* Initialization function. */ + +/* declare function before defining it to avoid compile warnings */ +PyMODINIT_FUNC +PyInit_lzma(void) +{ + PyObject *optionsSingleton, *module; + + Options = (LZMAOptionsObject*)(optionsSingleton = PyType_GenericNew(&LZMAOptions_Type, NULL, NULL)); + + if (PyType_Ready(&LZMAFile_Type) < 0) + return NULL; + if (PyType_Ready(&LZMAComp_Type) < 0) + return NULL; + if (PyType_Ready(&LZMADecomp_Type) < 0) + return NULL; + if (PyType_Ready(&LZMAOptions_Type) < 0) + return NULL; + + module = PyModule_Create(&lzmamodule); + if (module == NULL) + return NULL; + + LZMAError = PyErr_NewException("LZMA.error", NULL, NULL); + if (LZMAError != NULL) { + Py_INCREF(LZMAError); + PyModule_AddObject(module, "error", LZMAError); + } + + Py_INCREF(&LZMAOptions_Type); + PyModule_AddObject(module, "LZMAOptions", (PyObject *)&LZMAOptions_Type); + + Py_INCREF(&LZMAComp_Type); + PyModule_AddObject(module, "LZMACompressor", (PyObject *)&LZMAComp_Type); + + Py_INCREF(&LZMADecomp_Type); + PyModule_AddObject(module, "LZMADecompressor", (PyObject *)&LZMADecomp_Type); + + Py_INCREF(&LZMAFile_Type); + PyModule_AddObject(module, "LZMAFile", (PyObject *)&LZMAFile_Type); + + PyModule_AddObject(module, "options", optionsSingleton); + PyModule_AddIntConstant(module, "LZMA_RUN", LZMA_RUN); + PyModule_AddIntConstant(module, "LZMA_SYNC_FLUSH", LZMA_SYNC_FLUSH); + PyModule_AddIntConstant(module, "LZMA_FULL_FLUSH", LZMA_FULL_FLUSH); + PyModule_AddIntConstant(module, "LZMA_FINISH", LZMA_FINISH); + + /* Decoding flags */ + 
PyModule_AddIntConstant(module, "LZMA_TELL_NO_CHECK", LZMA_TELL_NO_CHECK); + PyModule_AddIntConstant(module, "LZMA_TELL_UNSUPPORTED_CHECK", LZMA_TELL_UNSUPPORTED_CHECK); + PyModule_AddIntConstant(module, "LZMA_TELL_ANY_CHECK", LZMA_TELL_ANY_CHECK); + PyModule_AddIntConstant(module, "LZMA_CONCATENATED", LZMA_CONCATENATED); + + PyModule_AddObject(module, "__author__", PyUnicode_FromString(__author__)); + PyModule_AddObject(module, "LZMA_VERSION", PyUnicode_FromString(LZMA_VERSION_STRING)); + PyModule_AddStringConstant(module, "__version__", VERSION); + + return module; +}