diff --git a/Lib/bz2.py b/Lib/bz2.py
new file mode 100644
--- /dev/null
+++ b/Lib/bz2.py
@@ -0,0 +1,348 @@
+"""This module provides an interface to the bz2 compression library.
+
+It contains a file interface, classes for in-memory incremental
+(de)compression, and functions for one-shot (de)compression.
+"""
+
+__all__ = ["BZ2File", "BZ2Compressor", "BZ2Decompressor", "compress",
+           "decompress"]
+
+__author__ = "Nadeem Vawda "
+
+from _bz2 import BZ2Compressor, BZ2Decompressor, compress, decompress
+
+import threading
+
+
+_MODE_CLOSED = 0
+_MODE_READ = 1
+_MODE_READ_EOF = 2
+_MODE_WRITE = 3
+
+_BUFFER_SIZE = 8192
+
+
+class BZ2File:
+
+    """A file object providing transparent bzip2 (de)compression.
+    """
+
+    def __init__(self, filename=None, mode="r", buffering=None,
+                 compresslevel=9, fileobj=None):
+        """Open a bz2 file.
+
+        Exactly one of filename and fileobj should be provided. If fileobj
+        is not None, it should be a file-like object to read data from /
+        write data to. Otherwise, the file named by filename will be opened.
+
+        mode can be 'r' for reading/decompression (default), or 'w' for
+        writing/compression.
+
+        compresslevel is a number between 1 and 9 specifying the level
+        of compression to use.
+
+        buffering is ignored; its use is deprecated.
+        """
+        try:
+            self._fp = None
+            self._closefp = False
+            self._pos = 0
+            self._size = -1
+
+            if mode in ("", "r", "rb"):
+                mode = "rb"
+                self._mode = _MODE_READ
+                self._decompressor = BZ2Decompressor()
+                self._readahead = b""
+            elif mode in ("w", "wb"):
+                mode = "wb"
+                self._mode = _MODE_WRITE
+                self._compressor = BZ2Compressor(compresslevel)
+            else:
+                raise ValueError("Invalid mode: {!r}".format(mode))
+
+            if filename is not None and fileobj is None:
+                self._fp = open(filename, mode)
+                self._closefp = True
+            elif fileobj is not None and filename is None:
+                self._fp = fileobj
+            else:
+                raise ValueError("Must give exactly one of filename and fileobj")
+
+            self._lock = threading.Lock()
+        except:  # Undo partial initialization, then re-raise.
+            self._mode = _MODE_CLOSED
+            if self._fp and self._closefp:
+                try:
+                    self._fp.close()
+                except Exception:
+                    pass
+            self._fp = None
+            self._closefp = False
+            raise
+
+    def read(self, size=-1):
+        """Read at most size uncompressed bytes from the file.
+
+        If size is omitted or negative, read until EOF is reached.
+        On end-of-file, returns b''.
+        """
+        with self._lock:
+            self._check_can_read()
+            if self._mode == _MODE_READ_EOF or size == 0:
+                return b""
+            data = []
+            nread = 0
+            while self._mode != _MODE_READ_EOF:
+                if size < 0 or nread + _BUFFER_SIZE <= size:
+                    block = self._read_block(_BUFFER_SIZE)
+                else:
+                    block = self._read_block(size - nread)
+                data.append(block)
+                nread += len(block)
+                if size > 0 and nread == size:
+                    break
+            return b"".join(data)
+
+    def readline(self, size=-1):
+        """Read and return a line of bytes from the file.
+
+        The terminating newline (if present) is retained. If size is
+        positive, at most size bytes will be read (in which case the
+        line may be incomplete). On end-of-file, returns b''.
+        """
+        if not isinstance(size, int):  # Keep testReadLine() happy
+            raise TypeError()
+        with self._lock:
+            self._check_can_read()
+            return self._read_line(size)
+
+    def readlines(self, sizehint=-1):
+        """Read and return a list of lines from the file.
+
+        If sizehint is positive, it limits the number of lines read: no
+        further lines will be read once the total size of the lines read
+        so far equals or exceeds sizehint.
+        """
+        with self._lock:
+            self._check_can_read()
+            lines = []
+            nread = 0
+            while (self._mode != _MODE_READ_EOF and
+                   (sizehint < 0 or nread < sizehint)):
+                line = self._read_line(-1)
+                if not line:
+                    break
+                lines.append(line)
+                nread += len(line)
+            return lines
+
+    def write(self, data):
+        """Write data (bytes) to the file.
+
+        Returns the number of bytes written, which is always len(data).
+        Note that due to buffering, the file on disk may not reflect the
+        data written until close() is called.
+        """
+        with self._lock:
+            self._check_can_write()
+            return self._write(data)
+
+    def writelines(self, seq):
+        """Write a sequence of bytes objects to the file.
+
+        Returns the number of bytes written.
+
+        This is equivalent to calling write() for each bytes object.
+        Note that newlines are not added. seq can be any iterable
+        yielding bytes objects.
+        """
+        with self._lock:
+            self._check_can_write()
+            nwritten = 0
+            for block in seq:
+                nwritten += self._write(block)
+            return nwritten
+
+    def seek(self, offset, whence=0):
+        """Change file position.
+
+        The position is specified by offset, relative to the position
+        indicated by whence:
+
+            0: start of stream (default); offset cannot be negative
+            1: current stream position
+            2: end of stream; offset cannot be positive
+
+        Returns the new absolute position in the file.
+        """
+        with self._lock:
+            self._check_can_seek()
+
+            # Recalculate offset as an absolute file position.
+            if whence == 0:
+                pass
+            elif whence == 1:
+                offset = self._pos + offset
+            elif whence == 2:
+                # Seeking relative to EOF - we need to know the file's size.
+                if self._size < 0:
+                    while self._mode != _MODE_READ_EOF:
+                        self._read_block(_BUFFER_SIZE)
+                offset = self._size + offset
+            else:
+                raise ValueError("Invalid value for whence: {}".format(whence))
+
+            # Arrange that offset is the number of bytes to skip forward.
+            if offset < self._pos:
+                self._rewind()
+            else:
+                offset -= self._pos
+
+            # Read and discard data until we reach the desired position.
+            while offset > 0 and self._mode != _MODE_READ_EOF:
+                block = self._read_block(min(offset, _BUFFER_SIZE))
+                offset -= len(block)
+            return self._pos
+
+    def tell(self):
+        """Return the current file position."""
+        self._check_not_closed()
+        return self._pos
+
+    def close(self):
+        """Flush and close the file.
+
+        A closed file cannot be used for further I/O. close() may be
+        called more than once without error. The closed property can be
+        used to determine whether the file has been closed.
+        """
+        with self._lock:
+            self._close()
+
+    def _close(self):
+        # Closing an already-closed file is a no-op.
+        if self._mode == _MODE_CLOSED:
+            return
+        try:
+            if self._mode in (_MODE_READ, _MODE_READ_EOF):
+                self._decompressor = None
+            elif self._mode == _MODE_WRITE:
+                self._fp.write(self._compressor.flush())
+                self._compressor = None
+        finally:
+            try:
+                if self._closefp:
+                    self._fp.close()
+            finally:
+                self._fp = None
+                self._closefp = False
+                self._mode = _MODE_CLOSED
+                self._readahead = None
+
+    @property
+    def closed(self):
+        return self._mode == _MODE_CLOSED
+
+    def __enter__(self):
+        """Context management protocol. Returns self."""
+        self._check_not_closed()
+        return self
+
+    def __exit__(self, *args):
+        """Context management protocol. Calls close()."""
+        self.close()
+
+    def __iter__(self):
+        self._check_not_closed()
+        return self
+
+    def __next__(self):
+        with self._lock:
+            self._check_can_read()
+            line = self._read_line(-1)
+            if not line:
+                raise StopIteration()
+            return line
+
+    def __del__(self):
+        self._close()
+
+    # Functions to check the file mode for various operations.
+
+    def _check_not_closed(self):
+        if self._mode == _MODE_CLOSED:
+            raise ValueError("I/O operation on closed file")
+
+    def _check_can_read(self):
+        if self._mode not in (_MODE_READ, _MODE_READ_EOF):
+            self._check_not_closed()
+            raise IOError("File not open for reading")
+
+    def _check_can_write(self):
+        if self._mode != _MODE_WRITE:
+            self._check_not_closed()
+            raise IOError("File not open for writing")
+
+    def _check_can_seek(self):
+        if self._mode not in (_MODE_READ, _MODE_READ_EOF):
+            self._check_not_closed()
+            raise IOError("Seeking only works on files open for reading")
+
+    # Fill the readahead buffer. Returns False at EOF.
+    def _fill_readahead(self):
+        # decompress() may return b"" while buffering; keep feeding it.
+        while not self._readahead:
+            rawblock = self._fp.read(_BUFFER_SIZE)
+            if not rawblock:
+                return False
+            self._readahead = self._decompressor.decompress(rawblock)
+        return True
+
+    # Read a block of up to size bytes.
+    def _read_block(self, size):
+        data = bytearray()
+        while len(data) < size:
+            if not self._fill_readahead():
+                self._mode = _MODE_READ_EOF
+                self._size = self._pos
+                break
+            ct = min(size - len(data), len(self._readahead))
+            data.extend(self._readahead[:ct])
+            self._readahead = self._readahead[ct:]
+            self._pos += ct
+        return data
+
+    # Read a single line of up to max_size bytes. max_size may be negative.
+    def _read_line(self, max_size):
+        line = bytearray()
+        i = 0
+        while True:
+            if i == len(self._readahead):
+                self._readahead = b""
+                if not self._fill_readahead():
+                    break
+                i = 0
+            c = self._readahead[i]
+            i += 1
+            self._pos += 1
+            line.append(c)
+            if len(line) == max_size or c == 0x0a:  # b'\n' = b'\x0a'
+                break
+        self._readahead = self._readahead[i:]
+        return bytes(line)
+
+    # Compress and write a block of data.
+    def _write(self, data):
+        compressed = self._compressor.compress(data)
+        self._fp.write(compressed)
+        self._pos += len(data)
+        return len(data)
+
+    # Rewind the file to the beginning of the data stream.
+    def _rewind(self):
+        self._fp.seek(0, 0)
+        self._mode = _MODE_READ
+        self._pos = 0
+        self._decompressor = BZ2Decompressor()
+        self._readahead = b""
diff --git a/Lib/test/test_bz2.py b/Lib/test/test_bz2.py --- a/Lib/test/test_bz2.py +++ b/Lib/test/test_bz2.py @@ -21,7 +21,30 @@ class BaseTest(unittest.TestCase): "Base for other testcases." - TEXT = b'root:x:0:0:root:/root:/bin/bash\nbin:x:1:1:bin:/bin:\ndaemon:x:2:2:daemon:/sbin:\nadm:x:3:4:adm:/var/adm:\nlp:x:4:7:lp:/var/spool/lpd:\nsync:x:5:0:sync:/sbin:/bin/sync\nshutdown:x:6:0:shutdown:/sbin:/sbin/shutdown\nhalt:x:7:0:halt:/sbin:/sbin/halt\nmail:x:8:12:mail:/var/spool/mail:\nnews:x:9:13:news:/var/spool/news:\nuucp:x:10:14:uucp:/var/spool/uucp:\noperator:x:11:0:operator:/root:\ngames:x:12:100:games:/usr/games:\ngopher:x:13:30:gopher:/usr/lib/gopher-data:\nftp:x:14:50:FTP User:/var/ftp:/bin/bash\nnobody:x:65534:65534:Nobody:/home:\npostfix:x:100:101:postfix:/var/spool/postfix:\nniemeyer:x:500:500::/home/niemeyer:/bin/bash\npostgres:x:101:102:PostgreSQL Server:/var/lib/pgsql:/bin/bash\nmysql:x:102:103:MySQL server:/var/lib/mysql:/bin/bash\nwww:x:103:104::/var/www:/bin/false\n' + TEXT_LINES = [ + b'root:x:0:0:root:/root:/bin/bash\n', + b'bin:x:1:1:bin:/bin:\n', + b'daemon:x:2:2:daemon:/sbin:\n', + b'adm:x:3:4:adm:/var/adm:\n', + b'lp:x:4:7:lp:/var/spool/lpd:\n', + b'sync:x:5:0:sync:/sbin:/bin/sync\n', + b'shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown\n', + b'halt:x:7:0:halt:/sbin:/sbin/halt\n', + b'mail:x:8:12:mail:/var/spool/mail:\n', + b'news:x:9:13:news:/var/spool/news:\n', + b'uucp:x:10:14:uucp:/var/spool/uucp:\n', + b'operator:x:11:0:operator:/root:\n', + b'games:x:12:100:games:/usr/games:\n', + b'gopher:x:13:30:gopher:/usr/lib/gopher-data:\n', + b'ftp:x:14:50:FTP User:/var/ftp:/bin/bash\n', + b'nobody:x:65534:65534:Nobody:/home:\n', + b'postfix:x:100:101:postfix:/var/spool/postfix:\n', + b'niemeyer:x:500:500::/home/niemeyer:/bin/bash\n', + b'postgres:x:101:102:PostgreSQL Server:/var/lib/pgsql:/bin/bash\n', +
b'mysql:x:102:103:MySQL server:/var/lib/mysql:/bin/bash\n', + b'www:x:103:104::/var/www:/bin/false\n', + ] + TEXT = b''.join(TEXT_LINES) DATA = b'BZh91AY&SY.\xc8N\x18\x00\x01>_\x80\x00\x10@\x02\xff\xf0\x01\x07n\x00?\xe7\xff\xe00\x01\x99\xaa\x00\xc0\x03F\x86\x8c#&\x83F\x9a\x03\x06\xa6\xd0\xa6\x93M\x0fQ\xa7\xa8\x06\x804hh\x12$\x11\xa4i4\xf14S\xd2\x88\xe5\xcd9gd6\x0b\n\xe9\x9b\xd5\x8a\x99\xf7\x08.K\x8ev\xfb\xf7xw\xbb\xdf\xa1\x92\xf1\xdd|/";\xa2\xba\x9f\xd5\xb1#A\xb6\xf6\xb3o\xc9\xc5y\\\xebO\xe7\x85\x9a\xbc\xb6f8\x952\xd5\xd7"%\x89>V,\xf7\xa6z\xe2\x9f\xa3\xdf\x11\x11"\xd6E)I\xa9\x13^\xca\xf3r\xd0\x03U\x922\xf26\xec\xb6\xed\x8b\xc3U\x13\x9d\xc5\x170\xa4\xfa^\x92\xacDF\x8a\x97\xd6\x19\xfe\xdd\xb8\xbd\x1a\x9a\x19\xa3\x80ankR\x8b\xe5\xd83]\xa9\xc6\x08\x82f\xf6\xb9"6l$\xb8j@\xc0\x8a\xb0l1..\xbak\x83ls\x15\xbc\xf4\xc1\x13\xbe\xf8E\xb8\x9d\r\xa8\x9dk\x84\xd3n\xfa\xacQ\x07\xb1%y\xaav\xb4\x08\xe0z\x1b\x16\xf5\x04\xe9\xcc\xb9\x08z\x1en7.G\xfc]\xc9\x14\xe1B@\xbb!8`' DATA_CRLF = b'BZh91AY&SY\xaez\xbbN\x00\x01H\xdf\x80\x00\x12@\x02\xff\xf0\x01\x07n\x00?\xe7\xff\xe0@\x01\xbc\xc6`\x86*\x8d=M\xa9\x9a\x86\xd0L@\x0fI\xa6!\xa1\x13\xc8\x88jdi\x8d@\x03@\x1a\x1a\x0c\x0c\x83 \x00\xc4h2\x19\x01\x82D\x84e\t\xe8\x99\x89\x19\x1ah\x00\r\x1a\x11\xaf\x9b\x0fG\xf5(\x1b\x1f?\t\x12\xcf\xb5\xfc\x95E\x00ps\x89\x12^\xa4\xdd\xa2&\x05(\x87\x04\x98\x89u\xe40%\xb6\x19\'\x8c\xc4\x89\xca\x07\x0e\x1b!\x91UIFU%C\x994!DI\xd2\xfa\xf0\xf1N8W\xde\x13A\xf5\x9cr%?\x9f3;I45A\xd1\x8bT\xb1\xa4\xc7\x8d\x1a\\"\xad\xa1\xabyBg\x15\xb9l\x88\x88\x91k"\x94\xa4\xd4\x89\xae*\xa6\x0b\x10\x0c\xd6\xd4m\xe86\xec\xb5j\x8a\x86j\';\xca.\x01I\xf2\xaaJ\xe8\x88\x8cU+t3\xfb\x0c\n\xa33\x13r2\r\x16\xe0\xb3(\xbf\x1d\x83r\xe7M\xf0D\x1365\xd8\x88\xd3\xa4\x92\xcb2\x06\x04\\\xc1\xb0\xea//\xbek&\xd8\xe6+t\xe5\xa1\x13\xada\x16\xder5"w]\xa2i\xb7[\x97R \xe2IT\xcd;Z\x04dk4\xad\x8a\t\xd3\x81z\x10\xf1:^`\xab\x1f\xc5\xdc\x91N\x14$+\x9e\xae\xd3\x80' @@ -54,13 +77,15 @@ if os.path.isfile(self.filename): os.unlink(self.filename) - def createTempFile(self, 
crlf=0): + def getData(self, crlf=False): + if crlf: + return self.DATA_CRLF + else: + return self.DATA + + def createTempFile(self, crlf=False): with open(self.filename, "wb") as f: - if crlf: - data = self.DATA_CRLF - else: - data = self.DATA - f.write(data) + f.write(self.getData(crlf)) def testRead(self): # "Test BZ2File.read()" @@ -70,7 +95,7 @@ self.assertEqual(bz2f.read(), self.TEXT) def testRead0(self): - # Test BBZ2File.read(0)" + # "Test BZ2File.read(0)" self.createTempFile() with BZ2File(self.filename) as bz2f: self.assertRaises(TypeError, bz2f.read, None) @@ -86,7 +111,7 @@ if not str: break text += str - self.assertEqual(text, text) + self.assertEqual(text, self.TEXT) def testRead100(self): # "Test BZ2File.read(100)" @@ -125,7 +150,7 @@ bz2f = BZ2File(self.filename) bz2f.close() self.assertRaises(ValueError, bz2f.__next__) - # This call will deadlock of the above .__next__ call failed to + # This call will deadlock if the above .__next__ call failed to # release the lock. self.assertRaises(ValueError, bz2f.readlines) @@ -278,17 +303,56 @@ t.join() def testMixedIterationReads(self): - # Issue #8397: mixed iteration and reads should be forbidden. - with bz2.BZ2File(self.filename, 'wb') as f: - # The internal buffer size is hard-wired to 8192 bytes, we must - # write out more than that for the test to stop half through - # the buffer. - f.write(self.TEXT * 100) - with bz2.BZ2File(self.filename, 'rb') as f: - next(f) - self.assertRaises(ValueError, f.read) - self.assertRaises(ValueError, f.readline) - self.assertRaises(ValueError, f.readlines) + # "Test mixed iteration and reads."
+ self.createTempFile() + linelen = len(self.TEXT_LINES[0]) + halflen = linelen // 2 + with bz2.BZ2File(self.filename) as bz2f: + bz2f.read(halflen) + self.assertEqual(next(bz2f), self.TEXT_LINES[0][halflen:]) + self.assertEqual(bz2f.read(), self.TEXT[linelen:]) + with bz2.BZ2File(self.filename) as bz2f: + bz2f.readline() + self.assertEqual(next(bz2f), self.TEXT_LINES[1]) + self.assertEqual(bz2f.readline(), self.TEXT_LINES[2]) + with bz2.BZ2File(self.filename) as bz2f: + bz2f.readlines() + with self.assertRaises(StopIteration): + next(bz2f) + self.assertEqual(bz2f.readlines(), []) + + def testReadBytesIO(self): + # "Test BZ2File.read() with BytesIO source" + with BytesIO(self.getData()) as bio: + with BZ2File(fileobj=bio) as bz2f: + self.assertRaises(TypeError, bz2f.read, None) + self.assertEqual(bz2f.read(), self.TEXT) + self.assertFalse(bio.closed) + + def testWriteBytesIO(self): + # "Test BZ2File.write() with BytesIO destination" + with BytesIO() as bio: + with BZ2File(fileobj=bio, mode="w") as bz2f: + self.assertRaises(TypeError, bz2f.write) + bz2f.write(self.TEXT) + self.assertEqual(self.decompress(bio.getvalue()), self.TEXT) + self.assertFalse(bio.closed) + + def testSeekForwardBytesIO(self): + # "Test BZ2File.seek(150, 0) with BytesIO source" + with BytesIO(self.getData()) as bio: + with BZ2File(fileobj=bio) as bz2f: + self.assertRaises(TypeError, bz2f.seek) + bz2f.seek(150) + self.assertEqual(bz2f.read(), self.TEXT[150:]) + + def testSeekBackwardsBytesIO(self): + # "Test BZ2File.seek(-150, 1) with BytesIO source" + with BytesIO(self.getData()) as bio: + with BZ2File(fileobj=bio) as bz2f: + bz2f.read(500) + bz2f.seek(-150, 1) + self.assertEqual(bz2f.read(), self.TEXT[500-150:]) class BZ2CompressorTest(BaseTest): def testCompress(self): diff --git a/Modules/bz2module.c b/Modules/_bz2module.c rename from Modules/bz2module.c rename to Modules/_bz2module.c --- a/Modules/bz2module.c +++ b/Modules/_bz2module.c @@ -1,6 +1,6 @@ /* -python-bz2 - python bz2 library 
interface +bz2 - Python interface to libbzip2 Copyright (c) 2002 Gustavo Niemeyer Copyright (c) 2002 Python Software Foundation; All Rights Reserved @@ -8,7 +8,6 @@ */ #include "Python.h" -#include #include #include "structmember.h" @@ -16,34 +15,10 @@ #include "pythread.h" #endif -static char __author__[] = -"The bz2 python module was written by:\n\ -\n\ - Gustavo Niemeyer \n\ -"; - -/* Our very own off_t-like type, 64-bit if possible */ -/* copied from Objects/fileobject.c */ -#if !defined(HAVE_LARGEFILE_SUPPORT) -typedef off_t Py_off_t; -#elif SIZEOF_OFF_T >= 8 -typedef off_t Py_off_t; -#elif SIZEOF_FPOS_T >= 8 -typedef fpos_t Py_off_t; -#else -#error "Large file support, but neither off_t nor fpos_t is large enough." -#endif +static char __author__[] = "Gustavo Niemeyer "; #define BUF(v) PyBytes_AS_STRING(v) -#define MODE_CLOSED 0 -#define MODE_READ 1 -#define MODE_READ_EOF 2 -#define MODE_WRITE 3 - -#define BZ2FileObject_Check(v) (Py_TYPE(v) == &BZ2File_Type) - - #ifdef BZ_CONFIG_ERROR #if SIZEOF_LONG >= 8 @@ -59,12 +34,6 @@ #else /* ! BZ_CONFIG_ERROR */ -#define BZ2_bzRead bzRead -#define BZ2_bzReadOpen bzReadOpen -#define BZ2_bzReadClose bzReadClose -#define BZ2_bzWrite bzWrite -#define BZ2_bzWriteOpen bzWriteOpen -#define BZ2_bzWriteClose bzWriteClose #define BZ2_bzCompress bzCompress #define BZ2_bzCompressInit bzCompressInit #define BZ2_bzCompressEnd bzCompressEnd @@ -90,34 +59,12 @@ #define RELEASE_LOCK(obj) #endif -/* Bits in f_newlinetypes */ -#define NEWLINE_UNKNOWN 0 /* No newline seen, yet */ -#define NEWLINE_CR 1 /* \r newline seen */ -#define NEWLINE_LF 2 /* \n newline seen */ -#define NEWLINE_CRLF 4 /* \r\n newline seen */ /* ===================================================================== */ /* Structure definitions. 
*/ typedef struct { PyObject_HEAD - FILE *rawfp; - - char* f_buf; /* Allocated readahead buffer */ - char* f_bufend; /* Points after last occupied position */ - char* f_bufptr; /* Current buffer position */ - - BZFILE *fp; - int mode; - Py_off_t pos; - Py_off_t size; -#ifdef WITH_THREAD - PyThread_type_lock lock; -#endif -} BZ2FileObject; - -typedef struct { - PyObject_HEAD bz_stream bzs; int running; #ifdef WITH_THREAD @@ -138,22 +85,6 @@ /* ===================================================================== */ /* Utility functions. */ -/* Refuse regular I/O if there's data in the iteration-buffer. - * Mixing them would cause data to arrive out of order, as the read* - * methods don't use the iteration buffer. */ -static int -check_iterbuffered(BZ2FileObject *f) -{ - if (f->f_buf != NULL && - (f->f_bufend - f->f_bufptr) > 0 && - f->f_buf[0] != '\0') { - PyErr_SetString(PyExc_ValueError, - "Mixing iteration and read methods would lose data"); - return -1; - } - return 0; -} - static int Util_CatchBZ2Error(int bzerror) { @@ -239,1179 +170,6 @@ return currentsize + SMALLCHUNK; } -/* This is a hacked version of Python's fileobject.c:get_line(). */ -static PyObject * -Util_GetLine(BZ2FileObject *f, int n) -{ - char c; - char *buf, *end; - size_t total_v_size; /* total # of slots in buffer */ - size_t used_v_size; /* # used slots in buffer */ - size_t increment; /* amount to increment the buffer */ - PyObject *v; - int bzerror; - int bytes_read; - - total_v_size = n > 0 ? 
n : 100; - v = PyBytes_FromStringAndSize((char *)NULL, total_v_size); - if (v == NULL) - return NULL; - - buf = BUF(v); - end = buf + total_v_size; - - for (;;) { - Py_BEGIN_ALLOW_THREADS - do { - bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1); - f->pos++; - if (bytes_read == 0) - break; - *buf++ = c; - } while (bzerror == BZ_OK && c != '\n' && buf != end); - Py_END_ALLOW_THREADS - if (bzerror == BZ_STREAM_END) { - f->size = f->pos; - f->mode = MODE_READ_EOF; - break; - } else if (bzerror != BZ_OK) { - Util_CatchBZ2Error(bzerror); - Py_DECREF(v); - return NULL; - } - if (c == '\n') - break; - /* Must be because buf == end */ - if (n > 0) - break; - used_v_size = total_v_size; - increment = total_v_size >> 2; /* mild exponential growth */ - total_v_size += increment; - if (total_v_size > INT_MAX) { - PyErr_SetString(PyExc_OverflowError, - "line is longer than a Python string can hold"); - Py_DECREF(v); - return NULL; - } - if (_PyBytes_Resize(&v, total_v_size) < 0) { - return NULL; - } - buf = BUF(v) + used_v_size; - end = BUF(v) + total_v_size; - } - - used_v_size = buf - BUF(v); - if (used_v_size != total_v_size) { - if (_PyBytes_Resize(&v, used_v_size) < 0) { - v = NULL; - } - } - return v; -} - -/* This is a hacked version of Python's fileobject.c:drop_readahead(). */ -static void -Util_DropReadAhead(BZ2FileObject *f) -{ - if (f->f_buf != NULL) { - PyMem_Free(f->f_buf); - f->f_buf = NULL; - } -} - -/* This is a hacked version of Python's fileobject.c:readahead(). 
*/ -static int -Util_ReadAhead(BZ2FileObject *f, int bufsize) -{ - int chunksize; - int bzerror; - - if (f->f_buf != NULL) { - if((f->f_bufend - f->f_bufptr) >= 1) - return 0; - else - Util_DropReadAhead(f); - } - if (f->mode == MODE_READ_EOF) { - f->f_bufptr = f->f_buf; - f->f_bufend = f->f_buf; - return 0; - } - if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) { - PyErr_NoMemory(); - return -1; - } - Py_BEGIN_ALLOW_THREADS - chunksize = BZ2_bzRead(&bzerror, f->fp, f->f_buf, bufsize); - Py_END_ALLOW_THREADS - f->pos += chunksize; - if (bzerror == BZ_STREAM_END) { - f->size = f->pos; - f->mode = MODE_READ_EOF; - } else if (bzerror != BZ_OK) { - Util_CatchBZ2Error(bzerror); - Util_DropReadAhead(f); - return -1; - } - f->f_bufptr = f->f_buf; - f->f_bufend = f->f_buf + chunksize; - return 0; -} - -/* This is a hacked version of Python's - * fileobject.c:readahead_get_line_skip(). */ -static PyBytesObject * -Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize) -{ - PyBytesObject* s; - char *bufptr; - char *buf; - int len; - - if (f->f_buf == NULL) - if (Util_ReadAhead(f, bufsize) < 0) - return NULL; - - len = f->f_bufend - f->f_bufptr; - if (len == 0) - return (PyBytesObject *) - PyBytes_FromStringAndSize(NULL, skip); - bufptr = memchr(f->f_bufptr, '\n', len); - if (bufptr != NULL) { - bufptr++; /* Count the '\n' */ - len = bufptr - f->f_bufptr; - s = (PyBytesObject *) - PyBytes_FromStringAndSize(NULL, skip+len); - if (s == NULL) - return NULL; - memcpy(PyBytes_AS_STRING(s)+skip, f->f_bufptr, len); - f->f_bufptr = bufptr; - if (bufptr == f->f_bufend) - Util_DropReadAhead(f); - } else { - bufptr = f->f_bufptr; - buf = f->f_buf; - f->f_buf = NULL; /* Force new readahead buffer */ - s = Util_ReadAheadGetLineSkip(f, skip+len, - bufsize + (bufsize>>2)); - if (s == NULL) { - PyMem_Free(buf); - return NULL; - } - memcpy(PyBytes_AS_STRING(s)+skip, bufptr, len); - PyMem_Free(buf); - } - return s; -} - -/* 
===================================================================== */ -/* Methods of BZ2File. */ - -PyDoc_STRVAR(BZ2File_read__doc__, -"read([size]) -> string\n\ -\n\ -Read at most size uncompressed bytes, returned as a string. If the size\n\ -argument is negative or omitted, read until EOF is reached.\n\ -"); - -/* This is a hacked version of Python's fileobject.c:file_read(). */ -static PyObject * -BZ2File_read(BZ2FileObject *self, PyObject *args) -{ - long bytesrequested = -1; - size_t bytesread, buffersize, chunksize; - int bzerror; - PyObject *ret = NULL; - - if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested)) - return NULL; - - ACQUIRE_LOCK(self); - switch (self->mode) { - case MODE_READ: - break; - case MODE_READ_EOF: - ret = PyBytes_FromStringAndSize("", 0); - goto cleanup; - case MODE_CLOSED: - PyErr_SetString(PyExc_ValueError, - "I/O operation on closed file"); - goto cleanup; - default: - PyErr_SetString(PyExc_IOError, - "file is not ready for reading"); - goto cleanup; - } - - /* refuse to mix with f.next() */ - if (check_iterbuffered(self)) - goto cleanup; - - if (bytesrequested < 0) - buffersize = Util_NewBufferSize((size_t)0); - else - buffersize = bytesrequested; - if (buffersize > INT_MAX) { - PyErr_SetString(PyExc_OverflowError, - "requested number of bytes is " - "more than a Python string can hold"); - goto cleanup; - } - ret = PyBytes_FromStringAndSize((char *)NULL, buffersize); - if (ret == NULL || buffersize == 0) - goto cleanup; - bytesread = 0; - - for (;;) { - Py_BEGIN_ALLOW_THREADS - chunksize = BZ2_bzRead(&bzerror, self->fp, - BUF(ret)+bytesread, - buffersize-bytesread); - self->pos += chunksize; - Py_END_ALLOW_THREADS - bytesread += chunksize; - if (bzerror == BZ_STREAM_END) { - self->size = self->pos; - self->mode = MODE_READ_EOF; - break; - } else if (bzerror != BZ_OK) { - Util_CatchBZ2Error(bzerror); - Py_DECREF(ret); - ret = NULL; - goto cleanup; - } - if (bytesrequested < 0) { - buffersize = Util_NewBufferSize(buffersize); 
- if (_PyBytes_Resize(&ret, buffersize) < 0) { - ret = NULL; - goto cleanup; - } - } else { - break; - } - } - if (bytesread != buffersize) { - if (_PyBytes_Resize(&ret, bytesread) < 0) { - ret = NULL; - } - } - -cleanup: - RELEASE_LOCK(self); - return ret; -} - -PyDoc_STRVAR(BZ2File_readline__doc__, -"readline([size]) -> string\n\ -\n\ -Return the next line from the file, as a string, retaining newline.\n\ -A non-negative size argument will limit the maximum number of bytes to\n\ -return (an incomplete line may be returned then). Return an empty\n\ -string at EOF.\n\ -"); - -static PyObject * -BZ2File_readline(BZ2FileObject *self, PyObject *args) -{ - PyObject *ret = NULL; - int sizehint = -1; - - if (!PyArg_ParseTuple(args, "|i:readline", &sizehint)) - return NULL; - - ACQUIRE_LOCK(self); - switch (self->mode) { - case MODE_READ: - break; - case MODE_READ_EOF: - ret = PyBytes_FromStringAndSize("", 0); - goto cleanup; - case MODE_CLOSED: - PyErr_SetString(PyExc_ValueError, - "I/O operation on closed file"); - goto cleanup; - default: - PyErr_SetString(PyExc_IOError, - "file is not ready for reading"); - goto cleanup; - } - - /* refuse to mix with f.next() */ - if (check_iterbuffered(self)) - goto cleanup; - - if (sizehint == 0) - ret = PyBytes_FromStringAndSize("", 0); - else - ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint); - -cleanup: - RELEASE_LOCK(self); - return ret; -} - -PyDoc_STRVAR(BZ2File_readlines__doc__, -"readlines([size]) -> list\n\ -\n\ -Call readline() repeatedly and return a list of lines read.\n\ -The optional size argument, if given, is an approximate bound on the\n\ -total number of bytes in the lines returned.\n\ -"); - -/* This is a hacked version of Python's fileobject.c:file_readlines(). 
*/ -static PyObject * -BZ2File_readlines(BZ2FileObject *self, PyObject *args) -{ - long sizehint = 0; - PyObject *list = NULL; - PyObject *line; - char small_buffer[SMALLCHUNK]; - char *buffer = small_buffer; - size_t buffersize = SMALLCHUNK; - PyObject *big_buffer = NULL; - size_t nfilled = 0; - size_t nread; - size_t totalread = 0; - char *p, *q, *end; - int err; - int shortread = 0; - int bzerror; - - if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint)) - return NULL; - - ACQUIRE_LOCK(self); - switch (self->mode) { - case MODE_READ: - break; - case MODE_READ_EOF: - list = PyList_New(0); - goto cleanup; - case MODE_CLOSED: - PyErr_SetString(PyExc_ValueError, - "I/O operation on closed file"); - goto cleanup; - default: - PyErr_SetString(PyExc_IOError, - "file is not ready for reading"); - goto cleanup; - } - - /* refuse to mix with f.next() */ - if (check_iterbuffered(self)) - goto cleanup; - - if ((list = PyList_New(0)) == NULL) - goto cleanup; - - for (;;) { - Py_BEGIN_ALLOW_THREADS - nread = BZ2_bzRead(&bzerror, self->fp, - buffer+nfilled, buffersize-nfilled); - self->pos += nread; - Py_END_ALLOW_THREADS - if (bzerror == BZ_STREAM_END) { - self->size = self->pos; - self->mode = MODE_READ_EOF; - if (nread == 0) { - sizehint = 0; - break; - } - shortread = 1; - } else if (bzerror != BZ_OK) { - Util_CatchBZ2Error(bzerror); - error: - Py_DECREF(list); - list = NULL; - goto cleanup; - } - totalread += nread; - p = memchr(buffer+nfilled, '\n', nread); - if (!shortread && p == NULL) { - /* Need a larger buffer to fit this line */ - nfilled += nread; - buffersize *= 2; - if (buffersize > INT_MAX) { - PyErr_SetString(PyExc_OverflowError, - "line is longer than a Python string can hold"); - goto error; - } - if (big_buffer == NULL) { - /* Create the big buffer */ - big_buffer = PyBytes_FromStringAndSize( - NULL, buffersize); - if (big_buffer == NULL) - goto error; - buffer = PyBytes_AS_STRING(big_buffer); - memcpy(buffer, small_buffer, nfilled); - } - else { - /* 
Grow the big buffer */ - if (_PyBytes_Resize(&big_buffer, buffersize) < 0){ - big_buffer = NULL; - goto error; - } - buffer = PyBytes_AS_STRING(big_buffer); - } - continue; - } - end = buffer+nfilled+nread; - q = buffer; - while (p != NULL) { - /* Process complete lines */ - p++; - line = PyBytes_FromStringAndSize(q, p-q); - if (line == NULL) - goto error; - err = PyList_Append(list, line); - Py_DECREF(line); - if (err != 0) - goto error; - q = p; - p = memchr(q, '\n', end-q); - } - /* Move the remaining incomplete line to the start */ - nfilled = end-q; - memmove(buffer, q, nfilled); - if (sizehint > 0) - if (totalread >= (size_t)sizehint) - break; - if (shortread) { - sizehint = 0; - break; - } - } - if (nfilled != 0) { - /* Partial last line */ - line = PyBytes_FromStringAndSize(buffer, nfilled); - if (line == NULL) - goto error; - if (sizehint > 0) { - /* Need to complete the last line */ - PyObject *rest = Util_GetLine(self, 0); - if (rest == NULL) { - Py_DECREF(line); - goto error; - } - PyBytes_Concat(&line, rest); - Py_DECREF(rest); - if (line == NULL) - goto error; - } - err = PyList_Append(list, line); - Py_DECREF(line); - if (err != 0) - goto error; - } - - cleanup: - RELEASE_LOCK(self); - if (big_buffer) { - Py_DECREF(big_buffer); - } - return list; -} - -PyDoc_STRVAR(BZ2File_write__doc__, -"write(data) -> None\n\ -\n\ -Write the 'data' string to file. Note that due to buffering, close() may\n\ -be needed before the file on disk reflects the data written.\n\ -"); - -/* This is a hacked version of Python's fileobject.c:file_write(). 
*/ -static PyObject * -BZ2File_write(BZ2FileObject *self, PyObject *args) -{ - PyObject *ret = NULL; - Py_buffer pbuf; - char *buf; - int len; - int bzerror; - - if (!PyArg_ParseTuple(args, "y*:write", &pbuf)) - return NULL; - buf = pbuf.buf; - len = pbuf.len; - - ACQUIRE_LOCK(self); - switch (self->mode) { - case MODE_WRITE: - break; - - case MODE_CLOSED: - PyErr_SetString(PyExc_ValueError, - "I/O operation on closed file"); - goto cleanup; - - default: - PyErr_SetString(PyExc_IOError, - "file is not ready for writing"); - goto cleanup; - } - - Py_BEGIN_ALLOW_THREADS - BZ2_bzWrite (&bzerror, self->fp, buf, len); - self->pos += len; - Py_END_ALLOW_THREADS - - if (bzerror != BZ_OK) { - Util_CatchBZ2Error(bzerror); - goto cleanup; - } - - Py_INCREF(Py_None); - ret = Py_None; - -cleanup: - PyBuffer_Release(&pbuf); - RELEASE_LOCK(self); - return ret; -} - -PyDoc_STRVAR(BZ2File_writelines__doc__, -"writelines(sequence_of_strings) -> None\n\ -\n\ -Write the sequence of strings to the file. Note that newlines are not\n\ -added. The sequence can be any iterable object producing strings. This is\n\ -equivalent to calling write() for each string.\n\ -"); - -/* This is a hacked version of Python's fileobject.c:file_writelines(). 
*/ -static PyObject * -BZ2File_writelines(BZ2FileObject *self, PyObject *seq) -{ -#define CHUNKSIZE 1000 - PyObject *list = NULL; - PyObject *iter = NULL; - PyObject *ret = NULL; - PyObject *line; - int i, j, index, len, islist; - int bzerror; - - ACQUIRE_LOCK(self); - switch (self->mode) { - case MODE_WRITE: - break; - - case MODE_CLOSED: - PyErr_SetString(PyExc_ValueError, - "I/O operation on closed file"); - goto error; - - default: - PyErr_SetString(PyExc_IOError, - "file is not ready for writing"); - goto error; - } - - islist = PyList_Check(seq); - if (!islist) { - iter = PyObject_GetIter(seq); - if (iter == NULL) { - PyErr_SetString(PyExc_TypeError, - "writelines() requires an iterable argument"); - goto error; - } - list = PyList_New(CHUNKSIZE); - if (list == NULL) - goto error; - } - - /* Strategy: slurp CHUNKSIZE lines into a private list, - checking that they are all strings, then write that list - without holding the interpreter lock, then come back for more. */ - for (index = 0; ; index += CHUNKSIZE) { - if (islist) { - Py_XDECREF(list); - list = PyList_GetSlice(seq, index, index+CHUNKSIZE); - if (list == NULL) - goto error; - j = PyList_GET_SIZE(list); - } - else { - for (j = 0; j < CHUNKSIZE; j++) { - line = PyIter_Next(iter); - if (line == NULL) { - if (PyErr_Occurred()) - goto error; - break; - } - PyList_SetItem(list, j, line); - } - } - if (j == 0) - break; - - /* Check that all entries are indeed byte strings. If not, - apply the same rules as for file.write() and - convert the rets to strings. This is slow, but - seems to be the only way since all conversion APIs - could potentially execute Python code. 
*/ - for (i = 0; i < j; i++) { - PyObject *v = PyList_GET_ITEM(list, i); - if (!PyBytes_Check(v)) { - const char *buffer; - Py_ssize_t len; - if (PyObject_AsCharBuffer(v, &buffer, &len)) { - PyErr_SetString(PyExc_TypeError, - "writelines() " - "argument must be " - "a sequence of " - "bytes objects"); - goto error; - } - line = PyBytes_FromStringAndSize(buffer, - len); - if (line == NULL) - goto error; - Py_DECREF(v); - PyList_SET_ITEM(list, i, line); - } - } - - /* Since we are releasing the global lock, the - following code may *not* execute Python code. */ - Py_BEGIN_ALLOW_THREADS - for (i = 0; i < j; i++) { - line = PyList_GET_ITEM(list, i); - len = PyBytes_GET_SIZE(line); - BZ2_bzWrite (&bzerror, self->fp, - PyBytes_AS_STRING(line), len); - if (bzerror != BZ_OK) { - Py_BLOCK_THREADS - Util_CatchBZ2Error(bzerror); - goto error; - } - } - Py_END_ALLOW_THREADS - - if (j < CHUNKSIZE) - break; - } - - Py_INCREF(Py_None); - ret = Py_None; - - error: - RELEASE_LOCK(self); - Py_XDECREF(list); - Py_XDECREF(iter); - return ret; -#undef CHUNKSIZE -} - -PyDoc_STRVAR(BZ2File_seek__doc__, -"seek(offset [, whence]) -> None\n\ -\n\ -Move to new file position. Argument offset is a byte count. 
Optional\n\ -argument whence defaults to 0 (offset from start of file, offset\n\ -should be >= 0); other values are 1 (move relative to current position,\n\ -positive or negative), and 2 (move relative to end of file, usually\n\ -negative, although many platforms allow seeking beyond the end of a file).\n\ -\n\ -Note that seeking of bz2 files is emulated, and depending on the parameters\n\ -the operation may be extremely slow.\n\ -"); - -static PyObject * -BZ2File_seek(BZ2FileObject *self, PyObject *args) -{ - int where = 0; - PyObject *offobj; - Py_off_t offset; - char small_buffer[SMALLCHUNK]; - char *buffer = small_buffer; - size_t buffersize = SMALLCHUNK; - Py_off_t bytesread = 0; - size_t readsize; - int chunksize; - int bzerror; - PyObject *ret = NULL; - - if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where)) - return NULL; -#if !defined(HAVE_LARGEFILE_SUPPORT) - offset = PyLong_AsLong(offobj); -#else - offset = PyLong_Check(offobj) ? - PyLong_AsLongLong(offobj) : PyLong_AsLong(offobj); -#endif - if (PyErr_Occurred()) - return NULL; - - ACQUIRE_LOCK(self); - Util_DropReadAhead(self); - switch (self->mode) { - case MODE_READ: - case MODE_READ_EOF: - break; - - case MODE_CLOSED: - PyErr_SetString(PyExc_ValueError, - "I/O operation on closed file"); - goto cleanup; - - default: - PyErr_SetString(PyExc_IOError, - "seek works only while reading"); - goto cleanup; - } - - if (where == 2) { - if (self->size == -1) { - assert(self->mode != MODE_READ_EOF); - for (;;) { - Py_BEGIN_ALLOW_THREADS - chunksize = BZ2_bzRead(&bzerror, self->fp, - buffer, buffersize); - self->pos += chunksize; - Py_END_ALLOW_THREADS - - bytesread += chunksize; - if (bzerror == BZ_STREAM_END) { - break; - } else if (bzerror != BZ_OK) { - Util_CatchBZ2Error(bzerror); - goto cleanup; - } - } - self->mode = MODE_READ_EOF; - self->size = self->pos; - bytesread = 0; - } - offset = self->size + offset; - } else if (where == 1) { - offset = self->pos + offset; - } - - /* Before getting here, 
offset must be the absolute position the file - * pointer should be set to. */ - - if (offset >= self->pos) { - /* we can move forward */ - offset -= self->pos; - } else { - /* we cannot move back, so rewind the stream */ - BZ2_bzReadClose(&bzerror, self->fp); - if (bzerror != BZ_OK) { - Util_CatchBZ2Error(bzerror); - goto cleanup; - } - rewind(self->rawfp); - self->pos = 0; - self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp, - 0, 0, NULL, 0); - if (bzerror != BZ_OK) { - Util_CatchBZ2Error(bzerror); - goto cleanup; - } - self->mode = MODE_READ; - } - - if (offset <= 0 || self->mode == MODE_READ_EOF) - goto exit; - - /* Before getting here, offset must be set to the number of bytes - * to walk forward. */ - for (;;) { - if (offset-bytesread > buffersize) - readsize = buffersize; - else - /* offset might be wider that readsize, but the result - * of the subtraction is bound by buffersize (see the - * condition above). buffersize is 8192. */ - readsize = (size_t)(offset-bytesread); - Py_BEGIN_ALLOW_THREADS - chunksize = BZ2_bzRead(&bzerror, self->fp, buffer, readsize); - self->pos += chunksize; - Py_END_ALLOW_THREADS - bytesread += chunksize; - if (bzerror == BZ_STREAM_END) { - self->size = self->pos; - self->mode = MODE_READ_EOF; - break; - } else if (bzerror != BZ_OK) { - Util_CatchBZ2Error(bzerror); - goto cleanup; - } - if (bytesread == offset) - break; - } - -exit: - Py_INCREF(Py_None); - ret = Py_None; - -cleanup: - RELEASE_LOCK(self); - return ret; -} - -PyDoc_STRVAR(BZ2File_tell__doc__, -"tell() -> int\n\ -\n\ -Return the current file position, an integer (may be a long integer).\n\ -"); - -static PyObject * -BZ2File_tell(BZ2FileObject *self, PyObject *args) -{ - PyObject *ret = NULL; - - if (self->mode == MODE_CLOSED) { - PyErr_SetString(PyExc_ValueError, - "I/O operation on closed file"); - goto cleanup; - } - -#if !defined(HAVE_LARGEFILE_SUPPORT) - ret = PyLong_FromLong(self->pos); -#else - ret = PyLong_FromLongLong(self->pos); -#endif - -cleanup: - return 
ret; -} - -PyDoc_STRVAR(BZ2File_close__doc__, -"close() -> None or (perhaps) an integer\n\ -\n\ -Close the file. Sets data attribute .closed to true. A closed file\n\ -cannot be used for further I/O operations. close() may be called more\n\ -than once without error.\n\ -"); - -static PyObject * -BZ2File_close(BZ2FileObject *self) -{ - PyObject *ret = NULL; - int bzerror = BZ_OK; - - if (self->mode == MODE_CLOSED) { - Py_RETURN_NONE; - } - - ACQUIRE_LOCK(self); - switch (self->mode) { - case MODE_READ: - case MODE_READ_EOF: - BZ2_bzReadClose(&bzerror, self->fp); - break; - case MODE_WRITE: - BZ2_bzWriteClose(&bzerror, self->fp, - 0, NULL, NULL); - break; - } - self->mode = MODE_CLOSED; - fclose(self->rawfp); - self->rawfp = NULL; - if (bzerror == BZ_OK) { - Py_INCREF(Py_None); - ret = Py_None; - } - else { - Util_CatchBZ2Error(bzerror); - } - - RELEASE_LOCK(self); - return ret; -} - -PyDoc_STRVAR(BZ2File_enter_doc, -"__enter__() -> self."); - -static PyObject * -BZ2File_enter(BZ2FileObject *self) -{ - if (self->mode == MODE_CLOSED) { - PyErr_SetString(PyExc_ValueError, - "I/O operation on closed file"); - return NULL; - } - Py_INCREF(self); - return (PyObject *) self; -} - -PyDoc_STRVAR(BZ2File_exit_doc, -"__exit__(*excinfo) -> None. 
Closes the file."); - -static PyObject * -BZ2File_exit(BZ2FileObject *self, PyObject *args) -{ - PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL); - if (!ret) - /* If error occurred, pass through */ - return NULL; - Py_DECREF(ret); - Py_RETURN_NONE; -} - - -static PyObject *BZ2File_getiter(BZ2FileObject *self); - -static PyMethodDef BZ2File_methods[] = { - {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__}, - {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__}, - {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__}, - {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__}, - {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__}, - {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__}, - {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__}, - {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__}, - {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc}, - {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc}, - {NULL, NULL} /* sentinel */ -}; - - -/* ===================================================================== */ -/* Getters and setters of BZ2File. */ - -static PyObject * -BZ2File_get_closed(BZ2FileObject *self, void *closure) -{ - return PyLong_FromLong(self->mode == MODE_CLOSED); -} - -static PyGetSetDef BZ2File_getset[] = { - {"closed", (getter)BZ2File_get_closed, NULL, - "True if the file is closed"}, - {NULL} /* Sentinel */ -}; - - -/* ===================================================================== */ -/* Slot definitions for BZ2File_Type. 
*/ - -static int -BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs) -{ - static char *kwlist[] = {"filename", "mode", "buffering", - "compresslevel", 0}; - PyObject *name_obj = NULL; - char *name; - char *mode = "r"; - int buffering = -1; - int compresslevel = 9; - int bzerror; - int mode_char = 0; - - self->size = -1; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O&|sii:BZ2File", - kwlist, PyUnicode_FSConverter, &name_obj, - &mode, &buffering, - &compresslevel)) - return -1; - - name = PyBytes_AsString(name_obj); - if (compresslevel < 1 || compresslevel > 9) { - PyErr_SetString(PyExc_ValueError, - "compresslevel must be between 1 and 9"); - Py_DECREF(name_obj); - return -1; - } - - for (;;) { - int error = 0; - switch (*mode) { - case 'r': - case 'w': - if (mode_char) - error = 1; - mode_char = *mode; - break; - - case 'b': - break; - - default: - error = 1; - break; - } - if (error) { - PyErr_Format(PyExc_ValueError, - "invalid mode char %c", *mode); - Py_DECREF(name_obj); - return -1; - } - mode++; - if (*mode == '\0') - break; - } - - if (mode_char == 0) { - mode_char = 'r'; - } - - mode = (mode_char == 'r') ? "rb" : "wb"; - - self->rawfp = fopen(name, mode); - Py_DECREF(name_obj); - if (self->rawfp == NULL) { - PyErr_SetFromErrno(PyExc_IOError); - return -1; - } - /* XXX Ignore buffering */ - - /* From now on, we have stuff to dealloc, so jump to error label - * instead of returning */ - -#ifdef WITH_THREAD - self->lock = PyThread_allocate_lock(); - if (!self->lock) { - PyErr_SetString(PyExc_MemoryError, "unable to allocate lock"); - goto error; - } -#endif - - if (mode_char == 'r') - self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp, - 0, 0, NULL, 0); - else - self->fp = BZ2_bzWriteOpen(&bzerror, self->rawfp, - compresslevel, 0, 0); - - if (bzerror != BZ_OK) { - Util_CatchBZ2Error(bzerror); - goto error; - } - - self->mode = (mode_char == 'r') ? 
MODE_READ : MODE_WRITE; - - return 0; - -error: - fclose(self->rawfp); - self->rawfp = NULL; -#ifdef WITH_THREAD - if (self->lock) { - PyThread_free_lock(self->lock); - self->lock = NULL; - } -#endif - return -1; -} - -static void -BZ2File_dealloc(BZ2FileObject *self) -{ - int bzerror; -#ifdef WITH_THREAD - if (self->lock) - PyThread_free_lock(self->lock); -#endif - switch (self->mode) { - case MODE_READ: - case MODE_READ_EOF: - BZ2_bzReadClose(&bzerror, self->fp); - break; - case MODE_WRITE: - BZ2_bzWriteClose(&bzerror, self->fp, - 0, NULL, NULL); - break; - } - Util_DropReadAhead(self); - if (self->rawfp != NULL) - fclose(self->rawfp); - Py_TYPE(self)->tp_free((PyObject *)self); -} - -/* This is a hacked version of Python's fileobject.c:file_getiter(). */ -static PyObject * -BZ2File_getiter(BZ2FileObject *self) -{ - if (self->mode == MODE_CLOSED) { - PyErr_SetString(PyExc_ValueError, - "I/O operation on closed file"); - return NULL; - } - Py_INCREF((PyObject*)self); - return (PyObject *)self; -} - -/* This is a hacked version of Python's fileobject.c:file_iternext(). */ -#define READAHEAD_BUFSIZE 8192 -static PyObject * -BZ2File_iternext(BZ2FileObject *self) -{ - PyBytesObject* ret; - ACQUIRE_LOCK(self); - if (self->mode == MODE_CLOSED) { - RELEASE_LOCK(self); - PyErr_SetString(PyExc_ValueError, - "I/O operation on closed file"); - return NULL; - } - ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE); - RELEASE_LOCK(self); - if (ret == NULL || PyBytes_GET_SIZE(ret) == 0) { - Py_XDECREF(ret); - return NULL; - } - return (PyObject *)ret; -} - -/* ===================================================================== */ -/* BZ2File_Type definition. */ - -PyDoc_VAR(BZ2File__doc__) = -PyDoc_STR( -"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\ -\n\ -Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\ -writing. When opened for writing, the file will be created if it doesn't\n\ -exist, and truncated otherwise. 
If the buffering argument is given, 0 means\n\ -unbuffered, and larger numbers specify the buffer size. If compresslevel\n\ -is given, must be a number between 1 and 9.\n\ -Data read is always returned in bytes; data written ought to be bytes.\n\ -"); - -static PyTypeObject BZ2File_Type = { - PyVarObject_HEAD_INIT(NULL, 0) - "bz2.BZ2File", /*tp_name*/ - sizeof(BZ2FileObject), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - (destructor)BZ2File_dealloc, /*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_reserved*/ - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - PyObject_GenericGetAttr,/*tp_getattro*/ - PyObject_GenericSetAttr,/*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/ - BZ2File__doc__, /*tp_doc*/ - 0, /*tp_traverse*/ - 0, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - (getiterfunc)BZ2File_getiter, /*tp_iter*/ - (iternextfunc)BZ2File_iternext, /*tp_iternext*/ - BZ2File_methods, /*tp_methods*/ - 0, /*tp_members*/ - BZ2File_getset, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - 0, /*tp_dictoffset*/ - (initproc)BZ2File_init, /*tp_init*/ - PyType_GenericAlloc, /*tp_alloc*/ - PyType_GenericNew, /*tp_new*/ - PyObject_Free, /*tp_free*/ - 0, /*tp_is_gc*/ -}; - /* ===================================================================== */ /* Methods of BZ2Comp. 
*/ @@ -1654,7 +412,7 @@ static PyTypeObject BZ2Comp_Type = { PyVarObject_HEAD_INIT(NULL, 0) - "bz2.BZ2Compressor", /*tp_name*/ + "_bz2.BZ2Compressor", /*tp_name*/ sizeof(BZ2CompObject), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor)BZ2Comp_dealloc, /*tp_dealloc*/ @@ -1883,7 +641,7 @@ static PyTypeObject BZ2Decomp_Type = { PyVarObject_HEAD_INIT(NULL, 0) - "bz2.BZ2Decompressor", /*tp_name*/ + "_bz2.BZ2Decompressor", /*tp_name*/ sizeof(BZ2DecompObject), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/ @@ -2140,7 +898,7 @@ static struct PyModuleDef bz2module = { PyModuleDef_HEAD_INIT, - "bz2", + "_bz2", bz2__doc__, -1, bz2_methods, @@ -2151,12 +909,10 @@ }; PyMODINIT_FUNC -PyInit_bz2(void) +PyInit__bz2(void) { PyObject *m; - if (PyType_Ready(&BZ2File_Type) < 0) - return NULL; if (PyType_Ready(&BZ2Comp_Type) < 0) return NULL; if (PyType_Ready(&BZ2Decomp_Type) < 0) @@ -2168,9 +924,6 @@ PyModule_AddObject(m, "__author__", PyUnicode_FromString(__author__)); - Py_INCREF(&BZ2File_Type); - PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type); - Py_INCREF(&BZ2Comp_Type); PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type); diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -1233,7 +1233,7 @@ bz2_extra_link_args = ('-Wl,-search_paths_first',) else: bz2_extra_link_args = () - exts.append( Extension('bz2', ['bz2module.c'], + exts.append( Extension('_bz2', ['_bz2module.c'], libraries = ['bz2'], extra_link_args = bz2_extra_link_args) ) else: