# HG changeset patch # User Antoine Pitrou # Date 1244327956 -7200 diff -r c5c04a1ff3b4 -r b3fbeeb23fd6 Doc/library/io.rst --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -573,6 +573,10 @@ Text I/O The name of the encoding used to decode the stream's bytes into strings, and to encode strings into bytes. + .. attribute:: errors + + The error setting of the decoder or encoder. + .. attribute:: newlines A string, a tuple of strings, or ``None``, indicating the newlines @@ -625,13 +629,9 @@ Text I/O If *line_buffering* is ``True``, :meth:`flush` is implied when a call to write contains a newline character. - :class:`TextIOWrapper` provides these data attributes in addition to those of + :class:`TextIOWrapper` provides one attribute in addition to those of :class:`TextIOBase` and its parents: - .. attribute:: errors - - The encoding and decoding error setting. - .. attribute:: line_buffering Whether line buffering is enabled. diff -r c5c04a1ff3b4 -r b3fbeeb23fd6 Lib/_pyio.py --- /dev/null +++ b/Lib/_pyio.py @@ -0,0 +1,1962 @@ +""" +Python implementation of the io module. +""" + +from __future__ import print_function +from __future__ import unicode_literals + +import os +import abc +import codecs +import warnings +# Import _thread instead of threading to reduce startup cost +try: + from thread import allocate_lock as Lock +except ImportError: + from dummy_thread import allocate_lock as Lock + +import io +from io import __all__ +from io import SEEK_SET, SEEK_CUR, SEEK_END + +__metaclass__ = type + +# open() uses st_blksize whenever we can +DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes + +# NOTE: Base classes defined here are registered with the "official" ABCs +# defined in io.py. We don't use real inheritance though, because we don't +# want to inherit the C implementations. 
+ + +class BlockingIOError(IOError): + + """Exception raised when I/O would block on a non-blocking I/O stream.""" + + def __init__(self, errno, strerror, characters_written=0): + super(IOError, self).__init__(errno, strerror) + if not isinstance(characters_written, (int, long)): + raise TypeError("characters_written must be a integer") + self.characters_written = characters_written + + +def open(file, mode="r", buffering=None, + encoding=None, errors=None, + newline=None, closefd=True): + + r"""Open file and return a stream. Raise IOError upon failure. + + file is either a text or byte string giving the name (and the path + if the file isn't in the current working directory) of the file to + be opened or an integer file descriptor of the file to be + wrapped. (If a file descriptor is given, it is closed when the + returned I/O object is closed, unless closefd is set to False.) + + mode is an optional string that specifies the mode in which the file + is opened. It defaults to 'r' which means open for reading in text + mode. Other common values are 'w' for writing (truncating the file if + it already exists), and 'a' for appending (which on some Unix systems, + means that all writes append to the end of the file regardless of the + current seek position). In text mode, if encoding is not specified the + encoding used is platform dependent. (For reading and writing raw + bytes use binary mode and leave encoding unspecified.) 
The available + modes are: + + ========= =============================================================== + Character Meaning + --------- --------------------------------------------------------------- + 'r' open for reading (default) + 'w' open for writing, truncating the file first + 'a' open for writing, appending to the end of the file if it exists + 'b' binary mode + 't' text mode (default) + '+' open a disk file for updating (reading and writing) + 'U' universal newline mode (for backwards compatibility; unneeded + for new code) + ========= =============================================================== + + The default mode is 'rt' (open for reading text). For binary random + access, the mode 'w+b' opens and truncates the file to 0 bytes, while + 'r+b' opens the file without truncation. + + Python distinguishes between files opened in binary and text modes, + even when the underlying operating system doesn't. Files opened in + binary mode (appending 'b' to the mode argument) return contents as + bytes objects without any decoding. In text mode (the default, or when + 't' is appended to the mode argument), the contents of the file are + returned as strings, the bytes having been first decoded using a + platform-dependent encoding or using the specified encoding if given. + + buffering is an optional integer used to set the buffering policy. By + default full buffering is on. Pass 0 to switch buffering off (only + allowed in binary mode), 1 to set line buffering, and an integer > 1 + for full buffering. + + encoding is the name of the encoding used to decode or encode the + file. This should only be used in text mode. The default encoding is + platform dependent, but any encoding supported by Python can be + passed. See the codecs module for the list of supported encodings. + + errors is an optional string that specifies how encoding errors are to + be handled---this argument should not be used in binary mode. 
Pass + 'strict' to raise a ValueError exception if there is an encoding error + (the default of None has the same effect), or pass 'ignore' to ignore + errors. (Note that ignoring encoding errors can lead to data loss.) + See the documentation for codecs.register for a list of the permitted + encoding error strings. + + newline controls how universal newlines works (it only applies to text + mode). It can be None, '', '\n', '\r', and '\r\n'. It works as + follows: + + * On input, if newline is None, universal newlines mode is + enabled. Lines in the input can end in '\n', '\r', or '\r\n', and + these are translated into '\n' before being returned to the + caller. If it is '', universal newline mode is enabled, but line + endings are returned to the caller untranslated. If it has any of + the other legal values, input lines are only terminated by the given + string, and the line ending is returned to the caller untranslated. + + * On output, if newline is None, any '\n' characters written are + translated to the system default line separator, os.linesep. If + newline is '', no translation takes place. If newline is any of the + other legal values, any '\n' characters written are translated to + the given string. + + If closefd is False, the underlying file descriptor will be kept open + when the file is closed. This does not work when a file name is given + and must be True in that case. + + open() returns a file object whose type depends on the mode, and + through which the standard file operations such as reading and writing + are performed. When open() is used to open a file in a text mode ('w', + 'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open + a file in a binary mode, the returned class varies: in read binary + mode, it returns a BufferedReader; in write binary and append binary + modes, it returns a BufferedWriter, and in read/write mode, it returns + a BufferedRandom. 
+ + It is also possible to use a string or bytearray as a file for both + reading and writing. For strings StringIO can be used like a file + opened in a text mode, and for bytes a BytesIO can be used like a file + opened in a binary mode. + """ + if not isinstance(file, (basestring, int, long)): + raise TypeError("invalid file: %r" % file) + if not isinstance(mode, basestring): + raise TypeError("invalid mode: %r" % mode) + if buffering is not None and not isinstance(buffering, (int, long)): + raise TypeError("invalid buffering: %r" % buffering) + if encoding is not None and not isinstance(encoding, basestring): + raise TypeError("invalid encoding: %r" % encoding) + if errors is not None and not isinstance(errors, basestring): + raise TypeError("invalid errors: %r" % errors) + modes = set(mode) + if modes - set("arwb+tU") or len(mode) > len(modes): + raise ValueError("invalid mode: %r" % mode) + reading = "r" in modes + writing = "w" in modes + appending = "a" in modes + updating = "+" in modes + text = "t" in modes + binary = "b" in modes + if "U" in modes: + if writing or appending: + raise ValueError("can't use U and writing mode at once") + reading = True + if text and binary: + raise ValueError("can't have text and binary mode at once") + if reading + writing + appending > 1: + raise ValueError("can't have read/write/append mode at once") + if not (reading or writing or appending): + raise ValueError("must have exactly one of read/write/append mode") + if binary and encoding is not None: + raise ValueError("binary mode doesn't take an encoding argument") + if binary and errors is not None: + raise ValueError("binary mode doesn't take an errors argument") + if binary and newline is not None: + raise ValueError("binary mode doesn't take a newline argument") + raw = FileIO(file, + (reading and "r" or "") + + (writing and "w" or "") + + (appending and "a" or "") + + (updating and "+" or ""), + closefd) + if buffering is None: + buffering = -1 + line_buffering = 
False + if buffering == 1 or buffering < 0 and raw.isatty(): + buffering = -1 + line_buffering = True + if buffering < 0: + buffering = DEFAULT_BUFFER_SIZE + try: + bs = os.fstat(raw.fileno()).st_blksize + except (os.error, AttributeError): + pass + else: + if bs > 1: + buffering = bs + if buffering < 0: + raise ValueError("invalid buffering size") + if buffering == 0: + if binary: + return raw + raise ValueError("can't have unbuffered text I/O") + if updating: + buffer = BufferedRandom(raw, buffering) + elif writing or appending: + buffer = BufferedWriter(raw, buffering) + elif reading: + buffer = BufferedReader(raw, buffering) + else: + raise ValueError("unknown mode: %r" % mode) + if binary: + return buffer + text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering) + text.mode = mode + return text + + +class DocDescriptor: + """Helper for builtins.open.__doc__ + """ + def __get__(self, obj, typ): + return ( + "open(file, mode='r', buffering=None, encoding=None, " + "errors=None, newline=None, closefd=True)\n\n" + + open.__doc__) + +class OpenWrapper: + """Wrapper for builtins.open + + Trick so that open won't become a bound method when stored + as a class variable (as dbm.dumb does). + + See initstdio() in Python/pythonrun.c. + """ + __doc__ = DocDescriptor() + + def __new__(cls, *args, **kwargs): + return open(*args, **kwargs) + + +class UnsupportedOperation(ValueError, IOError): + pass + + +class IOBase: + __metaclass__ = abc.ABCMeta + + """The abstract base class for all I/O classes, acting on streams of + bytes. There is no public constructor. + + This class provides dummy implementations for many methods that + derived classes can override selectively; the default implementations + represent a file that cannot be read, written or seeked. + + Even though IOBase does not declare read, readinto, or write because + their signatures will vary, implementations and clients should + consider those methods part of the interface. 
Also, implementations + may raise an IOError when operations they do not support are called. + + The basic type used for binary data read from or written to a file is + bytes. bytearrays are accepted too, and in some cases (such as + readinto) needed. Text I/O classes work with str data. + + Note that calling any method (even inquiries) on a closed stream is + undefined. Implementations may raise IOError in this case. + + IOBase (and its subclasses) support the iterator protocol, meaning + that an IOBase object can be iterated over yielding the lines in a + stream. + + IOBase also supports the :keyword:`with` statement. In this example, + fp is closed after the suite of the with statement is complete: + + with open('spam.txt', 'w') as fp: + fp.write('Spam and eggs!') + """ + + ### Internal ### + + def _unsupported(self, name): + """Internal: raise an exception for unsupported operations.""" + raise UnsupportedOperation("%s.%s() not supported" % + (self.__class__.__name__, name)) + + ### Positioning ### + + def seek(self, pos, whence=0): + """Change stream position. + + Change the stream position to byte offset offset. offset is + interpreted relative to the position indicated by whence. Values + for whence are: + + * 0 -- start of stream (the default); offset should be zero or positive + * 1 -- current stream position; offset may be negative + * 2 -- end of stream; offset is usually negative + + Return the new absolute position. + """ + self._unsupported("seek") + + def tell(self): + """Return current stream position.""" + return self.seek(0, 1) + + def truncate(self, pos=None): + """Truncate file to size bytes. + + Size defaults to the current IO position as reported by tell(). Return + the new size. + """ + self._unsupported("truncate") + + ### Flush and close ### + + def flush(self): + """Flush write buffers, if applicable. + + This is not implemented for read-only and non-blocking streams. + """ + # XXX Should this return the number of bytes written???
+ + __closed = False + + def close(self): + """Flush and close the IO object. + + This method has no effect if the file is already closed. + """ + if not self.__closed: + try: + self.flush() + except IOError: + pass # If flush() fails, just give up + self.__closed = True + + def __del__(self): + """Destructor. Calls close().""" + # The try/except block is in case this is called at program + # exit time, when it's possible that globals have already been + # deleted, and then the close() call might fail. Since + # there's nothing we can do about such failures and they annoy + # the end users, we suppress the traceback. + try: + self.close() + except: + pass + + ### Inquiries ### + + def seekable(self): + """Return whether object supports random access. + + If False, seek(), tell() and truncate() will raise IOError. + This method may need to do a test seek(). + """ + return False + + def _checkSeekable(self, msg=None): + """Internal: raise an IOError if file is not seekable + """ + if not self.seekable(): + raise IOError("File or stream is not seekable." + if msg is None else msg) + + + def readable(self): + """Return whether object was opened for reading. + + If False, read() will raise IOError. + """ + return False + + def _checkReadable(self, msg=None): + """Internal: raise an IOError if file is not readable + """ + if not self.readable(): + raise IOError("File or stream is not readable." + if msg is None else msg) + + def writable(self): + """Return whether object was opened for writing. + + If False, write() and truncate() will raise IOError. + """ + return False + + def _checkWritable(self, msg=None): + """Internal: raise an IOError if file is not writable + """ + if not self.writable(): + raise IOError("File or stream is not writable." + if msg is None else msg) + + @property + def closed(self): + """closed: bool. True iff the file has been closed. + + For backwards compatibility, this is a property, not a predicate. 
+ """ + return self.__closed + + def _checkClosed(self, msg=None): + """Internal: raise a ValueError if file is closed + """ + if self.closed: + raise ValueError("I/O operation on closed file." + if msg is None else msg) + + ### Context manager ### + + def __enter__(self): + """Context management protocol. Returns self.""" + self._checkClosed() + return self + + def __exit__(self, *args): + """Context management protocol. Calls close()""" + self.close() + + ### Lower-level APIs ### + + # XXX Should these be present even if unimplemented? + + def fileno(self): + """Returns underlying file descriptor if one exists. + + An IOError is raised if the IO object does not use a file descriptor. + """ + self._unsupported("fileno") + + def isatty(self): + """Return whether this is an 'interactive' stream. + + Return False if it can't be determined. + """ + self._checkClosed() + return False + + ### Readline[s] and writelines ### + + def readline(self, limit=-1): + r"""Read and return a line from the stream. + + If limit is specified, at most limit bytes will be read. + + The line terminator is always b'\n' for binary files; for text + files, the newlines argument to open can be used to select the line + terminator(s) recognized. + """ + # For backwards compatibility, a (slowish) readline().
+ if hasattr(self, "peek"): + def nreadahead(): + readahead = self.peek(1) + if not readahead: + return 1 + n = (readahead.find(b"\n") + 1) or len(readahead) + if limit >= 0: + n = min(n, limit) + return n + else: + def nreadahead(): + return 1 + if limit is None: + limit = -1 + elif not isinstance(limit, (int, long)): + raise TypeError("limit must be an integer") + res = bytearray() + while limit < 0 or len(res) < limit: + b = self.read(nreadahead()) + if not b: + break + res += b + if res.endswith(b"\n"): + break + return bytes(res) + + def __iter__(self): + self._checkClosed() + return self + + def next(self): + line = self.readline() + if not line: + raise StopIteration + return line + + def readlines(self, hint=None): + """Return a list of lines from the stream. + + hint can be specified to control the number of lines read: no more + lines will be read if the total size (in bytes/characters) of all + lines so far exceeds hint. + """ + if hint is not None and not isinstance(hint, (int, long)): + raise TypeError("integer or None expected") + if hint is None or hint <= 0: + return list(self) + n = 0 + lines = [] + for line in self: + lines.append(line) + n += len(line) + if n >= hint: + break + return lines + + def writelines(self, lines): + self._checkClosed() + for line in lines: + self.write(line) + +io.IOBase.register(IOBase) + + +class RawIOBase(IOBase): + + """Base class for raw binary I/O.""" + + # The read() method is implemented by calling readinto(); derived + # classes that want to support read() only need to implement + # readinto() as a primitive operation. In general, readinto() can be + # more efficient than read(). + + # (It would be tempting to also provide an implementation of + # readinto() in terms of read(), in case the latter is a more suitable + # primitive operation, but that would lead to nasty recursion in case + # a subclass doesn't implement either.) + + def read(self, n=-1): + """Read and return up to n bytes. 
+ + Returns an empty bytes object on EOF, or None if the object is + set not to block and has no data to read. + """ + if n is None: + n = -1 + if n < 0: + return self.readall() + b = bytearray(n.__index__()) + n = self.readinto(b) + del b[n:] + return bytes(b) + + def readall(self): + """Read until EOF, using multiple read() calls.""" + res = bytearray() + while True: + data = self.read(DEFAULT_BUFFER_SIZE) + if not data: + break + res += data + return bytes(res) + + def readinto(self, b): + """Read up to len(b) bytes into b. + + Returns number of bytes read (0 for EOF), or None if the object + is set not to block and has no data to read. + """ + self._unsupported("readinto") + + def write(self, b): + """Write the given buffer to the IO stream. + + Returns the number of bytes written, which may be less than len(b). + """ + self._unsupported("write") + +io.RawIOBase.register(RawIOBase) +from _io import FileIO +RawIOBase.register(FileIO) + + +class BufferedIOBase(IOBase): + + """Base class for buffered IO objects. + + The main difference with RawIOBase is that the read() method + supports omitting the size argument, and does not have a default + implementation that defers to readinto(). + + In addition, read(), readinto() and write() may raise + BlockingIOError if the underlying raw stream is in non-blocking + mode and not ready; unlike their raw counterparts, they will never + return None. + + A typical implementation should not inherit from a RawIOBase + implementation, but wrap one. + """ + + def read(self, n=None): + """Read and return up to n bytes. + + If the argument is omitted, None, or negative, reads and + returns all data until EOF. + + If the argument is positive, and the underlying raw stream is + not 'interactive', multiple raw reads may be issued to satisfy + the byte count (unless EOF is reached first).
But for + interactive raw streams (XXX and for pipes?), at most one raw + read will be issued, and a short result does not imply that + EOF is imminent. + + Returns an empty bytes array on EOF. + + Raises BlockingIOError if the underlying raw stream has no + data at the moment. + """ + self._unsupported("read") + + def read1(self, n=None): + """Read up to n bytes with at most one read() system call.""" + self._unsupported("read1") + + def readinto(self, b): + """Read up to len(b) bytes into b. + + Like read(), this may issue multiple reads to the underlying raw + stream, unless the latter is 'interactive'. + + Returns the number of bytes read (0 for EOF). + + Raises BlockingIOError if the underlying raw stream has no + data at the moment. + """ + # XXX This ought to work with anything that supports the buffer API + data = self.read(len(b)) + n = len(data) + try: + b[:n] = data + except TypeError as err: + import array + if not isinstance(b, array.array): + raise err + b[:n] = array.array(b'b', data) + return n + + def write(self, b): + """Write the given buffer to the IO stream. + + Return the number of bytes written, which is never less than + len(b). + + Raises BlockingIOError if the buffer is full and the + underlying raw stream cannot accept more data at the moment. + """ + self._unsupported("write") + + def detach(self): + """ + Separate the underlying raw stream from the buffer and return it. + + After the raw stream has been detached, the buffer is in an unusable + state. + """ + self._unsupported("detach") + +io.BufferedIOBase.register(BufferedIOBase) + + +class _BufferedIOMixin(BufferedIOBase): + + """A mixin implementation of BufferedIOBase with an underlying raw stream. + + This passes most requests on to the underlying raw stream. It + does *not* provide implementations of read(), readinto() or + write(). 
+ """ + + def __init__(self, raw): + self.raw = raw + + ### Positioning ### + + def seek(self, pos, whence=0): + new_position = self.raw.seek(pos, whence) + if new_position < 0: + raise IOError("seek() returned an invalid position") + return new_position + + def tell(self): + pos = self.raw.tell() + if pos < 0: + raise IOError("tell() returned an invalid position") + return pos + + def truncate(self, pos=None): + # Flush the stream. We're mixing buffered I/O with lower-level I/O, + # and a flush may be necessary to synch both views of the current + # file state. + self.flush() + + if pos is None: + pos = self.tell() + # XXX: Should seek() be used, instead of passing the position + # XXX directly to truncate? + return self.raw.truncate(pos) + + ### Flush and close ### + + def flush(self): + self.raw.flush() + + def close(self): + if not self.closed and self.raw is not None: + try: + self.flush() + except IOError: + pass # If flush() fails, just give up + self.raw.close() + + def detach(self): + if self.raw is None: + raise ValueError("raw stream already detached") + self.flush() + raw = self.raw + self.raw = None + return raw + + ### Inquiries ### + + def seekable(self): + return self.raw.seekable() + + def readable(self): + return self.raw.readable() + + def writable(self): + return self.raw.writable() + + @property + def closed(self): + return self.raw.closed + + @property + def name(self): + return self.raw.name + + @property + def mode(self): + return self.raw.mode + + def __repr__(self): + clsname = self.__class__.__name__ + try: + name = self.name + except AttributeError: + return "<_pyio.{0}>".format(clsname) + else: + return "<_pyio.{0} name={1!r}>".format(clsname, name) + + ### Lower-level APIs ### + + def fileno(self): + return self.raw.fileno() + + def isatty(self): + return self.raw.isatty() + + +class BytesIO(BufferedIOBase): + + """Buffered I/O implementation using an in-memory bytes buffer.""" + + def __init__(self, initial_bytes=None): + buf = 
bytearray() + if initial_bytes is not None: + buf.extend(initial_bytes) + self._buffer = buf + self._pos = 0 + + def getvalue(self): + """Return the bytes value (contents) of the buffer + """ + if self.closed: + raise ValueError("getvalue on closed file") + return bytes(self._buffer) + + def read(self, n=None): + if self.closed: + raise ValueError("read from closed file") + if n is None: + n = -1 + if not isinstance(n, (int, long)): + raise TypeError("integer argument expected, got {0!r}".format( + type(n))) + if n < 0: + n = len(self._buffer) + if len(self._buffer) <= self._pos: + return b"" + newpos = min(len(self._buffer), self._pos + n) + b = self._buffer[self._pos : newpos] + self._pos = newpos + return bytes(b) + + def read1(self, n): + """This is the same as read. + """ + return self.read(n) + + def write(self, b): + if self.closed: + raise ValueError("write to closed file") + if isinstance(b, unicode): + raise TypeError("can't write unicode to binary stream") + n = len(b) + if n == 0: + return 0 + pos = self._pos + if pos > len(self._buffer): + # Inserts null bytes between the current end of the file + # and the new write position. 
+ padding = b'\x00' * (pos - len(self._buffer)) + self._buffer += padding + self._buffer[pos:pos + n] = b + self._pos += n + return n + + def seek(self, pos, whence=0): + if self.closed: + raise ValueError("seek on closed file") + try: + pos = pos.__index__() + except AttributeError as err: + raise TypeError("an integer is required") + if whence == 0: + if pos < 0: + raise ValueError("negative seek position %r" % (pos,)) + self._pos = pos + elif whence == 1: + self._pos = max(0, self._pos + pos) + elif whence == 2: + self._pos = max(0, len(self._buffer) + pos) + else: + raise ValueError("invalid whence value") + return self._pos + + def tell(self): + if self.closed: + raise ValueError("tell on closed file") + return self._pos + + def truncate(self, pos=None): + if self.closed: + raise ValueError("truncate on closed file") + if pos is None: + pos = self._pos + elif pos < 0: + raise ValueError("negative truncate position %r" % (pos,)) + del self._buffer[pos:] + return self.seek(pos) + + def readable(self): + return True + + def writable(self): + return True + + def seekable(self): + return True + + +class BufferedReader(_BufferedIOMixin): + + """BufferedReader(raw[, buffer_size]) + + A buffer for a readable, sequential BaseRawIO object. + + The constructor creates a BufferedReader for the given readable raw + stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE + is used. + """ + + def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): + """Create a new buffered reader using the given readable raw IO object. + """ + if not raw.readable(): + raise IOError('"raw" argument must be readable.') + + _BufferedIOMixin.__init__(self, raw) + if buffer_size <= 0: + raise ValueError("invalid buffer size") + self.buffer_size = buffer_size + self._reset_read_buf() + self._read_lock = Lock() + + def _reset_read_buf(self): + self._read_buf = b"" + self._read_pos = 0 + + def read(self, n=None): + """Read n bytes. 
+ + Returns exactly n bytes of data unless the underlying raw IO + stream reaches EOF or if the call would block in non-blocking + mode. If n is negative, read until EOF or until read() would + block. + """ + if n is not None and n < -1: + raise ValueError("invalid number of bytes to read") + with self._read_lock: + return self._read_unlocked(n) + + def _read_unlocked(self, n=None): + nodata_val = b"" + empty_values = (b"", None) + buf = self._read_buf + pos = self._read_pos + + # Special case for when the number of bytes to read is unspecified. + if n is None or n == -1: + self._reset_read_buf() + chunks = [buf[pos:]] # Strip the consumed bytes. + current_size = 0 + while True: + # Read until EOF or until read() would block. + chunk = self.raw.read() + if chunk in empty_values: + nodata_val = chunk + break + current_size += len(chunk) + chunks.append(chunk) + return b"".join(chunks) or nodata_val + + # The number of bytes to read is specified, return at most n bytes. + avail = len(buf) - pos # Length of the available buffered data. + if n <= avail: + # Fast path: the data to read is fully buffered. + self._read_pos += n + return buf[pos:pos+n] + # Slow path: read from the stream until enough bytes are read, + # or until an EOF occurs or until read() would block. + chunks = [buf[pos:]] + wanted = max(self.buffer_size, n) + while avail < n: + chunk = self.raw.read(wanted) + if chunk in empty_values: + nodata_val = chunk + break + avail += len(chunk) + chunks.append(chunk) + # n is more than avail only when an EOF occurred or when + # read() would have blocked. + n = min(n, avail) + out = b"".join(chunks) + self._read_buf = out[n:] # Save the extra data in the buffer. + self._read_pos = 0 + return out[:n] if out else nodata_val + + def peek(self, n=0): + """Returns buffered bytes without advancing the position. + + The argument indicates a desired minimal number of bytes; we + do at most one raw read to satisfy it. We never return more + than self.buffer_size.
+ """ + with self._read_lock: + return self._peek_unlocked(n) + + def _peek_unlocked(self, n=0): + want = min(n, self.buffer_size) + have = len(self._read_buf) - self._read_pos + if have < want or have <= 0: + to_read = self.buffer_size - have + current = self.raw.read(to_read) + if current: + self._read_buf = self._read_buf[self._read_pos:] + current + self._read_pos = 0 + return self._read_buf[self._read_pos:] + + def read1(self, n): + """Reads up to n bytes, with at most one read() system call.""" + # Returns up to n bytes. If at least one byte is buffered, we + # only return buffered bytes. Otherwise, we do one raw read. + if n < 0: + raise ValueError("number of bytes to read must be positive") + if n == 0: + return b"" + with self._read_lock: + self._peek_unlocked(1) + return self._read_unlocked( + min(n, len(self._read_buf) - self._read_pos)) + + def tell(self): + return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos + + def seek(self, pos, whence=0): + if not (0 <= whence <= 2): + raise ValueError("invalid whence value") + with self._read_lock: + if whence == 1: + pos -= len(self._read_buf) - self._read_pos + pos = _BufferedIOMixin.seek(self, pos, whence) + self._reset_read_buf() + return pos + +class BufferedWriter(_BufferedIOMixin): + + """A buffer for a writeable sequential RawIO object. + + The constructor creates a BufferedWriter for the given writeable raw + stream. If the buffer_size is not given, it defaults to + DEFAULT_BUFFER_SIZE. 
+ """ + + _warning_stack_offset = 2 + + def __init__(self, raw, + buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None): + if not raw.writable(): + raise IOError('"raw" argument must be writable.') + + _BufferedIOMixin.__init__(self, raw) + if buffer_size <= 0: + raise ValueError("invalid buffer size") + if max_buffer_size is not None: + warnings.warn("max_buffer_size is deprecated", DeprecationWarning, + self._warning_stack_offset) + self.buffer_size = buffer_size + self._write_buf = bytearray() + self._write_lock = Lock() + + def write(self, b): + if self.closed: + raise ValueError("write to closed file") + if isinstance(b, unicode): + raise TypeError("can't write unicode to binary stream") + with self._write_lock: + # XXX we can implement some more tricks to try and avoid + # partial writes + if len(self._write_buf) > self.buffer_size: + # We're full, so let's pre-flush the buffer + try: + self._flush_unlocked() + except BlockingIOError as e: + # We can't accept anything else. + # XXX Why not just let the exception pass through? + raise BlockingIOError(e.errno, e.strerror, 0) + before = len(self._write_buf) + self._write_buf.extend(b) + written = len(self._write_buf) - before + if len(self._write_buf) > self.buffer_size: + try: + self._flush_unlocked() + except BlockingIOError as e: + if len(self._write_buf) > self.buffer_size: + # We've hit the buffer_size. We have to accept a partial + # write and cut back our buffer. 
+ overage = len(self._write_buf) - self.buffer_size + written -= overage + self._write_buf = self._write_buf[:self.buffer_size] + raise BlockingIOError(e.errno, e.strerror, written) + return written + + def truncate(self, pos=None): + with self._write_lock: + self._flush_unlocked() + if pos is None: + pos = self.raw.tell() + return self.raw.truncate(pos) + + def flush(self): + with self._write_lock: + self._flush_unlocked() + + def _flush_unlocked(self): + if self.closed: + raise ValueError("flush of closed file") + written = 0 + try: + while self._write_buf: + n = self.raw.write(self._write_buf) + if n > len(self._write_buf) or n < 0: + raise IOError("write() returned incorrect number of bytes") + del self._write_buf[:n] + written += n + except BlockingIOError as e: + n = e.characters_written + del self._write_buf[:n] + written += n + raise BlockingIOError(e.errno, e.strerror, written) + + def tell(self): + return _BufferedIOMixin.tell(self) + len(self._write_buf) + + def seek(self, pos, whence=0): + if not (0 <= whence <= 2): + raise ValueError("invalid whence") + with self._write_lock: + self._flush_unlocked() + return _BufferedIOMixin.seek(self, pos, whence) + + +class BufferedRWPair(BufferedIOBase): + + """A buffered reader and writer object together. + + A buffered reader object and buffered writer object put together to + form a sequential IO object that can read and write. This is typically + used with a socket or two-way pipe. + + reader and writer are RawIOBase objects that are readable and + writeable respectively. If the buffer_size is omitted it defaults to + DEFAULT_BUFFER_SIZE. + """ + + # XXX The usefulness of this (compared to having two separate IO + # objects) is questionable. + + def __init__(self, reader, writer, + buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None): + """Constructor. + + The arguments are two RawIO instances. 
+ """ + if max_buffer_size is not None: + warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2) + + if not reader.readable(): + raise IOError('"reader" argument must be readable.') + + if not writer.writable(): + raise IOError('"writer" argument must be writable.') + + self.reader = BufferedReader(reader, buffer_size) + self.writer = BufferedWriter(writer, buffer_size) + + def read(self, n=None): + if n is None: + n = -1 + return self.reader.read(n) + + def readinto(self, b): + return self.reader.readinto(b) + + def write(self, b): + return self.writer.write(b) + + def peek(self, n=0): + return self.reader.peek(n) + + def read1(self, n): + return self.reader.read1(n) + + def readable(self): + return self.reader.readable() + + def writable(self): + return self.writer.writable() + + def flush(self): + return self.writer.flush() + + def close(self): + self.writer.close() + self.reader.close() + + def isatty(self): + return self.reader.isatty() or self.writer.isatty() + + @property + def closed(self): + return self.writer.closed + + +class BufferedRandom(BufferedWriter, BufferedReader): + + """A buffered interface to random access streams. + + The constructor creates a reader and writer for a seekable stream, + raw, given in the first argument. If the buffer_size is omitted it + defaults to DEFAULT_BUFFER_SIZE. + """ + + _warning_stack_offset = 3 + + def __init__(self, raw, + buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None): + raw._checkSeekable() + BufferedReader.__init__(self, raw, buffer_size) + BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size) + + def seek(self, pos, whence=0): + if not (0 <= whence <= 2): + raise ValueError("invalid whence") + self.flush() + if self._read_buf: + # Undo read ahead. + with self._read_lock: + self.raw.seek(self._read_pos - len(self._read_buf), 1) + # First do the raw seek, then empty the read buffer, so that + # if the raw seek fails, we don't lose buffered data forever. 
+ pos = self.raw.seek(pos, whence) + with self._read_lock: + self._reset_read_buf() + if pos < 0: + raise IOError("seek() returned invalid position") + return pos + + def tell(self): + if self._write_buf: + return BufferedWriter.tell(self) + else: + return BufferedReader.tell(self) + + def truncate(self, pos=None): + if pos is None: + pos = self.tell() + # Use seek to flush the read buffer. + self.seek(pos) + return BufferedWriter.truncate(self) + + def read(self, n=None): + if n is None: + n = -1 + self.flush() + return BufferedReader.read(self, n) + + def readinto(self, b): + self.flush() + return BufferedReader.readinto(self, b) + + def peek(self, n=0): + self.flush() + return BufferedReader.peek(self, n) + + def read1(self, n): + self.flush() + return BufferedReader.read1(self, n) + + def write(self, b): + if self._read_buf: + # Undo readahead + with self._read_lock: + self.raw.seek(self._read_pos - len(self._read_buf), 1) + self._reset_read_buf() + return BufferedWriter.write(self, b) + + +class TextIOBase(IOBase): + + """Base class for text I/O. + + This class provides a character and line based interface to stream + I/O. There is no readinto method because Python's character strings + are immutable. There is no public constructor. + """ + + def read(self, n=-1): + """Read at most n characters from stream. + + Read from underlying buffer until we have n characters or we hit EOF. + If n is negative or omitted, read until EOF. + """ + self._unsupported("read") + + def write(self, s): + """Write string s to stream.""" + self._unsupported("write") + + def truncate(self, pos=None): + """Truncate size to pos.""" + self._unsupported("truncate") + + def readline(self): + """Read until newline or EOF. + + Returns an empty string if EOF is hit immediately. + """ + self._unsupported("readline") + + def detach(self): + """ + Separate the underlying buffer from the TextIOBase and return it. 
+ + After the underlying buffer has been detached, the TextIO is in an + unusable state. + """ + self._unsupported("detach") + + @property + def encoding(self): + """Subclasses should override.""" + return None + + @property + def newlines(self): + """Line endings translated so far. + + Only line endings translated during reading are considered. + + Subclasses should override. + """ + return None + + @property + def errors(self): + """Error setting of the decoder or encoder. + + Subclasses should override.""" + return None + +io.TextIOBase.register(TextIOBase) + + +class IncrementalNewlineDecoder(codecs.IncrementalDecoder): + r"""Codec used when reading a file in universal newlines mode. It wraps + another incremental decoder, translating \r\n and \r into \n. It also + records the types of newlines encountered. When used with + translate=False, it ensures that the newline sequence is returned in + one piece. + """ + def __init__(self, decoder, translate, errors='strict'): + codecs.IncrementalDecoder.__init__(self, errors=errors) + self.translate = translate + self.decoder = decoder + self.seennl = 0 + self.pendingcr = False + + def decode(self, input, final=False): + # decode input (with the eventual \r from a previous pass) + if self.decoder is None: + output = input + else: + output = self.decoder.decode(input, final=final) + if self.pendingcr and (output or final): + output = "\r" + output + self.pendingcr = False + + # retain last \r even when not translating data: + # then readline() is sure to get \r\n in one pass + if output.endswith("\r") and not final: + output = output[:-1] + self.pendingcr = True + + # Record which newlines are read + crlf = output.count('\r\n') + cr = output.count('\r') - crlf + lf = output.count('\n') - crlf + self.seennl |= (lf and self._LF) | (cr and self._CR) \ + | (crlf and self._CRLF) + + if self.translate: + if crlf: + output = output.replace("\r\n", "\n") + if cr: + output = output.replace("\r", "\n") + + return output + + def 
getstate(self):
+        if self.decoder is None:
+            buf = b""
+            flag = 0
+        else:
+            buf, flag = self.decoder.getstate()
+        flag <<= 1
+        if self.pendingcr:
+            flag |= 1
+        return buf, flag
+
+    def setstate(self, state):
+        buf, flag = state
+        self.pendingcr = bool(flag & 1)
+        if self.decoder is not None:
+            self.decoder.setstate((buf, flag >> 1))
+
+    def reset(self):
+        self.seennl = 0
+        self.pendingcr = False
+        if self.decoder is not None:
+            self.decoder.reset()
+
+    _LF = 1
+    _CR = 2
+    _CRLF = 4
+
+    @property
+    def newlines(self):
+        return (None,
+                "\n",
+                "\r",
+                ("\r", "\n"),
+                "\r\n",
+                ("\n", "\r\n"),
+                ("\r", "\r\n"),
+                ("\r", "\n", "\r\n")
+               )[self.seennl]
+
+
+class TextIOWrapper(TextIOBase):
+
+    r"""Character and line based layer over a BufferedIOBase object, buffer.
+
+    encoding gives the name of the encoding that the stream will be
+    decoded or encoded with. It defaults to locale.getpreferredencoding.
+
+    errors determines the strictness of encoding and decoding (see the
+    codecs.register) and defaults to "strict".
+
+    newline can be None, '', '\n', '\r', or '\r\n'. It controls the
+    handling of line endings. If it is None, universal newlines is
+    enabled. With this enabled, on input, the line endings '\n', '\r',
+    or '\r\n' are translated to '\n' before being returned to the
+    caller. Conversely, on output, '\n' is translated to the system
+    default line separator, os.linesep. If newline is any other of its
+    legal values, that newline becomes the newline when the file is read
+    and it is returned untranslated. On output, '\n' is converted to the
+    newline.
+
+    If line_buffering is True, a call to flush is implied when a call to
+    write contains a newline character.
+ """ + + _CHUNK_SIZE = 2048 + + def __init__(self, buffer, encoding=None, errors=None, newline=None, + line_buffering=False): + if newline is not None and not isinstance(newline, basestring): + raise TypeError("illegal newline type: %r" % (type(newline),)) + if newline not in (None, "", "\n", "\r", "\r\n"): + raise ValueError("illegal newline value: %r" % (newline,)) + if encoding is None: + try: + encoding = os.device_encoding(buffer.fileno()) + except (AttributeError, UnsupportedOperation): + pass + if encoding is None: + try: + import locale + except ImportError: + # Importing locale may fail if Python is being built + encoding = "ascii" + else: + encoding = locale.getpreferredencoding() + + if not isinstance(encoding, basestring): + raise ValueError("invalid encoding: %r" % encoding) + + if errors is None: + errors = "strict" + else: + if not isinstance(errors, basestring): + raise ValueError("invalid errors: %r" % errors) + + self.buffer = buffer + self._line_buffering = line_buffering + self._encoding = encoding + self._errors = errors + self._readuniversal = not newline + self._readtranslate = newline is None + self._readnl = newline + self._writetranslate = newline != '' + self._writenl = newline or os.linesep + self._encoder = None + self._decoder = None + self._decoded_chars = '' # buffer for text returned from decoder + self._decoded_chars_used = 0 # offset into _decoded_chars for read() + self._snapshot = None # info for reconstructing decoder state + self._seekable = self._telling = self.buffer.seekable() + + if self._seekable and self.writable(): + position = self.buffer.tell() + if position != 0: + try: + self._get_encoder().setstate(0) + except LookupError: + # Sometimes the encoder doesn't exist + pass + + # self._snapshot is either None, or a tuple (dec_flags, next_input) + # where dec_flags is the second (integer) item of the decoder state + # and next_input is the chunk of input bytes that comes next after the + # snapshot point. 
We use this to reconstruct decoder states in tell(). + + # Naming convention: + # - "bytes_..." for integer variables that count input bytes + # - "chars_..." for integer variables that count decoded characters + + def __repr__(self): + try: + name = self.name + except AttributeError: + return "<_pyio.TextIOWrapper encoding='{0}'>".format(self.encoding) + else: + return "<_pyio.TextIOWrapper name={0!r} encoding='{1}'>".format( + name, self.encoding) + + @property + def encoding(self): + return self._encoding + + @property + def errors(self): + return self._errors + + @property + def line_buffering(self): + return self._line_buffering + + def seekable(self): + return self._seekable + + def readable(self): + return self.buffer.readable() + + def writable(self): + return self.buffer.writable() + + def flush(self): + self.buffer.flush() + self._telling = self._seekable + + def close(self): + if self.buffer is not None: + try: + self.flush() + except IOError: + pass # If flush() fails, just give up + self.buffer.close() + + @property + def closed(self): + return self.buffer.closed + + @property + def name(self): + return self.buffer.name + + def fileno(self): + return self.buffer.fileno() + + def isatty(self): + return self.buffer.isatty() + + def write(self, s): + if self.closed: + raise ValueError("write to closed file") + if not isinstance(s, unicode): + raise TypeError("can't write %s to text stream" % + s.__class__.__name__) + length = len(s) + haslf = (self._writetranslate or self._line_buffering) and "\n" in s + if haslf and self._writetranslate and self._writenl != "\n": + s = s.replace("\n", self._writenl) + encoder = self._encoder or self._get_encoder() + # XXX What if we were just reading? 
+ b = encoder.encode(s) + self.buffer.write(b) + if self._line_buffering and (haslf or "\r" in s): + self.flush() + self._snapshot = None + if self._decoder: + self._decoder.reset() + return length + + def _get_encoder(self): + make_encoder = codecs.getincrementalencoder(self._encoding) + self._encoder = make_encoder(self._errors) + return self._encoder + + def _get_decoder(self): + make_decoder = codecs.getincrementaldecoder(self._encoding) + decoder = make_decoder(self._errors) + if self._readuniversal: + decoder = IncrementalNewlineDecoder(decoder, self._readtranslate) + self._decoder = decoder + return decoder + + # The following three methods implement an ADT for _decoded_chars. + # Text returned from the decoder is buffered here until the client + # requests it by calling our read() or readline() method. + def _set_decoded_chars(self, chars): + """Set the _decoded_chars buffer.""" + self._decoded_chars = chars + self._decoded_chars_used = 0 + + def _get_decoded_chars(self, n=None): + """Advance into the _decoded_chars buffer.""" + offset = self._decoded_chars_used + if n is None: + chars = self._decoded_chars[offset:] + else: + chars = self._decoded_chars[offset:offset + n] + self._decoded_chars_used += len(chars) + return chars + + def _rewind_decoded_chars(self, n): + """Rewind the _decoded_chars buffer.""" + if self._decoded_chars_used < n: + raise AssertionError("rewind decoded_chars out of bounds") + self._decoded_chars_used -= n + + def _read_chunk(self): + """ + Read and decode the next chunk of data from the BufferedReader. + """ + + # The return value is True unless EOF was reached. The decoded + # string is placed in self._decoded_chars (replacing its previous + # value). The entire input chunk is sent to the decoder, though + # some of it may remain buffered in the decoder, yet to be + # converted. 
+ + if self._decoder is None: + raise ValueError("no decoder") + + if self._telling: + # To prepare for tell(), we need to snapshot a point in the + # file where the decoder's input buffer is empty. + + dec_buffer, dec_flags = self._decoder.getstate() + # Given this, we know there was a valid snapshot point + # len(dec_buffer) bytes ago with decoder state (b'', dec_flags). + + # Read a chunk, decode it, and put the result in self._decoded_chars. + input_chunk = self.buffer.read1(self._CHUNK_SIZE) + eof = not input_chunk + self._set_decoded_chars(self._decoder.decode(input_chunk, eof)) + + if self._telling: + # At the snapshot point, len(dec_buffer) bytes before the read, + # the next input to be decoded is dec_buffer + input_chunk. + self._snapshot = (dec_flags, dec_buffer + input_chunk) + + return not eof + + def _pack_cookie(self, position, dec_flags=0, + bytes_to_feed=0, need_eof=0, chars_to_skip=0): + # The meaning of a tell() cookie is: seek to position, set the + # decoder flags to dec_flags, read bytes_to_feed bytes, feed them + # into the decoder with need_eof as the EOF flag, then skip + # chars_to_skip characters of the decoded result. For most simple + # decoders, tell() will often just give a byte offset in the file. + return (position | (dec_flags<<64) | (bytes_to_feed<<128) | + (chars_to_skip<<192) | bool(need_eof)<<256) + + def _unpack_cookie(self, bigint): + rest, position = divmod(bigint, 1<<64) + rest, dec_flags = divmod(rest, 1<<64) + rest, bytes_to_feed = divmod(rest, 1<<64) + need_eof, chars_to_skip = divmod(rest, 1<<64) + return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip + + def tell(self): + if not self._seekable: + raise IOError("underlying stream is not seekable") + if not self._telling: + raise IOError("telling position disabled by next() call") + self.flush() + position = self.buffer.tell() + decoder = self._decoder + if decoder is None or self._snapshot is None: + if self._decoded_chars: + # This should never happen. 
+ raise AssertionError("pending decoded text") + return position + + # Skip backward to the snapshot point (see _read_chunk). + dec_flags, next_input = self._snapshot + position -= len(next_input) + + # How many decoded characters have been used up since the snapshot? + chars_to_skip = self._decoded_chars_used + if chars_to_skip == 0: + # We haven't moved from the snapshot point. + return self._pack_cookie(position, dec_flags) + + # Starting from the snapshot position, we will walk the decoder + # forward until it gives us enough decoded characters. + saved_state = decoder.getstate() + try: + # Note our initial start point. + decoder.setstate((b'', dec_flags)) + start_pos = position + start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0 + need_eof = 0 + + # Feed the decoder one byte at a time. As we go, note the + # nearest "safe start point" before the current location + # (a point where the decoder has nothing buffered, so seek() + # can safely start from there and advance to this location). + for next_byte in next_input: + bytes_fed += 1 + chars_decoded += len(decoder.decode(next_byte)) + dec_buffer, dec_flags = decoder.getstate() + if not dec_buffer and chars_decoded <= chars_to_skip: + # Decoder buffer is empty, so this is a safe start point. + start_pos += bytes_fed + chars_to_skip -= chars_decoded + start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0 + if chars_decoded >= chars_to_skip: + break + else: + # We didn't get enough decoded data; signal EOF to get more. + chars_decoded += len(decoder.decode(b'', final=True)) + need_eof = 1 + if chars_decoded < chars_to_skip: + raise IOError("can't reconstruct logical file position") + + # The returned cookie corresponds to the last safe start point. 
+ return self._pack_cookie( + start_pos, start_flags, bytes_fed, need_eof, chars_to_skip) + finally: + decoder.setstate(saved_state) + + def truncate(self, pos=None): + self.flush() + if pos is None: + pos = self.tell() + self.seek(pos) + return self.buffer.truncate() + + def detach(self): + if self.buffer is None: + raise ValueError("buffer is already detached") + self.flush() + buffer = self.buffer + self.buffer = None + return buffer + + def seek(self, cookie, whence=0): + if self.closed: + raise ValueError("tell on closed file") + if not self._seekable: + raise IOError("underlying stream is not seekable") + if whence == 1: # seek relative to current position + if cookie != 0: + raise IOError("can't do nonzero cur-relative seeks") + # Seeking to the current position should attempt to + # sync the underlying buffer with the current position. + whence = 0 + cookie = self.tell() + if whence == 2: # seek relative to end of file + if cookie != 0: + raise IOError("can't do nonzero end-relative seeks") + self.flush() + position = self.buffer.seek(0, 2) + self._set_decoded_chars('') + self._snapshot = None + if self._decoder: + self._decoder.reset() + return position + if whence != 0: + raise ValueError("invalid whence (%r, should be 0, 1 or 2)" % + (whence,)) + if cookie < 0: + raise ValueError("negative seek position %r" % (cookie,)) + self.flush() + + # The strategy of seek() is to go back to the safe start point + # and replay the effect of read(chars_to_skip) from there. + start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \ + self._unpack_cookie(cookie) + + # Seek back to the safe start point. + self.buffer.seek(start_pos) + self._set_decoded_chars('') + self._snapshot = None + + # Restore the decoder to its state from the safe start point. 
+ if cookie == 0 and self._decoder: + self._decoder.reset() + elif self._decoder or dec_flags or chars_to_skip: + self._decoder = self._decoder or self._get_decoder() + self._decoder.setstate((b'', dec_flags)) + self._snapshot = (dec_flags, b'') + + if chars_to_skip: + # Just like _read_chunk, feed the decoder and save a snapshot. + input_chunk = self.buffer.read(bytes_to_feed) + self._set_decoded_chars( + self._decoder.decode(input_chunk, need_eof)) + self._snapshot = (dec_flags, input_chunk) + + # Skip chars_to_skip of the decoded characters. + if len(self._decoded_chars) < chars_to_skip: + raise IOError("can't restore logical file position") + self._decoded_chars_used = chars_to_skip + + # Finally, reset the encoder (merely useful for proper BOM handling) + try: + encoder = self._encoder or self._get_encoder() + except LookupError: + # Sometimes the encoder doesn't exist + pass + else: + if cookie != 0: + encoder.setstate(0) + else: + encoder.reset() + return cookie + + def read(self, n=None): + self._checkReadable() + if n is None: + n = -1 + decoder = self._decoder or self._get_decoder() + if n < 0: + # Read everything. + result = (self._get_decoded_chars() + + decoder.decode(self.buffer.read(), final=True)) + self._set_decoded_chars('') + self._snapshot = None + return result + else: + # Keep reading chunks until we have n characters to return. 
+ eof = False + result = self._get_decoded_chars(n) + while len(result) < n and not eof: + eof = not self._read_chunk() + result += self._get_decoded_chars(n - len(result)) + return result + + def next(self): + self._telling = False + line = self.readline() + if not line: + self._snapshot = None + self._telling = self._seekable + raise StopIteration + return line + + def readline(self, limit=None): + if self.closed: + raise ValueError("read from closed file") + if limit is None: + limit = -1 + elif not isinstance(limit, (int, long)): + raise TypeError("limit must be an integer") + + # Grab all the decoded text (we will rewind any extra bits later). + line = self._get_decoded_chars() + + start = 0 + # Make the decoder if it doesn't already exist. + if not self._decoder: + self._get_decoder() + + pos = endpos = None + while True: + if self._readtranslate: + # Newlines are already translated, only search for \n + pos = line.find('\n', start) + if pos >= 0: + endpos = pos + 1 + break + else: + start = len(line) + + elif self._readuniversal: + # Universal newline search. Find any of \r, \r\n, \n + # The decoder ensures that \r\n are not split in two pieces + + # In C we'd look for these in parallel of course. 
+                nlpos = line.find("\n", start)
+                crpos = line.find("\r", start)
+                if crpos == -1:
+                    if nlpos == -1:
+                        # Nothing found
+                        start = len(line)
+                    else:
+                        # Found \n
+                        endpos = nlpos + 1
+                        break
+                elif nlpos == -1:
+                    # Found lone \r
+                    endpos = crpos + 1
+                    break
+                elif nlpos < crpos:
+                    # Found \n
+                    endpos = nlpos + 1
+                    break
+                elif nlpos == crpos + 1:
+                    # Found \r\n
+                    endpos = crpos + 2
+                    break
+                else:
+                    # Found \r
+                    endpos = crpos + 1
+                    break
+            else:
+                # non-universal
+                pos = line.find(self._readnl)
+                if pos >= 0:
+                    endpos = pos + len(self._readnl)
+                    break
+
+            if limit >= 0 and len(line) >= limit:
+                endpos = limit  # reached length limit
+                break
+
+            # No line ending seen yet - get more data
+            while self._read_chunk():
+                if self._decoded_chars:
+                    break
+            if self._decoded_chars:
+                line += self._get_decoded_chars()
+            else:
+                # end of file
+                self._set_decoded_chars('')
+                self._snapshot = None
+                return line
+
+        if limit >= 0 and endpos > limit:
+            endpos = limit  # don't exceed limit
+
+        # Rewind _decoded_chars to just after the line ending we found.
+        self._rewind_decoded_chars(len(line) - endpos)
+        return line[:endpos]
+
+    @property
+    def newlines(self):
+        return self._decoder.newlines if self._decoder else None
+
+
+class StringIO(TextIOWrapper):
+    """Text I/O implementation using an in-memory buffer.
+
+    The initial_value argument sets the value of object. The newline
+    argument is like the one of TextIOWrapper's constructor.
+    """
+
+    def __init__(self, initial_value="", newline="\n"):
+        super(StringIO, self).__init__(BytesIO(),
+                                       encoding="utf-8",
+                                       errors="strict",
+                                       newline=newline)
+        # Issue #5645: make universal newlines semantics the same as in the
+        # C version, even under Windows.
+        if newline is None:
+            self._writetranslate = False
+        if initial_value:
+            if not isinstance(initial_value, unicode):
+                initial_value = unicode(initial_value)
+            self.write(initial_value)
+            self.seek(0)
+
+    def getvalue(self):
+        self.flush()
+        return self.buffer.getvalue().decode(self._encoding, self._errors)
+
+    def __repr__(self):
+        # TextIOWrapper tells the encoding in its repr. In StringIO,
+        # that's an implementation detail.
+        return object.__repr__(self)
+
+    @property
+    def errors(self):
+        return None
+
+    @property
+    def encoding(self):
+        return None
+
+    def detach(self):
+        # This doesn't make sense on StringIO.
+        self._unsupported("detach")
diff -r c5c04a1ff3b4 -r b3fbeeb23fd6 Lib/io.py
--- a/Lib/io.py
+++ b/Lib/io.py
@@ -1,5 +1,4 @@
-"""
-The io module provides the Python interfaces to stream handling. The
+"""The io module provides the Python interfaces to stream handling. The
 builtin open function is defined in this module.
 
 At the top of the I/O hierarchy is the abstract base class IOBase. It
@@ -35,9 +34,6 @@ DEFAULT_BUFFER_SIZE
 """
 
 # New I/O library conforming to PEP 3116.
-# This is a prototype; hopefully eventually some of this will be
- # XXX edge cases when switching between reading/writing # XXX need to support 1 meaning line-buffered # XXX whenever an argument is None, use the default value @@ -45,1825 +41,58 @@ DEFAULT_BUFFER_SIZE # XXX buffered readinto should work with arbitrary buffer objects # XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG # XXX check writable, readable and seekable in appropriate places -from __future__ import print_function -from __future__ import unicode_literals + __author__ = ("Guido van Rossum , " "Mike Verdone , " - "Mark Russell ") + "Mark Russell , " + "Antoine Pitrou , " + "Amaury Forgeot d'Arc , " + "Benjamin Peterson ") __all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO", "BytesIO", "StringIO", "BufferedIOBase", "BufferedReader", "BufferedWriter", "BufferedRWPair", "BufferedRandom", "TextIOBase", "TextIOWrapper", - "SEEK_SET", "SEEK_CUR", "SEEK_END"] + "UnsupportedOperation", "SEEK_SET", "SEEK_CUR", "SEEK_END"] -import os + +import _io import abc -import codecs -import _fileio -import threading -# open() uses st_blksize whenever we can -DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes +from _io import (DEFAULT_BUFFER_SIZE, BlockingIOError, UnsupportedOperation, + open, FileIO, BytesIO, StringIO, BufferedReader, + BufferedWriter, BufferedRWPair, BufferedRandom, + IncrementalNewlineDecoder, TextIOWrapper) + +OpenWrapper = _io.open # for compatibility with _pyio # for seek() SEEK_SET = 0 SEEK_CUR = 1 SEEK_END = 2 -# py3k has only new style classes -__metaclass__ = type +# Declaring ABCs in C is tricky so we do it here. +# Method descriptions and default implementations are inherited from the C +# version however. 
+class IOBase(_io._IOBase): + __metaclass__ = abc.ABCMeta -class BlockingIOError(IOError): - - """Exception raised when I/O would block on a non-blocking I/O stream.""" - - def __init__(self, errno, strerror, characters_written=0): - IOError.__init__(self, errno, strerror) - self.characters_written = characters_written - - -def open(file, mode="r", buffering=None, encoding=None, errors=None, - newline=None, closefd=True): - r"""Open file and return a stream. If the file cannot be opened, an IOError is - raised. - - file is either a string giving the name (and the path if the file - isn't in the current working directory) of the file to be opened or an - integer file descriptor of the file to be wrapped. (If a file - descriptor is given, it is closed when the returned I/O object is - closed, unless closefd is set to False.) - - mode is an optional string that specifies the mode in which the file - is opened. It defaults to 'r' which means open for reading in text - mode. Other common values are 'w' for writing (truncating the file if - it already exists), and 'a' for appending (which on some Unix systems, - means that all writes append to the end of the file regardless of the - current seek position). In text mode, if encoding is not specified the - encoding used is platform dependent. (For reading and writing raw - bytes use binary mode and leave encoding unspecified.) 
The available - modes are: - - ========= =============================================================== - Character Meaning - --------- --------------------------------------------------------------- - 'r' open for reading (default) - 'w' open for writing, truncating the file first - 'a' open for writing, appending to the end of the file if it exists - 'b' binary mode - 't' text mode (default) - '+' open a disk file for updating (reading and writing) - 'U' universal newline mode (for backwards compatibility; unneeded - for new code) - ========= =============================================================== - - The default mode is 'rt' (open for reading text). For binary random - access, the mode 'w+b' opens and truncates the file to 0 bytes, while - 'r+b' opens the file without truncation. - - Python distinguishes between files opened in binary and text modes, - even when the underlying operating system doesn't. Files opened in - binary mode (appending 'b' to the mode argument) return contents as - bytes objects without any decoding. In text mode (the default, or when - 't' is appended to the mode argument), the contents of the file are - returned as strings, the bytes having been first decoded using a - platform-dependent encoding or using the specified encoding if given. - - buffering is an optional integer used to set the buffering policy. By - default full buffering is on. Pass 0 to switch buffering off (only - allowed in binary mode), 1 to set line buffering, and an integer > 1 - for full buffering. - - encoding is the name of the encoding used to decode or encode the - file. This should only be used in text mode. The default encoding is - platform dependent, but any encoding supported by Python can be - passed. See the codecs module for the list of supported encodings. - - errors is an optional string that specifies how encoding errors are to - be handled---this argument should not be used in binary mode. 
Pass - 'strict' to raise a ValueError exception if there is an encoding error - (the default of None has the same effect), or pass 'ignore' to ignore - errors. (Note that ignoring encoding errors can lead to data loss.) - See the documentation for codecs.register for a list of the permitted - encoding error strings. - - newline controls how universal newlines works (it only applies to text - mode). It can be None, '', '\n', '\r', and '\r\n'. It works as - follows: - - * On input, if newline is None, universal newlines mode is - enabled. Lines in the input can end in '\n', '\r', or '\r\n', and - these are translated into '\n' before being returned to the - caller. If it is '', universal newline mode is enabled, but line - endings are returned to the caller untranslated. If it has any of - the other legal values, input lines are only terminated by the given - string, and the line ending is returned to the caller untranslated. - - * On output, if newline is None, any '\n' characters written are - translated to the system default line separator, os.linesep. If - newline is '', no translation takes place. If newline is any of the - other legal values, any '\n' characters written are translated to - the given string. - - If closefd is False, the underlying file descriptor will be kept open - when the file is closed. This does not work when a file name is given - and must be True in that case. - - open() returns a file object whose type depends on the mode, and - through which the standard file operations such as reading and writing - are performed. When open() is used to open a file in a text mode ('w', - 'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open - a file in a binary mode, the returned class varies: in read binary - mode, it returns a BufferedReader; in write binary and append binary - modes, it returns a BufferedWriter, and in read/write mode, it returns - a BufferedRandom. 
- - It is also possible to use a string or bytearray as a file for both - reading and writing. For strings StringIO can be used like a file - opened in a text mode, and for bytes a BytesIO can be used like a file - opened in a binary mode. - """ - if not isinstance(file, (basestring, int)): - raise TypeError("invalid file: %r" % file) - if not isinstance(mode, basestring): - raise TypeError("invalid mode: %r" % mode) - if buffering is not None and not isinstance(buffering, int): - raise TypeError("invalid buffering: %r" % buffering) - if encoding is not None and not isinstance(encoding, basestring): - raise TypeError("invalid encoding: %r" % encoding) - if errors is not None and not isinstance(errors, basestring): - raise TypeError("invalid errors: %r" % errors) - modes = set(mode) - if modes - set("arwb+tU") or len(mode) > len(modes): - raise ValueError("invalid mode: %r" % mode) - reading = "r" in modes - writing = "w" in modes - appending = "a" in modes - updating = "+" in modes - text = "t" in modes - binary = "b" in modes - if "U" in modes: - if writing or appending: - raise ValueError("can't use U and writing mode at once") - reading = True - if text and binary: - raise ValueError("can't have text and binary mode at once") - if reading + writing + appending > 1: - raise ValueError("can't have read/write/append mode at once") - if not (reading or writing or appending): - raise ValueError("must have exactly one of read/write/append mode") - if binary and encoding is not None: - raise ValueError("binary mode doesn't take an encoding argument") - if binary and errors is not None: - raise ValueError("binary mode doesn't take an errors argument") - if binary and newline is not None: - raise ValueError("binary mode doesn't take a newline argument") - raw = FileIO(file, - (reading and "r" or "") + - (writing and "w" or "") + - (appending and "a" or "") + - (updating and "+" or ""), - closefd) - if buffering is None: - buffering = -1 - line_buffering = False - if 
buffering == 1 or buffering < 0 and raw.isatty(): - buffering = -1 - line_buffering = True - if buffering < 0: - buffering = DEFAULT_BUFFER_SIZE - try: - bs = os.fstat(raw.fileno()).st_blksize - except (os.error, AttributeError): - pass - else: - if bs > 1: - buffering = bs - if buffering < 0: - raise ValueError("invalid buffering size") - if buffering == 0: - if binary: - return raw - raise ValueError("can't have unbuffered text I/O") - if updating: - buffer = BufferedRandom(raw, buffering) - elif writing or appending: - buffer = BufferedWriter(raw, buffering) - elif reading: - buffer = BufferedReader(raw, buffering) - else: - raise ValueError("unknown mode: %r" % mode) - if binary: - return buffer - text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering) - text.mode = mode - return text - -class _DocDescriptor: - """Helper for builtins.open.__doc__ - """ - def __get__(self, obj, typ): - return ( - "open(file, mode='r', buffering=None, encoding=None, " - "errors=None, newline=None, closefd=True)\n\n" + - open.__doc__) - -class OpenWrapper: - """Wrapper for builtins.open - - Trick so that open won't become a bound method when stored - as a class variable (as dumbdbm does). - - See initstdio() in Python/pythonrun.c. - """ - __doc__ = _DocDescriptor() - - def __new__(cls, *args, **kwargs): - return open(*args, **kwargs) - - -class UnsupportedOperation(ValueError, IOError): +class RawIOBase(_io._RawIOBase, IOBase): pass +class BufferedIOBase(_io._BufferedIOBase, IOBase): + pass -class IOBase(object): +class TextIOBase(_io._TextIOBase, IOBase): + pass - """The abstract base class for all I/O classes, acting on streams of - bytes. There is no public constructor. +RawIOBase.register(FileIO) - This class provides dummy implementations for many methods that - derived classes can override selectively; the default implementations - represent a file that cannot be read, written or seeked. 
+for klass in (BytesIO, BufferedReader, BufferedWriter, BufferedRandom, + BufferedRWPair): + BufferedIOBase.register(klass) - Even though IOBase does not declare read, readinto, or write because - their signatures will vary, implementations and clients should - consider those methods part of the interface. Also, implementations - may raise a IOError when operations they do not support are called. - - The basic type used for binary data read from or written to a file is - bytes. bytearrays are accepted too, and in some cases (such as - readinto) needed. Text I/O classes work with str data. - - Note that calling any method (even inquiries) on a closed stream is - undefined. Implementations may raise IOError in this case. - - IOBase (and its subclasses) support the iterator protocol, meaning - that an IOBase object can be iterated over yielding the lines in a - stream. - - IOBase also supports the :keyword:`with` statement. In this example, - fp is closed after the suite of the with statment is complete: - - with open('spam.txt', 'r') as fp: - fp.write('Spam and eggs!') - """ - - __metaclass__ = abc.ABCMeta - - ### Internal ### - - def _unsupported(self, name): - """Internal: raise an exception for unsupported operations.""" - raise UnsupportedOperation("%s.%s() not supported" % - (self.__class__.__name__, name)) - - ### Positioning ### - - def seek(self, pos, whence = 0): - """Change stream position. - - Change the stream position to byte offset offset. offset is - interpreted relative to the position indicated by whence. Values - for whence are: - - * 0 -- start of stream (the default); offset should be zero or positive - * 1 -- current stream position; offset may be negative - * 2 -- end of stream; offset is usually negative - - Return the new absolute position. - """ - self._unsupported("seek") - - def tell(self): - """Return current stream position.""" - return self.seek(0, 1) - - def truncate(self, pos = None): - """Truncate file to size bytes. 
- - Size defaults to the current IO position as reported by tell(). Return - the new size. - """ - self._unsupported("truncate") - - ### Flush and close ### - - def flush(self): - """Flush write buffers, if applicable. - - This is not implemented for read-only and non-blocking streams. - """ - # XXX Should this return the number of bytes written??? - - __closed = False - - def close(self): - """Flush and close the IO object. - - This method has no effect if the file is already closed. - """ - if not self.__closed: - try: - self.flush() - except IOError: - pass # If flush() fails, just give up - self.__closed = True - - def __del__(self): - """Destructor. Calls close().""" - # The try/except block is in case this is called at program - # exit time, when it's possible that globals have already been - # deleted, and then the close() call might fail. Since - # there's nothing we can do about such failures and they annoy - # the end users, we suppress the traceback. - try: - self.close() - except: - pass - - ### Inquiries ### - - def seekable(self): - """Return whether object supports random access. - - If False, seek(), tell() and truncate() will raise IOError. - This method may need to do a test seek(). - """ - return False - - def _checkSeekable(self, msg=None): - """Internal: raise an IOError if file is not seekable - """ - if not self.seekable(): - raise IOError("File or stream is not seekable." - if msg is None else msg) - - - def readable(self): - """Return whether object was opened for reading. - - If False, read() will raise IOError. - """ - return False - - def _checkReadable(self, msg=None): - """Internal: raise an IOError if file is not readable - """ - if not self.readable(): - raise IOError("File or stream is not readable." - if msg is None else msg) - - def writable(self): - """Return whether object was opened for writing. - - If False, write() and truncate() will raise IOError. 
- """ - return False - - def _checkWritable(self, msg=None): - """Internal: raise an IOError if file is not writable - """ - if not self.writable(): - raise IOError("File or stream is not writable." - if msg is None else msg) - - @property - def closed(self): - """closed: bool. True iff the file has been closed. - - For backwards compatibility, this is a property, not a predicate. - """ - return self.__closed - - def _checkClosed(self, msg=None): - """Internal: raise an ValueError if file is closed - """ - if self.closed: - raise ValueError("I/O operation on closed file." - if msg is None else msg) - - ### Context manager ### - - def __enter__(self): - """Context management protocol. Returns self.""" - self._checkClosed() - return self - - def __exit__(self, *args): - """Context management protocol. Calls close()""" - self.close() - - ### Lower-level APIs ### - - # XXX Should these be present even if unimplemented? - - def fileno(self): - """Returns underlying file descriptor if one exists. - - An IOError is raised if the IO object does not use a file descriptor. - """ - self._unsupported("fileno") - - def isatty(self): - """Return whether this is an 'interactive' stream. - - Return False if it can't be determined. - """ - self._checkClosed() - return False - - ### Readline[s] and writelines ### - - def readline(self, limit = -1): - r"""Read and return a line from the stream. - - If limit is specified, at most limit bytes will be read. - - The line terminator is always b'\n' for binary files; for text - files, the newlines argument to open can be used to select the line - terminator(s) recognized. 
- """ - self._checkClosed() - if hasattr(self, "peek"): - def nreadahead(): - readahead = self.peek(1) - if not readahead: - return 1 - n = (readahead.find(b"\n") + 1) or len(readahead) - if limit >= 0: - n = min(n, limit) - return n - else: - def nreadahead(): - return 1 - if limit is None: - limit = -1 - if not isinstance(limit, (int, long)): - raise TypeError("limit must be an integer") - res = bytearray() - while limit < 0 or len(res) < limit: - b = self.read(nreadahead()) - if not b: - break - res += b - if res.endswith(b"\n"): - break - return bytes(res) - - def __iter__(self): - self._checkClosed() - return self - - def next(self): - line = self.readline() - if not line: - raise StopIteration - return line - - def readlines(self, hint=None): - """Return a list of lines from the stream. - - hint can be specified to control the number of lines read: no more - lines will be read if the total size (in bytes/characters) of all - lines so far exceeds hint. - """ - if hint is None: - hint = -1 - if not isinstance(hint, (int, long)): - raise TypeError("hint must be an integer") - if hint <= 0: - return list(self) - n = 0 - lines = [] - for line in self: - lines.append(line) - n += len(line) - if n >= hint: - break - return lines - - def writelines(self, lines): - self._checkClosed() - for line in lines: - self.write(line) - - -class RawIOBase(IOBase): - - """Base class for raw binary I/O.""" - - # The read() method is implemented by calling readinto(); derived - # classes that want to support read() only need to implement - # readinto() as a primitive operation. In general, readinto() can be - # more efficient than read(). - - # (It would be tempting to also provide an implementation of - # readinto() in terms of read(), in case the latter is a more suitable - # primitive operation, but that would lead to nasty recursion in case - # a subclass doesn't implement either.) - - def read(self, n = -1): - """Read and return up to n bytes. 
- - Returns an empty bytes array on EOF, or None if the object is - set not to block and has no data to read. - """ - if n is None: - n = -1 - if n < 0: - return self.readall() - b = bytearray(n.__index__()) - n = self.readinto(b) - del b[n:] - return bytes(b) - - def readall(self): - """Read until EOF, using multiple read() call.""" - res = bytearray() - while True: - data = self.read(DEFAULT_BUFFER_SIZE) - if not data: - break - res += data - return bytes(res) - - def readinto(self, b): - """Read up to len(b) bytes into b. - - Returns number of bytes read (0 for EOF), or None if the object - is set not to block as has no data to read. - """ - self._unsupported("readinto") - - def write(self, b): - """Write the given buffer to the IO stream. - - Returns the number of bytes written, which may be less than len(b). - """ - self._unsupported("write") - - -class FileIO(_fileio._FileIO, RawIOBase): - - """Raw I/O implementation for OS files.""" - - # This multiply inherits from _FileIO and RawIOBase to make - # isinstance(io.FileIO(), io.RawIOBase) return True without requiring - # that _fileio._FileIO inherits from io.RawIOBase (which would be hard - # to do since _fileio.c is written in C). - - def __init__(self, name, mode="r", closefd=True): - _fileio._FileIO.__init__(self, name, mode, closefd) - self._name = name - - def close(self): - _fileio._FileIO.close(self) - RawIOBase.close(self) - - @property - def name(self): - return self._name - - -class BufferedIOBase(IOBase): - - """Base class for buffered IO objects. - - The main difference with RawIOBase is that the read() method - supports omitting the size argument, and does not have a default - implementation that defers to readinto(). - - In addition, read(), readinto() and write() may raise - BlockingIOError if the underlying raw stream is in non-blocking - mode and not ready; unlike their raw counterparts, they will never - return None. 
- - A typical implementation should not inherit from a RawIOBase - implementation, but wrap one. - """ - - def read(self, n = None): - """Read and return up to n bytes. - - If the argument is omitted, None, or negative, reads and - returns all data until EOF. - - If the argument is positive, and the underlying raw stream is - not 'interactive', multiple raw reads may be issued to satisfy - the byte count (unless EOF is reached first). But for - interactive raw streams (XXX and for pipes?), at most one raw - read will be issued, and a short result does not imply that - EOF is imminent. - - Returns an empty bytes array on EOF. - - Raises BlockingIOError if the underlying raw stream has no - data at the moment. - """ - self._unsupported("read") - - def readinto(self, b): - """Read up to len(b) bytes into b. - - Like read(), this may issue multiple reads to the underlying raw - stream, unless the latter is 'interactive'. - - Returns the number of bytes read (0 for EOF). - - Raises BlockingIOError if the underlying raw stream has no - data at the moment. - """ - # XXX This ought to work with anything that supports the buffer API - data = self.read(len(b)) - n = len(data) - try: - b[:n] = data - except TypeError as err: - import array - if not isinstance(b, array.array): - raise err - b[:n] = array.array(b'b', data) - return n - - def write(self, b): - """Write the given buffer to the IO stream. - - Return the number of bytes written, which is never less than - len(b). - - Raises BlockingIOError if the buffer is full and the - underlying raw stream cannot accept more data at the moment. - """ - self._unsupported("write") - - -class _BufferedIOMixin(BufferedIOBase): - - """A mixin implementation of BufferedIOBase with an underlying raw stream. - - This passes most requests on to the underlying raw stream. It - does *not* provide implementations of read(), readinto() or - write(). 
- """ - - def __init__(self, raw): - self.raw = raw - - ### Positioning ### - - def seek(self, pos, whence=0): - return self.raw.seek(pos, whence) - - def tell(self): - return self.raw.tell() - - def truncate(self, pos=None): - # Flush the stream. We're mixing buffered I/O with lower-level I/O, - # and a flush may be necessary to synch both views of the current - # file state. - self.flush() - - if pos is None: - pos = self.tell() - # XXX: Should seek() be used, instead of passing the position - # XXX directly to truncate? - return self.raw.truncate(pos) - - ### Flush and close ### - - def flush(self): - self.raw.flush() - - def close(self): - if not self.closed: - try: - self.flush() - except IOError: - pass # If flush() fails, just give up - self.raw.close() - - ### Inquiries ### - - def seekable(self): - return self.raw.seekable() - - def readable(self): - return self.raw.readable() - - def writable(self): - return self.raw.writable() - - @property - def closed(self): - return self.raw.closed - - @property - def name(self): - return self.raw.name - - @property - def mode(self): - return self.raw.mode - - ### Lower-level APIs ### - - def fileno(self): - return self.raw.fileno() - - def isatty(self): - return self.raw.isatty() - - -class _BytesIO(BufferedIOBase): - - """Buffered I/O implementation using an in-memory bytes buffer.""" - - # XXX More docs - - def __init__(self, initial_bytes=None): - buf = bytearray() - if initial_bytes is not None: - buf += bytearray(initial_bytes) - self._buffer = buf - self._pos = 0 - - def getvalue(self): - """Return the bytes value (contents) of the buffer - """ - if self.closed: - raise ValueError("getvalue on closed file") - return bytes(self._buffer) - - def read(self, n=None): - if self.closed: - raise ValueError("read from closed file") - if n is None: - n = -1 - if not isinstance(n, (int, long)): - raise TypeError("argument must be an integer") - if n < 0: - n = len(self._buffer) - if len(self._buffer) <= self._pos: - 
return b"" - newpos = min(len(self._buffer), self._pos + n) - b = self._buffer[self._pos : newpos] - self._pos = newpos - return bytes(b) - - def read1(self, n): - """this is the same as read. - """ - return self.read(n) - - def write(self, b): - if self.closed: - raise ValueError("write to closed file") - if isinstance(b, unicode): - raise TypeError("can't write unicode to binary stream") - n = len(b) - if n == 0: - return 0 - pos = self._pos - if pos > len(self._buffer): - # Inserts null bytes between the current end of the file - # and the new write position. - padding = b'\x00' * (pos - len(self._buffer)) - self._buffer += padding - self._buffer[pos:pos + n] = b - self._pos += n - return n - - def seek(self, pos, whence=0): - if self.closed: - raise ValueError("seek on closed file") - try: - pos = pos.__index__() - except AttributeError as err: - raise TypeError("an integer is required") # from err - if whence == 0: - if pos < 0: - raise ValueError("negative seek position %r" % (pos,)) - self._pos = pos - elif whence == 1: - self._pos = max(0, self._pos + pos) - elif whence == 2: - self._pos = max(0, len(self._buffer) + pos) - else: - raise ValueError("invalid whence value") - return self._pos - - def tell(self): - if self.closed: - raise ValueError("tell on closed file") - return self._pos - - def truncate(self, pos=None): - if self.closed: - raise ValueError("truncate on closed file") - if pos is None: - pos = self._pos - elif pos < 0: - raise ValueError("negative truncate position %r" % (pos,)) - del self._buffer[pos:] - return self.seek(pos) - - def readable(self): - return True - - def writable(self): - return True - - def seekable(self): - return True - -# Use the faster implementation of BytesIO if available -try: - import _bytesio - - class BytesIO(_bytesio._BytesIO, BufferedIOBase): - __doc__ = _bytesio._BytesIO.__doc__ - -except ImportError: - BytesIO = _BytesIO - - -class BufferedReader(_BufferedIOMixin): - - """BufferedReader(raw[, buffer_size]) - - 
A buffer for a readable, sequential BaseRawIO object. - - The constructor creates a BufferedReader for the given readable raw - stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE - is used. - """ - - def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): - """Create a new buffered reader using the given readable raw IO object. - """ - raw._checkReadable() - _BufferedIOMixin.__init__(self, raw) - self.buffer_size = buffer_size - self._reset_read_buf() - self._read_lock = threading.Lock() - - def _reset_read_buf(self): - self._read_buf = b"" - self._read_pos = 0 - - def read(self, n=None): - """Read n bytes. - - Returns exactly n bytes of data unless the underlying raw IO - stream reaches EOF or if the call would block in non-blocking - mode. If n is negative, read until EOF or until read() would - block. - """ - with self._read_lock: - return self._read_unlocked(n) - - def _read_unlocked(self, n=None): - nodata_val = b"" - empty_values = (b"", None) - buf = self._read_buf - pos = self._read_pos - - # Special case for when the number of bytes to read is unspecified. - if n is None or n == -1: - self._reset_read_buf() - chunks = [buf[pos:]] # Strip the consumed bytes. - current_size = 0 - while True: - # Read until EOF or until read() would block. - chunk = self.raw.read() - if chunk in empty_values: - nodata_val = chunk - break - current_size += len(chunk) - chunks.append(chunk) - return b"".join(chunks) or nodata_val - - # The number of bytes to read is specified, return at most n bytes. - avail = len(buf) - pos # Length of the available buffered data. - if n <= avail: - # Fast path: the data to read is fully buffered. - self._read_pos += n - return buf[pos:pos+n] - # Slow path: read from the stream until enough bytes are read, - # or until an EOF occurs or until read() would block. 
- chunks = [buf[pos:]] - wanted = max(self.buffer_size, n) - while avail < n: - chunk = self.raw.read(wanted) - if chunk in empty_values: - nodata_val = chunk - break - avail += len(chunk) - chunks.append(chunk) - # n is more then avail only when an EOF occurred or when - # read() would have blocked. - n = min(n, avail) - out = b"".join(chunks) - self._read_buf = out[n:] # Save the extra data in the buffer. - self._read_pos = 0 - return out[:n] if out else nodata_val - - def peek(self, n=0): - """Returns buffered bytes without advancing the position. - - The argument indicates a desired minimal number of bytes; we - do at most one raw read to satisfy it. We never return more - than self.buffer_size. - """ - with self._read_lock: - return self._peek_unlocked(n) - - def _peek_unlocked(self, n=0): - want = min(n, self.buffer_size) - have = len(self._read_buf) - self._read_pos - if have < want: - to_read = self.buffer_size - have - current = self.raw.read(to_read) - if current: - self._read_buf = self._read_buf[self._read_pos:] + current - self._read_pos = 0 - return self._read_buf[self._read_pos:] - - def read1(self, n): - """Reads up to n bytes, with at most one read() system call.""" - # Returns up to n bytes. If at least one byte is buffered, we - # only return buffered bytes. Otherwise, we do one raw read. - if n <= 0: - return b"" - with self._read_lock: - self._peek_unlocked(1) - return self._read_unlocked( - min(n, len(self._read_buf) - self._read_pos)) - - def tell(self): - return self.raw.tell() - len(self._read_buf) + self._read_pos - - def seek(self, pos, whence=0): - with self._read_lock: - if whence == 1: - pos -= len(self._read_buf) - self._read_pos - pos = self.raw.seek(pos, whence) - self._reset_read_buf() - return pos - - -class BufferedWriter(_BufferedIOMixin): - - """A buffer for a writeable sequential RawIO object. - - The constructor creates a BufferedWriter for the given writeable raw - stream. 
If the buffer_size is not given, it defaults to - DEAFULT_BUFFER_SIZE. If max_buffer_size is omitted, it defaults to - twice the buffer size. - """ - - def __init__(self, raw, - buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None): - raw._checkWritable() - _BufferedIOMixin.__init__(self, raw) - self.buffer_size = buffer_size - self.max_buffer_size = (2*buffer_size - if max_buffer_size is None - else max_buffer_size) - self._write_buf = bytearray() - self._write_lock = threading.Lock() - - def write(self, b): - if self.closed: - raise ValueError("write to closed file") - if isinstance(b, unicode): - raise TypeError("can't write unicode to binary stream") - with self._write_lock: - # XXX we can implement some more tricks to try and avoid - # partial writes - if len(self._write_buf) > self.buffer_size: - # We're full, so let's pre-flush the buffer - try: - self._flush_unlocked() - except BlockingIOError as e: - # We can't accept anything else. - # XXX Why not just let the exception pass through? - raise BlockingIOError(e.errno, e.strerror, 0) - before = len(self._write_buf) - self._write_buf.extend(b) - written = len(self._write_buf) - before - if len(self._write_buf) > self.buffer_size: - try: - self._flush_unlocked() - except BlockingIOError as e: - if len(self._write_buf) > self.max_buffer_size: - # We've hit max_buffer_size. We have to accept a - # partial write and cut back our buffer. 
- overage = len(self._write_buf) - self.max_buffer_size - self._write_buf = self._write_buf[:self.max_buffer_size] - raise BlockingIOError(e.errno, e.strerror, overage) - return written - - def truncate(self, pos=None): - with self._write_lock: - self._flush_unlocked() - if pos is None: - pos = self.raw.tell() - return self.raw.truncate(pos) - - def flush(self): - with self._write_lock: - self._flush_unlocked() - - def _flush_unlocked(self): - if self.closed: - raise ValueError("flush of closed file") - written = 0 - try: - while self._write_buf: - n = self.raw.write(self._write_buf) - del self._write_buf[:n] - written += n - except BlockingIOError as e: - n = e.characters_written - del self._write_buf[:n] - written += n - raise BlockingIOError(e.errno, e.strerror, written) - - def tell(self): - return self.raw.tell() + len(self._write_buf) - - def seek(self, pos, whence=0): - with self._write_lock: - self._flush_unlocked() - return self.raw.seek(pos, whence) - - -class BufferedRWPair(BufferedIOBase): - - """A buffered reader and writer object together. - - A buffered reader object and buffered writer object put together to - form a sequential IO object that can read and write. This is typically - used with a socket or two-way pipe. - - reader and writer are RawIOBase objects that are readable and - writeable respectively. If the buffer_size is omitted it defaults to - DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered writer) - defaults to twice the buffer size. - """ - - # XXX The usefulness of this (compared to having two separate IO - # objects) is questionable. - - def __init__(self, reader, writer, - buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None): - """Constructor. - - The arguments are two RawIO instances. 
- """ - reader._checkReadable() - writer._checkWritable() - self.reader = BufferedReader(reader, buffer_size) - self.writer = BufferedWriter(writer, buffer_size, max_buffer_size) - - def read(self, n=None): - if n is None: - n = -1 - return self.reader.read(n) - - def readinto(self, b): - return self.reader.readinto(b) - - def write(self, b): - return self.writer.write(b) - - def peek(self, n=0): - return self.reader.peek(n) - - def read1(self, n): - return self.reader.read1(n) - - def readable(self): - return self.reader.readable() - - def writable(self): - return self.writer.writable() - - def flush(self): - return self.writer.flush() - - def close(self): - self.writer.close() - self.reader.close() - - def isatty(self): - return self.reader.isatty() or self.writer.isatty() - - @property - def closed(self): - return self.writer.closed - - -class BufferedRandom(BufferedWriter, BufferedReader): - - """A buffered interface to random access streams. - - The constructor creates a reader and writer for a seekable stream, - raw, given in the first argument. If the buffer_size is omitted it - defaults to DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered - writer) defaults to twice the buffer size. - """ - - def __init__(self, raw, - buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None): - raw._checkSeekable() - BufferedReader.__init__(self, raw, buffer_size) - BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size) - - def seek(self, pos, whence=0): - self.flush() - # First do the raw seek, then empty the read buffer, so that - # if the raw seek fails, we don't lose buffered data forever. - pos = self.raw.seek(pos, whence) - with self._read_lock: - self._reset_read_buf() - return pos - - def tell(self): - if self._write_buf: - return self.raw.tell() + len(self._write_buf) - else: - return BufferedReader.tell(self) - - def truncate(self, pos=None): - if pos is None: - pos = self.tell() - # Use seek to flush the read buffer. 
- self.seek(pos) - return BufferedWriter.truncate(self) - - def read(self, n=None): - if n is None: - n = -1 - self.flush() - return BufferedReader.read(self, n) - - def readinto(self, b): - self.flush() - return BufferedReader.readinto(self, b) - - def peek(self, n=0): - self.flush() - return BufferedReader.peek(self, n) - - def read1(self, n): - self.flush() - return BufferedReader.read1(self, n) - - def write(self, b): - if self._read_buf: - # Undo readahead - with self._read_lock: - self.raw.seek(self._read_pos - len(self._read_buf), 1) - self._reset_read_buf() - return BufferedWriter.write(self, b) - - -class TextIOBase(IOBase): - - """Base class for text I/O. - - This class provides a character and line based interface to stream - I/O. There is no readinto method because Python's character strings - are immutable. There is no public constructor. - """ - - def read(self, n = -1): - """Read at most n characters from stream. - - Read from underlying buffer until we have n characters or we hit EOF. - If n is negative or omitted, read until EOF. - """ - self._unsupported("read") - - def write(self, s): - """Write string s to stream.""" - self._unsupported("write") - - def truncate(self, pos = None): - """Truncate size to pos.""" - self._unsupported("truncate") - - def readline(self): - """Read until newline or EOF. - - Returns an empty string if EOF is hit immediately. - """ - self._unsupported("readline") - - @property - def encoding(self): - """Subclasses should override.""" - return None - - @property - def newlines(self): - """Line endings translated so far. - - Only line endings translated during reading are considered. - - Subclasses should override. - """ - return None - - -class IncrementalNewlineDecoder(codecs.IncrementalDecoder): - """Codec used when reading a file in universal newlines mode. - It wraps another incremental decoder, translating \\r\\n and \\r into \\n. - It also records the types of newlines encountered. 
- When used with translate=False, it ensures that the newline sequence is - returned in one piece. - """ - def __init__(self, decoder, translate, errors='strict'): - codecs.IncrementalDecoder.__init__(self, errors=errors) - self.translate = translate - self.decoder = decoder - self.seennl = 0 - self.pendingcr = False - - def decode(self, input, final=False): - # decode input (with the eventual \r from a previous pass) - output = self.decoder.decode(input, final=final) - if self.pendingcr and (output or final): - output = "\r" + output - self.pendingcr = False - - # retain last \r even when not translating data: - # then readline() is sure to get \r\n in one pass - if output.endswith("\r") and not final: - output = output[:-1] - self.pendingcr = True - - # Record which newlines are read - crlf = output.count('\r\n') - cr = output.count('\r') - crlf - lf = output.count('\n') - crlf - self.seennl |= (lf and self._LF) | (cr and self._CR) \ - | (crlf and self._CRLF) - - if self.translate: - if crlf: - output = output.replace("\r\n", "\n") - if cr: - output = output.replace("\r", "\n") - - return output - - def getstate(self): - buf, flag = self.decoder.getstate() - flag <<= 1 - if self.pendingcr: - flag |= 1 - return buf, flag - - def setstate(self, state): - buf, flag = state - self.pendingcr = bool(flag & 1) - self.decoder.setstate((buf, flag >> 1)) - - def reset(self): - self.seennl = 0 - self.pendingcr = False - self.decoder.reset() - - _LF = 1 - _CR = 2 - _CRLF = 4 - - @property - def newlines(self): - return (None, - "\n", - "\r", - ("\r", "\n"), - "\r\n", - ("\n", "\r\n"), - ("\r", "\r\n"), - ("\r", "\n", "\r\n") - )[self.seennl] - - -class TextIOWrapper(TextIOBase): - - r"""Character and line based layer over a BufferedIOBase object, buffer. - - encoding gives the name of the encoding that the stream will be - decoded or encoded with. It defaults to locale.getpreferredencoding. 
- - errors determines the strictness of encoding and decoding (see the - codecs.register) and defaults to "strict". - - newline can be None, '', '\n', '\r', or '\r\n'. It controls the - handling of line endings. If it is None, universal newlines is - enabled. With this enabled, on input, the lines endings '\n', '\r', - or '\r\n' are translated to '\n' before being returned to the - caller. Conversely, on output, '\n' is translated to the system - default line separator, os.linesep. If newline is any other of its - legal values, that newline becomes the newline when the file is read - and it is returned untranslated. On output, '\n' is converted to the - newline. - - If line_buffering is True, a call to flush is implied when a call to - write contains a newline character. - """ - - _CHUNK_SIZE = 128 - - def __init__(self, buffer, encoding=None, errors=None, newline=None, - line_buffering=False): - if newline not in (None, "", "\n", "\r", "\r\n"): - raise ValueError("illegal newline value: %r" % (newline,)) - if encoding is None: - try: - encoding = os.device_encoding(buffer.fileno()) - except (AttributeError, UnsupportedOperation): - pass - if encoding is None: - try: - import locale - except ImportError: - # Importing locale may fail if Python is being built - encoding = "ascii" - else: - encoding = locale.getpreferredencoding() - - if not isinstance(encoding, basestring): - raise ValueError("invalid encoding: %r" % encoding) - - if errors is None: - errors = "strict" - else: - if not isinstance(errors, basestring): - raise ValueError("invalid errors: %r" % errors) - - self.buffer = buffer - self._line_buffering = line_buffering - self._encoding = encoding - self._errors = errors - self._readuniversal = not newline - self._readtranslate = newline is None - self._readnl = newline - self._writetranslate = newline != '' - self._writenl = newline or os.linesep - self._encoder = None - self._decoder = None - self._decoded_chars = '' # buffer for text returned from 
decoder - self._decoded_chars_used = 0 # offset into _decoded_chars for read() - self._snapshot = None # info for reconstructing decoder state - self._seekable = self._telling = self.buffer.seekable() - - # self._snapshot is either None, or a tuple (dec_flags, next_input) - # where dec_flags is the second (integer) item of the decoder state - # and next_input is the chunk of input bytes that comes next after the - # snapshot point. We use this to reconstruct decoder states in tell(). - - # Naming convention: - # - "bytes_..." for integer variables that count input bytes - # - "chars_..." for integer variables that count decoded characters - - @property - def encoding(self): - return self._encoding - - @property - def errors(self): - return self._errors - - @property - def line_buffering(self): - return self._line_buffering - - def seekable(self): - return self._seekable - - def readable(self): - return self.buffer.readable() - - def writable(self): - return self.buffer.writable() - - def flush(self): - self.buffer.flush() - self._telling = self._seekable - - def close(self): - try: - self.flush() - except: - pass # If flush() fails, just give up - self.buffer.close() - - @property - def closed(self): - return self.buffer.closed - - @property - def name(self): - return self.buffer.name - - def fileno(self): - return self.buffer.fileno() - - def isatty(self): - return self.buffer.isatty() - - def write(self, s): - if self.closed: - raise ValueError("write to closed file") - if not isinstance(s, unicode): - raise TypeError("can't write %s to text stream" % - s.__class__.__name__) - length = len(s) - haslf = (self._writetranslate or self._line_buffering) and "\n" in s - if haslf and self._writetranslate and self._writenl != "\n": - s = s.replace("\n", self._writenl) - encoder = self._encoder or self._get_encoder() - # XXX What if we were just reading? 
- b = encoder.encode(s) - self.buffer.write(b) - if self._line_buffering and (haslf or "\r" in s): - self.flush() - self._snapshot = None - if self._decoder: - self._decoder.reset() - return length - - def _get_encoder(self): - make_encoder = codecs.getincrementalencoder(self._encoding) - self._encoder = make_encoder(self._errors) - return self._encoder - - def _get_decoder(self): - make_decoder = codecs.getincrementaldecoder(self._encoding) - decoder = make_decoder(self._errors) - if self._readuniversal: - decoder = IncrementalNewlineDecoder(decoder, self._readtranslate) - self._decoder = decoder - return decoder - - # The following three methods implement an ADT for _decoded_chars. - # Text returned from the decoder is buffered here until the client - # requests it by calling our read() or readline() method. - def _set_decoded_chars(self, chars): - """Set the _decoded_chars buffer.""" - self._decoded_chars = chars - self._decoded_chars_used = 0 - - def _get_decoded_chars(self, n=None): - """Advance into the _decoded_chars buffer.""" - offset = self._decoded_chars_used - if n is None: - chars = self._decoded_chars[offset:] - else: - chars = self._decoded_chars[offset:offset + n] - self._decoded_chars_used += len(chars) - return chars - - def _rewind_decoded_chars(self, n): - """Rewind the _decoded_chars buffer.""" - if self._decoded_chars_used < n: - raise AssertionError("rewind decoded_chars out of bounds") - self._decoded_chars_used -= n - - def _read_chunk(self): - """ - Read and decode the next chunk of data from the BufferedReader. - - The return value is True unless EOF was reached. The decoded string - is placed in self._decoded_chars (replacing its previous value). - The entire input chunk is sent to the decoder, though some of it - may remain buffered in the decoder, yet to be converted. 
- """ - - if self._decoder is None: - raise ValueError("no decoder") - - if self._telling: - # To prepare for tell(), we need to snapshot a point in the - # file where the decoder's input buffer is empty. - - dec_buffer, dec_flags = self._decoder.getstate() - # Given this, we know there was a valid snapshot point - # len(dec_buffer) bytes ago with decoder state (b'', dec_flags). - - # Read a chunk, decode it, and put the result in self._decoded_chars. - input_chunk = self.buffer.read1(self._CHUNK_SIZE) - eof = not input_chunk - self._set_decoded_chars(self._decoder.decode(input_chunk, eof)) - - if self._telling: - # At the snapshot point, len(dec_buffer) bytes before the read, - # the next input to be decoded is dec_buffer + input_chunk. - self._snapshot = (dec_flags, dec_buffer + input_chunk) - - return not eof - - def _pack_cookie(self, position, dec_flags=0, - bytes_to_feed=0, need_eof=0, chars_to_skip=0): - # The meaning of a tell() cookie is: seek to position, set the - # decoder flags to dec_flags, read bytes_to_feed bytes, feed them - # into the decoder with need_eof as the EOF flag, then skip - # chars_to_skip characters of the decoded result. For most simple - # decoders, tell() will often just give a byte offset in the file. 
- return (position | (dec_flags<<64) | (bytes_to_feed<<128) | - (chars_to_skip<<192) | bool(need_eof)<<256) - - def _unpack_cookie(self, bigint): - rest, position = divmod(bigint, 1<<64) - rest, dec_flags = divmod(rest, 1<<64) - rest, bytes_to_feed = divmod(rest, 1<<64) - need_eof, chars_to_skip = divmod(rest, 1<<64) - return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip - - def tell(self): - if not self._seekable: - raise IOError("underlying stream is not seekable") - if not self._telling: - raise IOError("telling position disabled by next() call") - self.flush() - position = self.buffer.tell() - decoder = self._decoder - if decoder is None or self._snapshot is None: - if self._decoded_chars: - # This should never happen. - raise AssertionError("pending decoded text") - return position - - # Skip backward to the snapshot point (see _read_chunk). - dec_flags, next_input = self._snapshot - position -= len(next_input) - - # How many decoded characters have been used up since the snapshot? - chars_to_skip = self._decoded_chars_used - if chars_to_skip == 0: - # We haven't moved from the snapshot point. - return self._pack_cookie(position, dec_flags) - - # Starting from the snapshot position, we will walk the decoder - # forward until it gives us enough decoded characters. - saved_state = decoder.getstate() - try: - # Note our initial start point. - decoder.setstate((b'', dec_flags)) - start_pos = position - start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0 - need_eof = 0 - - # Feed the decoder one byte at a time. As we go, note the - # nearest "safe start point" before the current location - # (a point where the decoder has nothing buffered, so seek() - # can safely start from there and advance to this location). 
- for next_byte in next_input: - bytes_fed += 1 - chars_decoded += len(decoder.decode(next_byte)) - dec_buffer, dec_flags = decoder.getstate() - if not dec_buffer and chars_decoded <= chars_to_skip: - # Decoder buffer is empty, so this is a safe start point. - start_pos += bytes_fed - chars_to_skip -= chars_decoded - start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0 - if chars_decoded >= chars_to_skip: - break - else: - # We didn't get enough decoded data; signal EOF to get more. - chars_decoded += len(decoder.decode(b'', final=True)) - need_eof = 1 - if chars_decoded < chars_to_skip: - raise IOError("can't reconstruct logical file position") - - # The returned cookie corresponds to the last safe start point. - return self._pack_cookie( - start_pos, start_flags, bytes_fed, need_eof, chars_to_skip) - finally: - decoder.setstate(saved_state) - - def truncate(self, pos=None): - self.flush() - if pos is None: - pos = self.tell() - self.seek(pos) - return self.buffer.truncate() - - def seek(self, cookie, whence=0): - if self.closed: - raise ValueError("tell on closed file") - if not self._seekable: - raise IOError("underlying stream is not seekable") - if whence == 1: # seek relative to current position - if cookie != 0: - raise IOError("can't do nonzero cur-relative seeks") - # Seeking to the current position should attempt to - # sync the underlying buffer with the current position. 
- whence = 0 - cookie = self.tell() - if whence == 2: # seek relative to end of file - if cookie != 0: - raise IOError("can't do nonzero end-relative seeks") - self.flush() - position = self.buffer.seek(0, 2) - self._set_decoded_chars('') - self._snapshot = None - if self._decoder: - self._decoder.reset() - return position - if whence != 0: - raise ValueError("invalid whence (%r, should be 0, 1 or 2)" % - (whence,)) - if cookie < 0: - raise ValueError("negative seek position %r" % (cookie,)) - self.flush() - - # The strategy of seek() is to go back to the safe start point - # and replay the effect of read(chars_to_skip) from there. - start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \ - self._unpack_cookie(cookie) - - # Seek back to the safe start point. - self.buffer.seek(start_pos) - self._set_decoded_chars('') - self._snapshot = None - - # Restore the decoder to its state from the safe start point. - if self._decoder or dec_flags or chars_to_skip: - self._decoder = self._decoder or self._get_decoder() - self._decoder.setstate((b'', dec_flags)) - self._snapshot = (dec_flags, b'') - - if chars_to_skip: - # Just like _read_chunk, feed the decoder and save a snapshot. - input_chunk = self.buffer.read(bytes_to_feed) - self._set_decoded_chars( - self._decoder.decode(input_chunk, need_eof)) - self._snapshot = (dec_flags, input_chunk) - - # Skip chars_to_skip of the decoded characters. - if len(self._decoded_chars) < chars_to_skip: - raise IOError("can't restore logical file position") - self._decoded_chars_used = chars_to_skip - - return cookie - - def read(self, n=None): - if n is None: - n = -1 - decoder = self._decoder or self._get_decoder() - if n < 0: - # Read everything. - result = (self._get_decoded_chars() + - decoder.decode(self.buffer.read(), final=True)) - self._set_decoded_chars('') - self._snapshot = None - return result - else: - # Keep reading chunks until we have n characters to return. 
- eof = False - result = self._get_decoded_chars(n) - while len(result) < n and not eof: - eof = not self._read_chunk() - result += self._get_decoded_chars(n - len(result)) - return result - - def next(self): - self._telling = False - line = self.readline() - if not line: - self._snapshot = None - self._telling = self._seekable - raise StopIteration - return line - - def readline(self, limit=None): - if self.closed: - raise ValueError("read from closed file") - if limit is None: - limit = -1 - if not isinstance(limit, (int, long)): - raise TypeError("limit must be an integer") - - # Grab all the decoded text (we will rewind any extra bits later). - line = self._get_decoded_chars() - - start = 0 - decoder = self._decoder or self._get_decoder() - - pos = endpos = None - while True: - if self._readtranslate: - # Newlines are already translated, only search for \n - pos = line.find('\n', start) - if pos >= 0: - endpos = pos + 1 - break - else: - start = len(line) - - elif self._readuniversal: - # Universal newline search. Find any of \r, \r\n, \n - # The decoder ensures that \r\n are not split in two pieces - - # In C we'd look for these in parallel of course. 
- nlpos = line.find("\n", start) - crpos = line.find("\r", start) - if crpos == -1: - if nlpos == -1: - # Nothing found - start = len(line) - else: - # Found \n - endpos = nlpos + 1 - break - elif nlpos == -1: - # Found lone \r - endpos = crpos + 1 - break - elif nlpos < crpos: - # Found \n - endpos = nlpos + 1 - break - elif nlpos == crpos + 1: - # Found \r\n - endpos = crpos + 2 - break - else: - # Found \r - endpos = crpos + 1 - break - else: - # non-universal - pos = line.find(self._readnl) - if pos >= 0: - endpos = pos + len(self._readnl) - break - - if limit >= 0 and len(line) >= limit: - endpos = limit # reached length limit - break - - # No line ending seen yet - get more data - more_line = '' - while self._read_chunk(): - if self._decoded_chars: - break - if self._decoded_chars: - line += self._get_decoded_chars() - else: - # end of file - self._set_decoded_chars('') - self._snapshot = None - return line - - if limit >= 0 and endpos > limit: - endpos = limit # don't exceed limit - - # Rewind _decoded_chars to just after the line ending we found. - self._rewind_decoded_chars(len(line) - endpos) - return line[:endpos] - - @property - def newlines(self): - return self._decoder.newlines if self._decoder else None - -class StringIO(TextIOWrapper): - - """An in-memory stream for text. The initial_value argument sets the - value of object. The other arguments are like those of TextIOWrapper's - constructor. 
- """ - - def __init__(self, initial_value="", encoding="utf-8", - errors="strict", newline="\n"): - super(StringIO, self).__init__(BytesIO(), - encoding=encoding, - errors=errors, - newline=newline) - if initial_value: - if not isinstance(initial_value, unicode): - initial_value = unicode(initial_value) - self.write(initial_value) - self.seek(0) - - def getvalue(self): - self.flush() - return self.buffer.getvalue().decode(self._encoding, self._errors) +for klass in (StringIO, TextIOWrapper): + TextIOBase.register(klass) +del klass diff -r c5c04a1ff3b4 -r b3fbeeb23fd6 Lib/test/test_bufio.py --- a/Lib/test/test_bufio.py +++ b/Lib/test/test_bufio.py @@ -1,12 +1,15 @@ import unittest -from test import test_support +from test import test_support as support -# Simple test to ensure that optimizations in fileobject.c deliver -# the expected results. For best testing, run this under a debug-build -# Python too (to exercise asserts in the C code). +import io # C implementation. +import _pyio as pyio # Python implementation. -lengths = range(1, 257) + [512, 1000, 1024, 2048, 4096, 8192, 10000, - 16384, 32768, 65536, 1000000] +# Simple test to ensure that optimizations in the IO library deliver the +# expected results. For best testing, run this under a debug-build Python too +# (to exercise asserts in the C code). + +lengths = list(range(1, 257)) + [512, 1000, 1024, 2048, 4096, 8192, 10000, + 16384, 32768, 65536, 1000000] class BufferSizeTest(unittest.TestCase): def try_one(self, s): @@ -14,27 +17,27 @@ class BufferSizeTest(unittest.TestCase): # .readline()s deliver what we wrote. # Ensure we can open TESTFN for writing. - test_support.unlink(test_support.TESTFN) + support.unlink(support.TESTFN) # Since C doesn't guarantee we can write/read arbitrary bytes in text # files, use binary mode. 
- f = open(test_support.TESTFN, "wb") + f = self.open(support.TESTFN, "wb") try: # write once with \n and once without f.write(s) - f.write("\n") + f.write(b"\n") f.write(s) f.close() - f = open(test_support.TESTFN, "rb") + f = open(support.TESTFN, "rb") line = f.readline() - self.assertEqual(line, s + "\n") + self.assertEqual(line, s + b"\n") line = f.readline() self.assertEqual(line, s) line = f.readline() self.assert_(not line) # Must be at EOF f.close() finally: - test_support.unlink(test_support.TESTFN) + support.unlink(support.TESTFN) def drive_one(self, pattern): for length in lengths: @@ -47,19 +50,27 @@ class BufferSizeTest(unittest.TestCase): teststring = pattern * q + pattern[:r] self.assertEqual(len(teststring), length) self.try_one(teststring) - self.try_one(teststring + "x") + self.try_one(teststring + b"x") self.try_one(teststring[:-1]) def test_primepat(self): # A pattern with prime length, to avoid simple relationships with # stdio buffer sizes. - self.drive_one("1234567890\00\01\02\03\04\05\06") + self.drive_one(b"1234567890\00\01\02\03\04\05\06") def test_nullpat(self): - self.drive_one("\0" * 1000) + self.drive_one(bytes(1000)) + + +class CBufferSizeTest(BufferSizeTest): + open = io.open + +class PyBufferSizeTest(BufferSizeTest): + open = staticmethod(pyio.open) + def test_main(): - test_support.run_unittest(BufferSizeTest) + support.run_unittest(CBufferSizeTest, PyBufferSizeTest) if __name__ == "__main__": test_main() diff -r c5c04a1ff3b4 -r b3fbeeb23fd6 Lib/test/test_file.py --- a/Lib/test/test_file.py +++ b/Lib/test/test_file.py @@ -1,13 +1,14 @@ +from __future__ import print_function + import sys import os import unittest -import itertools -import time -import threading from array import array from weakref import proxy -from test import test_support +import io +import _pyio as pyio + from test.test_support import TESTFN, findfile, run_unittest from UserList import UserList @@ -15,7 +16,7 @@ class AutoFileTests(unittest.TestCase): # file 
tests for which a test file is automatically set up def setUp(self): - self.f = open(TESTFN, 'wb') + self.f = self.open(TESTFN, 'wb') def tearDown(self): if self.f: @@ -25,7 +26,7 @@ class AutoFileTests(unittest.TestCase): def testWeakRefs(self): # verify weak references p = proxy(self.f) - p.write('teststring') + p.write(b'teststring') self.assertEquals(self.f.tell(), p.tell()) self.f.close() self.f = None @@ -34,35 +35,35 @@ class AutoFileTests(unittest.TestCase): def testAttributes(self): # verify expected attributes exist f = self.f - softspace = f.softspace f.name # merely shouldn't blow up f.mode # ditto f.closed # ditto - # verify softspace is writable - f.softspace = softspace # merely shouldn't blow up - - # verify the others aren't - for attr in 'name', 'mode', 'closed': - self.assertRaises((AttributeError, TypeError), setattr, f, attr, 'oops') - def testReadinto(self): # verify readinto - self.f.write('12') + self.f.write(b'12') self.f.close() - a = array('c', 'x'*10) - self.f = open(TESTFN, 'rb') + a = array('b', b'x'*10) + self.f = self.open(TESTFN, 'rb') n = self.f.readinto(a) - self.assertEquals('12', a.tostring()[:n]) + self.assertEquals(b'12', a.tostring()[:n]) + + def testReadinto_text(self): + # verify readinto refuses text files + a = array('b', b'x'*10) + self.f.close() + self.f = self.open(TESTFN, 'r') + if hasattr(self.f, "readinto"): + self.assertRaises(TypeError, self.f.readinto, a) def testWritelinesUserList(self): # verify writelines with instance sequence - l = UserList(['1', '2']) + l = UserList([b'1', b'2']) self.f.writelines(l) self.f.close() - self.f = open(TESTFN, 'rb') + self.f = self.open(TESTFN, 'rb') buf = self.f.read() - self.assertEquals(buf, '12') + self.assertEquals(buf, b'12') def testWritelinesIntegers(self): # verify writelines with integers @@ -81,36 +82,43 @@ class AutoFileTests(unittest.TestCase): self.assertRaises(TypeError, self.f.writelines, [NonString(), NonString()]) - def testRepr(self): - # verify repr works - 
self.assert_(repr(self.f).startswith(">sys.__stdout__, ( + print(( ' Skipping sys.stdin.seek(-1), it may crash the interpreter.' - ' Test manually.') - self.assertRaises(IOError, sys.stdin.truncate) - - def testUnicodeOpen(self): - # verify repr works for unicode too - f = open(unicode(TESTFN), "w") - self.assert_(repr(f).startswith(" + # "file.truncate fault on windows" + os.unlink(TESTFN) + f = self.open(TESTFN, 'wb') - def bug801631(): - # SF bug - # "file.truncate fault on windows" - f = open(TESTFN, 'wb') - f.write('12345678901') # 11 bytes + try: + f.write(b'12345678901') # 11 bytes f.close() - f = open(TESTFN,'rb+') + f = self.open(TESTFN,'rb+') data = f.read(5) - if data != '12345': + if data != b'12345': self.fail("Read on file opened for update failed %r" % data) if f.tell() != 5: self.fail("File pos after read wrong %d" % f.tell()) @@ -234,56 +220,42 @@ class OtherFileTests(unittest.TestCase): size = os.path.getsize(TESTFN) if size != 5: self.fail("File size after ftruncate wrong %d" % size) - - try: - bug801631() finally: + f.close() os.unlink(TESTFN) def testIteration(self): # Test the complex interaction when mixing file-iteration and the - # various read* methods. Ostensibly, the mixture could just be tested - # to work when it should work according to the Python language, - # instead of fail when it should fail according to the current CPython - # implementation. People don't always program Python the way they - # should, though, and the implemenation might change in subtle ways, - # so we explicitly test for errors, too; the test will just have to - # be updated when the implementation changes. + # various read* methods. 
dataoffset = 16384 - filler = "ham\n" + filler = b"ham\n" assert not dataoffset % len(filler), \ "dataoffset must be multiple of len(filler)" nchunks = dataoffset // len(filler) testlines = [ - "spam, spam and eggs\n", - "eggs, spam, ham and spam\n", - "saussages, spam, spam and eggs\n", - "spam, ham, spam and eggs\n", - "spam, spam, spam, spam, spam, ham, spam\n", - "wonderful spaaaaaam.\n" + b"spam, spam and eggs\n", + b"eggs, spam, ham and spam\n", + b"saussages, spam, spam and eggs\n", + b"spam, ham, spam and eggs\n", + b"spam, spam, spam, spam, spam, ham, spam\n", + b"wonderful spaaaaaam.\n" ] methods = [("readline", ()), ("read", ()), ("readlines", ()), - ("readinto", (array("c", " "*100),))] + ("readinto", (array("b", b" "*100),))] try: # Prepare the testfile - bag = open(TESTFN, "w") + bag = self.open(TESTFN, "wb") bag.write(filler * nchunks) bag.writelines(testlines) bag.close() # Test for appropriate errors mixing read* and iteration for methodname, args in methods: - f = open(TESTFN) - if f.next() != filler: + f = self.open(TESTFN, 'rb') + if next(f) != filler: self.fail, "Broken testfile" meth = getattr(f, methodname) - try: - meth(*args) - except ValueError: - pass - else: - self.fail("%s%r after next() didn't raise ValueError" % - (methodname, args)) + meth(*args) # This simply shouldn't fail f.close() # Test to see if harmless (by accident) mixing of read* and @@ -293,9 +265,9 @@ class OtherFileTests(unittest.TestCase): # ("h", "a", "m", "\n"), so 4096 lines of that should get us # exactly on the buffer boundary for any power-of-2 buffersize # between 4 and 16384 (inclusive). - f = open(TESTFN) + f = self.open(TESTFN, 'rb') for i in range(nchunks): - f.next() + next(f) testline = testlines.pop(0) try: line = f.readline() @@ -306,7 +278,7 @@ class OtherFileTests(unittest.TestCase): self.fail("readline() after next() with empty buffer " "failed. 
Got %r, expected %r" % (line, testline)) testline = testlines.pop(0) - buf = array("c", "\x00" * len(testline)) + buf = array("b", b"\x00" * len(testline)) try: f.readinto(buf) except ValueError: @@ -335,7 +307,7 @@ class OtherFileTests(unittest.TestCase): self.fail("readlines() after next() with empty buffer " "failed. Got %r, expected %r" % (line, testline)) # Reading after iteration hit EOF shouldn't hurt either - f = open(TESTFN) + f = self.open(TESTFN, 'rb') try: for line in f: pass @@ -351,222 +323,19 @@ class OtherFileTests(unittest.TestCase): finally: os.unlink(TESTFN) -class FileSubclassTests(unittest.TestCase): +class COtherFileTests(OtherFileTests): + open = io.open - def testExit(self): - # test that exiting with context calls subclass' close - class C(file): - def __init__(self, *args): - self.subclass_closed = False - file.__init__(self, *args) - def close(self): - self.subclass_closed = True - file.close(self) - - with C(TESTFN, 'w') as f: - pass - self.failUnless(f.subclass_closed) - - -class FileThreadingTests(unittest.TestCase): - # These tests check the ability to call various methods of file objects - # (including close()) concurrently without crashing the Python interpreter. 
- # See #815646, #595601 - - def setUp(self): - self.f = None - self.filename = TESTFN - with open(self.filename, "w") as f: - f.write("\n".join("0123456789")) - self._count_lock = threading.Lock() - self.close_count = 0 - self.close_success_count = 0 - - def tearDown(self): - if self.f: - try: - self.f.close() - except (EnvironmentError, ValueError): - pass - try: - os.remove(self.filename) - except EnvironmentError: - pass - - def _create_file(self): - self.f = open(self.filename, "w+") - - def _close_file(self): - with self._count_lock: - self.close_count += 1 - self.f.close() - with self._count_lock: - self.close_success_count += 1 - - def _close_and_reopen_file(self): - self._close_file() - # if close raises an exception thats fine, self.f remains valid so - # we don't need to reopen. - self._create_file() - - def _run_workers(self, func, nb_workers, duration=0.2): - with self._count_lock: - self.close_count = 0 - self.close_success_count = 0 - self.do_continue = True - threads = [] - try: - for i in range(nb_workers): - t = threading.Thread(target=func) - t.start() - threads.append(t) - for _ in xrange(100): - time.sleep(duration/100) - with self._count_lock: - if self.close_count-self.close_success_count > nb_workers+1: - if test_support.verbose: - print 'Q', - break - time.sleep(duration) - finally: - self.do_continue = False - for t in threads: - t.join() - - def _test_close_open_io(self, io_func, nb_workers=5): - def worker(): - self._create_file() - funcs = itertools.cycle(( - lambda: io_func(), - lambda: self._close_and_reopen_file(), - )) - for f in funcs: - if not self.do_continue: - break - try: - f() - except (IOError, ValueError): - pass - self._run_workers(worker, nb_workers) - if test_support.verbose: - # Useful verbose statistics when tuning this test to take - # less time to run but still ensuring that its still useful. - # - # the percent of close calls that raised an error - percent = 100. 
- 100.*self.close_success_count/self.close_count - print self.close_count, ('%.4f ' % percent), - - def test_close_open(self): - def io_func(): - pass - self._test_close_open_io(io_func) - - def test_close_open_flush(self): - def io_func(): - self.f.flush() - self._test_close_open_io(io_func) - - def test_close_open_iter(self): - def io_func(): - list(iter(self.f)) - self._test_close_open_io(io_func) - - def test_close_open_isatty(self): - def io_func(): - self.f.isatty() - self._test_close_open_io(io_func) - - def test_close_open_print(self): - def io_func(): - print >> self.f, '' - self._test_close_open_io(io_func) - - def test_close_open_read(self): - def io_func(): - self.f.read(0) - self._test_close_open_io(io_func) - - def test_close_open_readinto(self): - def io_func(): - a = array('c', 'xxxxx') - self.f.readinto(a) - self._test_close_open_io(io_func) - - def test_close_open_readline(self): - def io_func(): - self.f.readline() - self._test_close_open_io(io_func) - - def test_close_open_readlines(self): - def io_func(): - self.f.readlines() - self._test_close_open_io(io_func) - - def test_close_open_seek(self): - def io_func(): - self.f.seek(0, 0) - self._test_close_open_io(io_func) - - def test_close_open_tell(self): - def io_func(): - self.f.tell() - self._test_close_open_io(io_func) - - def test_close_open_truncate(self): - def io_func(): - self.f.truncate() - self._test_close_open_io(io_func) - - def test_close_open_write(self): - def io_func(): - self.f.write('') - self._test_close_open_io(io_func) - - def test_close_open_writelines(self): - def io_func(): - self.f.writelines('') - self._test_close_open_io(io_func) - - -class StdoutTests(unittest.TestCase): - - def test_move_stdout_on_write(self): - # Issue 3242: sys.stdout can be replaced (and freed) during a - # print statement; prevent a segfault in this case - save_stdout = sys.stdout - - class File: - def write(self, data): - if '\n' in data: - sys.stdout = save_stdout - - try: - sys.stdout = File() 
- print "some text" - finally: - sys.stdout = save_stdout - - def test_del_stdout_before_print(self): - # Issue 4597: 'print' with no argument wasn't reporting when - # sys.stdout was deleted. - save_stdout = sys.stdout - del sys.stdout - try: - print - except RuntimeError as e: - self.assertEquals(str(e), "lost sys.stdout") - else: - self.fail("Expected RuntimeError") - finally: - sys.stdout = save_stdout +class PyOtherFileTests(OtherFileTests): + open = staticmethod(pyio.open) def test_main(): # Historically, these tests have been sloppy about removing TESTFN. # So get rid of it no matter what. try: - run_unittest(AutoFileTests, OtherFileTests, FileSubclassTests, - FileThreadingTests, StdoutTests) + run_unittest(CAutoFileTests, PyAutoFileTests, + COtherFileTests, PyOtherFileTests) finally: if os.path.exists(TESTFN): os.unlink(TESTFN) diff -r c5c04a1ff3b4 -r b3fbeeb23fd6 Lib/test/test_fileio.py --- a/Lib/test/test_fileio.py +++ b/Lib/test/test_fileio.py @@ -1,23 +1,26 @@ # Adapted from test_file.py by Daniel Stutzbach -#from __future__ import unicode_literals + +from __future__ import unicode_literals import sys import os +import errno import unittest from array import array from weakref import proxy +from functools import wraps from test.test_support import (TESTFN, findfile, check_warnings, run_unittest, make_bad_fd) -from UserList import UserList +from test.test_support import py3k_bytes as bytes -import _fileio +from _io import FileIO as _FileIO class AutoFileTests(unittest.TestCase): # file tests for which a test file is automatically set up def setUp(self): - self.f = _fileio._FileIO(TESTFN, 'w') + self.f = _FileIO(TESTFN, 'w') def tearDown(self): if self.f: @@ -34,7 +37,7 @@ class AutoFileTests(unittest.TestCase): self.assertRaises(ReferenceError, getattr, p, 'tell') def testSeekTell(self): - self.f.write(bytes(bytearray(range(20)))) + self.f.write(bytes(range(20))) self.assertEquals(self.f.tell(), 20) self.f.seek(0) self.assertEquals(self.f.tell(), 0) @@ 
-61,17 +64,21 @@ class AutoFileTests(unittest.TestCase): def testReadinto(self): # verify readinto - self.f.write(bytes(bytearray([1, 2]))) + self.f.write(b"\x01\x02") self.f.close() - a = array('b', b'x'*10) - self.f = _fileio._FileIO(TESTFN, 'r') + a = array(b'b', b'x'*10) + self.f = _FileIO(TESTFN, 'r') n = self.f.readinto(a) - self.assertEquals(array('b', [1, 2]), a[:n]) + self.assertEquals(array(b'b', [1, 2]), a[:n]) def testRepr(self): - self.assertEquals(repr(self.f), - "_fileio._FileIO(%d, %s)" % (self.f.fileno(), - repr(self.f.mode))) + self.assertEquals(repr(self.f), "<_io.FileIO name=%r mode='%s'>" + % (self.f.name, self.f.mode)) + del self.f.name + self.assertEquals(repr(self.f), "<_io.FileIO fd=%r mode='%s'>" + % (self.f.fileno(), self.f.mode)) + self.f.close() + self.assertEquals(repr(self.f), "<_io.FileIO [closed]>") def testErrors(self): f = self.f @@ -81,7 +88,7 @@ class AutoFileTests(unittest.TestCase): self.assertRaises(ValueError, f.read, 10) # Open for reading f.close() self.assert_(f.closed) - f = _fileio._FileIO(TESTFN, 'r') + f = _FileIO(TESTFN, 'r') self.assertRaises(TypeError, f.readinto, "") self.assert_(not f.closed) f.close() @@ -107,31 +114,131 @@ class AutoFileTests(unittest.TestCase): # Windows always returns "[Errno 13]: Permission denied # Unix calls dircheck() and returns "[Errno 21]: Is a directory" try: - _fileio._FileIO('.', 'r') + _FileIO('.', 'r') except IOError as e: self.assertNotEqual(e.errno, 0) self.assertEqual(e.filename, ".") else: self.fail("Should have raised IOError") + #A set of functions testing that we get expected behaviour if someone has + #manually closed the internal file descriptor. 
First, a decorator: + def ClosedFD(func): + @wraps(func) + def wrapper(self): + #forcibly close the fd before invoking the problem function + f = self.f + os.close(f.fileno()) + try: + func(self, f) + finally: + try: + self.f.close() + except IOError: + pass + return wrapper + + def ClosedFDRaises(func): + @wraps(func) + def wrapper(self): + #forcibly close the fd before invoking the problem function + f = self.f + os.close(f.fileno()) + try: + func(self, f) + except IOError as e: + self.assertEqual(e.errno, errno.EBADF) + else: + self.fail("Should have raised IOError") + finally: + try: + self.f.close() + except IOError: + pass + return wrapper + + @ClosedFDRaises + def testErrnoOnClose(self, f): + f.close() + + @ClosedFDRaises + def testErrnoOnClosedWrite(self, f): + f.write('a') + + @ClosedFDRaises + def testErrnoOnClosedSeek(self, f): + f.seek(0) + + @ClosedFDRaises + def testErrnoOnClosedTell(self, f): + f.tell() + + @ClosedFDRaises + def testErrnoOnClosedTruncate(self, f): + f.truncate(0) + + @ClosedFD + def testErrnoOnClosedSeekable(self, f): + f.seekable() + + @ClosedFD + def testErrnoOnClosedReadable(self, f): + f.readable() + + @ClosedFD + def testErrnoOnClosedWritable(self, f): + f.writable() + + @ClosedFD + def testErrnoOnClosedFileno(self, f): + f.fileno() + + @ClosedFD + def testErrnoOnClosedIsatty(self, f): + self.assertEqual(f.isatty(), False) + + def ReopenForRead(self): + try: + self.f.close() + except IOError: + pass + self.f = _FileIO(TESTFN, 'r') + os.close(self.f.fileno()) + return self.f + + @ClosedFDRaises + def testErrnoOnClosedRead(self, f): + f = self.ReopenForRead() + f.read(1) + + @ClosedFDRaises + def testErrnoOnClosedReadall(self, f): + f = self.ReopenForRead() + f.readall() + + @ClosedFDRaises + def testErrnoOnClosedReadinto(self, f): + f = self.ReopenForRead() + a = array(b'b', b'x'*10) + f.readinto(a) class OtherFileTests(unittest.TestCase): def testAbles(self): try: - f = _fileio._FileIO(TESTFN, "w") + f = _FileIO(TESTFN, "w") 
self.assertEquals(f.readable(), False) self.assertEquals(f.writable(), True) self.assertEquals(f.seekable(), True) f.close() - f = _fileio._FileIO(TESTFN, "r") + f = _FileIO(TESTFN, "r") self.assertEquals(f.readable(), True) self.assertEquals(f.writable(), False) self.assertEquals(f.seekable(), True) f.close() - f = _fileio._FileIO(TESTFN, "a+") + f = _FileIO(TESTFN, "a+") self.assertEquals(f.readable(), True) self.assertEquals(f.writable(), True) self.assertEquals(f.seekable(), True) @@ -140,14 +247,14 @@ class OtherFileTests(unittest.TestCase): if sys.platform != "win32": try: - f = _fileio._FileIO("/dev/tty", "a") + f = _FileIO("/dev/tty", "a") except EnvironmentError: # When run in a cron job there just aren't any # ttys, so skip the test. This also handles other # OS'es that don't support /dev/tty. pass else: - f = _fileio._FileIO("/dev/tty", "a") + f = _FileIO("/dev/tty", "a") self.assertEquals(f.readable(), False) self.assertEquals(f.writable(), True) if sys.platform != "darwin" and \ @@ -164,7 +271,7 @@ class OtherFileTests(unittest.TestCase): # check invalid mode strings for mode in ("", "aU", "wU+", "rw", "rt"): try: - f = _fileio._FileIO(TESTFN, mode) + f = _FileIO(TESTFN, mode) except ValueError: pass else: @@ -173,19 +280,35 @@ class OtherFileTests(unittest.TestCase): def testUnicodeOpen(self): # verify repr works for unicode too - f = _fileio._FileIO(str(TESTFN), "w") + f = _FileIO(str(TESTFN), "w") f.close() os.unlink(TESTFN) + def testBytesOpen(self): + # Opening a bytes filename + try: + fn = TESTFN.encode("ascii") + except UnicodeEncodeError: + # Skip test + return + f = _FileIO(fn, "w") + try: + f.write(b"abc") + f.close() + with open(TESTFN, "rb") as f: + self.assertEquals(f.read(), b"abc") + finally: + os.unlink(TESTFN) + def testInvalidFd(self): - self.assertRaises(ValueError, _fileio._FileIO, -10) - self.assertRaises(OSError, _fileio._FileIO, make_bad_fd()) + self.assertRaises(ValueError, _FileIO, -10) + self.assertRaises(OSError, _FileIO, 
make_bad_fd()) def testBadModeArgument(self): # verify that we get a sensible error message for bad mode argument bad_mode = "qwerty" try: - f = _fileio._FileIO(TESTFN, bad_mode) + f = _FileIO(TESTFN, bad_mode) except ValueError as msg: if msg.args[0] != 0: s = str(msg) @@ -201,13 +324,13 @@ class OtherFileTests(unittest.TestCase): def bug801631(): # SF bug # "file.truncate fault on windows" - f = _fileio._FileIO(TESTFN, 'w') - f.write(bytes(bytearray(range(11)))) + f = _FileIO(TESTFN, 'w') + f.write(bytes(range(11))) f.close() - f = _fileio._FileIO(TESTFN,'r+') + f = _FileIO(TESTFN,'r+') data = f.read(5) - if data != bytes(bytearray(range(5))): + if data != bytes(range(5)): self.fail("Read on file opened for update failed %r" % data) if f.tell() != 5: self.fail("File pos after read wrong %d" % f.tell()) @@ -245,14 +368,14 @@ class OtherFileTests(unittest.TestCase): pass def testInvalidInit(self): - self.assertRaises(TypeError, _fileio._FileIO, "1", 0, 0) + self.assertRaises(TypeError, _FileIO, "1", 0, 0) def testWarnings(self): with check_warnings() as w: self.assertEqual(w.warnings, []) - self.assertRaises(TypeError, _fileio._FileIO, []) + self.assertRaises(TypeError, _FileIO, []) self.assertEqual(w.warnings, []) - self.assertRaises(ValueError, _fileio._FileIO, "/some/invalid/name", "rt") + self.assertRaises(ValueError, _FileIO, "/some/invalid/name", "rt") self.assertEqual(w.warnings, []) diff -r c5c04a1ff3b4 -r b3fbeeb23fd6 Lib/test/test_io.py --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -1,4 +1,24 @@ -"""Unit tests for io.py.""" +"""Unit tests for the io module.""" + +# Tests of io are scattered over the test suite: +# * test_bufio - tests file buffering +# * test_memoryio - tests BytesIO and StringIO +# * test_fileio - tests FileIO +# * test_file - tests the file interface +# * test_io - tests everything else in the io module +# * test_univnewlines - tests universal newline support +# * test_largefile - tests operations on a file greater than 2**32 
bytes +# (only enabled with -ulargefile) + +################################################################################ +# ATTENTION TEST WRITERS!!! +################################################################################ +# When writing tests for io, it's important to test both the C and Python +# implementations. This is usually done by writing a base test that refers to +# the type it is testing as a attribute. Then it provides custom subclasses to +# test both implementations. This file has lots of examples. +################################################################################ + from __future__ import print_function from __future__ import unicode_literals @@ -9,27 +29,43 @@ import array import threading import random import unittest -from itertools import chain, cycle -from test import test_support +import warnings +import weakref +import gc +import abc +from itertools import chain, cycle, count +from collections import deque +from test import test_support as support import codecs -import io # The module under test +import io # C implementation of io +import _pyio as pyio # Python implementation of io +__metaclass__ = type +bytes = support.py3k_bytes -class MockRawIO(io.RawIOBase): +def _default_chunk_size(): + """Get the default TextIOWrapper chunk size""" + with io.open(__file__, "r", encoding="latin1") as f: + return f._CHUNK_SIZE + + +class MockRawIO: def __init__(self, read_stack=()): self._read_stack = list(read_stack) self._write_stack = [] + self._reads = 0 def read(self, n=None): + self._reads += 1 try: return self._read_stack.pop(0) except: return b"" def write(self, b): - self._write_stack.append(b[:]) + self._write_stack.append(bytes(b)) return len(b) def writable(self): @@ -45,46 +81,156 @@ class MockRawIO(io.RawIOBase): return True def seek(self, pos, whence): - pass + return 0 # wrong but we gotta return something def tell(self): - return 42 + return 0 # same comment as above + def readinto(self, buf): + self._reads += 1 
+ max_len = len(buf) + try: + data = self._read_stack[0] + except IndexError: + return 0 + if data is None: + del self._read_stack[0] + return None + n = len(data) + if len(data) <= max_len: + del self._read_stack[0] + buf[:n] = data + return n + else: + buf[:] = data[:max_len] + self._read_stack[0] = data[max_len:] + return max_len -class MockFileIO(io.BytesIO): + def truncate(self, pos=None): + return pos + +class CMockRawIO(MockRawIO, io.RawIOBase): + pass + +class PyMockRawIO(MockRawIO, pyio.RawIOBase): + pass + + +class MisbehavedRawIO(MockRawIO): + def write(self, b): + return MockRawIO.write(self, b) * 2 + + def read(self, n=None): + return MockRawIO.read(self, n) * 2 + + def seek(self, pos, whence): + return -123 + + def tell(self): + return -456 + + def readinto(self, buf): + MockRawIO.readinto(self, buf) + return len(buf) * 5 + +class CMisbehavedRawIO(MisbehavedRawIO, io.RawIOBase): + pass + +class PyMisbehavedRawIO(MisbehavedRawIO, pyio.RawIOBase): + pass + + +class CloseFailureIO(MockRawIO): + closed = 0 + + def close(self): + if not self.closed: + self.closed = 1 + raise IOError + +class CCloseFailureIO(CloseFailureIO, io.RawIOBase): + pass + +class PyCloseFailureIO(CloseFailureIO, pyio.RawIOBase): + pass + + +class MockFileIO: def __init__(self, data): self.read_history = [] - io.BytesIO.__init__(self, data) + super(MockFileIO, self).__init__(data) def read(self, n=None): - res = io.BytesIO.read(self, n) + res = super(MockFileIO, self).read(n) self.read_history.append(None if res is None else len(res)) return res + def readinto(self, b): + res = super(MockFileIO, self).readinto(b) + self.read_history.append(res) + return res -class MockNonBlockWriterIO(io.RawIOBase): +class CMockFileIO(MockFileIO, io.BytesIO): + pass - def __init__(self, blocking_script): - self._blocking_script = list(blocking_script) +class PyMockFileIO(MockFileIO, pyio.BytesIO): + pass + + +class MockNonBlockWriterIO: + + def __init__(self): self._write_stack = [] + 
self._blocker_char = None - def write(self, b): - self._write_stack.append(b[:]) - n = self._blocking_script.pop(0) - if (n < 0): - raise io.BlockingIOError(0, "test blocking", -n) - else: - return n + def pop_written(self): + s = b"".join(self._write_stack) + self._write_stack[:] = [] + return s + + def block_on(self, char): + """Block when a given char is encountered.""" + self._blocker_char = char + + def readable(self): + return True + + def seekable(self): + return True def writable(self): return True + def write(self, b): + b = bytes(b) + n = -1 + if self._blocker_char: + try: + n = b.index(self._blocker_char) + except ValueError: + pass + else: + self._blocker_char = None + self._write_stack.append(b[:n]) + raise self.BlockingIOError(0, "test blocking", n) + self._write_stack.append(b) + return len(b) + +class CMockNonBlockWriterIO(MockNonBlockWriterIO, io.RawIOBase): + BlockingIOError = io.BlockingIOError + +class PyMockNonBlockWriterIO(MockNonBlockWriterIO, pyio.RawIOBase): + BlockingIOError = pyio.BlockingIOError + class IOTest(unittest.TestCase): + def setUp(self): + support.unlink(support.TESTFN) + def tearDown(self): - test_support.unlink(test_support.TESTFN) + support.unlink(support.TESTFN) def write_ops(self, f): self.assertEqual(f.write(b"blah."), 5) @@ -149,60 +295,71 @@ class IOTest(unittest.TestCase): self.assertEqual(f.seek(-1, 2), self.LARGE) self.assertEqual(f.read(2), b"x") + def test_invalid_operations(self): + # Try writing on a file opened in read mode and vice-versa. 
+ for mode in ("w", "wb"): + with open(support.TESTFN, mode) as fp: + self.assertRaises(IOError, fp.read) + self.assertRaises(IOError, fp.readline) + with open(support.TESTFN, "rb") as fp: + self.assertRaises(IOError, fp.write, b"blah") + self.assertRaises(IOError, fp.writelines, [b"blah\n"]) + with open(support.TESTFN, "r") as fp: + self.assertRaises(IOError, fp.write, "blah") + self.assertRaises(IOError, fp.writelines, ["blah\n"]) + def test_raw_file_io(self): - f = io.open(test_support.TESTFN, "wb", buffering=0) - self.assertEqual(f.readable(), False) - self.assertEqual(f.writable(), True) - self.assertEqual(f.seekable(), True) - self.write_ops(f) - f.close() - f = io.open(test_support.TESTFN, "rb", buffering=0) - self.assertEqual(f.readable(), True) - self.assertEqual(f.writable(), False) - self.assertEqual(f.seekable(), True) - self.read_ops(f) - f.close() + with self.open(support.TESTFN, "wb", buffering=0) as f: + self.assertEqual(f.readable(), False) + self.assertEqual(f.writable(), True) + self.assertEqual(f.seekable(), True) + self.write_ops(f) + with self.open(support.TESTFN, "rb", buffering=0) as f: + self.assertEqual(f.readable(), True) + self.assertEqual(f.writable(), False) + self.assertEqual(f.seekable(), True) + self.read_ops(f) def test_buffered_file_io(self): - f = io.open(test_support.TESTFN, "wb") - self.assertEqual(f.readable(), False) - self.assertEqual(f.writable(), True) - self.assertEqual(f.seekable(), True) - self.write_ops(f) - f.close() - f = io.open(test_support.TESTFN, "rb") - self.assertEqual(f.readable(), True) - self.assertEqual(f.writable(), False) - self.assertEqual(f.seekable(), True) - self.read_ops(f, True) - f.close() + with self.open(support.TESTFN, "wb") as f: + self.assertEqual(f.readable(), False) + self.assertEqual(f.writable(), True) + self.assertEqual(f.seekable(), True) + self.write_ops(f) + with self.open(support.TESTFN, "rb") as f: + self.assertEqual(f.readable(), True) + self.assertEqual(f.writable(), False) + 
self.assertEqual(f.seekable(), True) + self.read_ops(f, True) def test_readline(self): - f = io.open(test_support.TESTFN, "wb") - f.write(b"abc\ndef\nxyzzy\nfoo") - f.close() - f = io.open(test_support.TESTFN, "rb") - self.assertEqual(f.readline(), b"abc\n") - self.assertEqual(f.readline(10), b"def\n") - self.assertEqual(f.readline(2), b"xy") - self.assertEqual(f.readline(4), b"zzy\n") - self.assertEqual(f.readline(), b"foo") - f.close() + with self.open(support.TESTFN, "wb") as f: + f.write(b"abc\ndef\nxyzzy\nfoo\x00bar\nanother line") + with self.open(support.TESTFN, "rb") as f: + self.assertEqual(f.readline(), b"abc\n") + self.assertEqual(f.readline(10), b"def\n") + self.assertEqual(f.readline(2), b"xy") + self.assertEqual(f.readline(4), b"zzy\n") + self.assertEqual(f.readline(), b"foo\x00bar\n") + self.assertEqual(f.readline(), b"another line") + self.assertRaises(TypeError, f.readline, 5.3) + with self.open(support.TESTFN, "r") as f: + self.assertRaises(TypeError, f.readline, 5.3) def test_raw_bytes_io(self): - f = io.BytesIO() + f = self.BytesIO() self.write_ops(f) data = f.getvalue() self.assertEqual(data, b"hello world\n") - f = io.BytesIO(data) + f = self.BytesIO(data) self.read_ops(f, True) def test_large_file_ops(self): # On Windows and Mac OSX this test comsumes large resources; It takes # a long time to build the >2GB file and takes >2GB of disk space # therefore the resource must be enabled to run this test. - if sys.platform[:3] in ('win', 'os2') or sys.platform == 'darwin': - if not test_support.is_resource_enabled("largefile"): + if sys.platform[:3] == 'win' or sys.platform == 'darwin': + if not support.is_resource_enabled("largefile"): print("\nTesting large file ops skipped on %s." % sys.platform, file=sys.stderr) print("It requires %d bytes and a long time." 
% self.LARGE, @@ -210,22 +367,20 @@ class IOTest(unittest.TestCase): print("Use 'regrtest.py -u largefile test_io' to run it.", file=sys.stderr) return - f = io.open(test_support.TESTFN, "w+b", 0) - self.large_file_ops(f) - f.close() - f = io.open(test_support.TESTFN, "w+b") - self.large_file_ops(f) - f.close() + with self.open(support.TESTFN, "w+b", 0) as f: + self.large_file_ops(f) + with self.open(support.TESTFN, "w+b") as f: + self.large_file_ops(f) def test_with_open(self): for bufsize in (0, 1, 100): f = None - with open(test_support.TESTFN, "wb", bufsize) as f: + with open(support.TESTFN, "wb", bufsize) as f: f.write(b"xxx") self.assertEqual(f.closed, True) f = None try: - with open(test_support.TESTFN, "wb", bufsize) as f: + with open(support.TESTFN, "wb", bufsize) as f: 1/0 except ZeroDivisionError: self.assertEqual(f.closed, True) @@ -234,60 +389,105 @@ class IOTest(unittest.TestCase): # issue 5008 def test_append_mode_tell(self): - with io.open(test_support.TESTFN, "wb") as f: + with self.open(support.TESTFN, "wb") as f: f.write(b"xxx") - with io.open(test_support.TESTFN, "ab", buffering=0) as f: + with self.open(support.TESTFN, "ab", buffering=0) as f: self.assertEqual(f.tell(), 3) - with io.open(test_support.TESTFN, "ab") as f: + with self.open(support.TESTFN, "ab") as f: self.assertEqual(f.tell(), 3) - with io.open(test_support.TESTFN, "a") as f: + with self.open(support.TESTFN, "a") as f: self.assert_(f.tell() > 0) def test_destructor(self): record = [] - class MyFileIO(io.FileIO): + class MyFileIO(self.FileIO): def __del__(self): record.append(1) - io.FileIO.__del__(self) + try: + f = super(MyFileIO, self).__del__ + except AttributeError: + pass + else: + f() def close(self): record.append(2) - io.FileIO.close(self) + super(MyFileIO, self).close() def flush(self): record.append(3) - io.FileIO.flush(self) - f = MyFileIO(test_support.TESTFN, "w") - f.write("xxx") + super(MyFileIO, self).flush() + f = MyFileIO(support.TESTFN, "wb") + f.write(b"xxx") 
del f + support.gc_collect() + self.assertEqual(record, [1, 2, 3]) + with open(support.TESTFN, "rb") as f: + self.assertEqual(f.read(), b"xxx") + + def _check_base_destructor(self, base): + record = [] + class MyIO(base): + def __init__(self): + # This exercises the availability of attributes on object + # destruction. + # (in the C version, close() is called by the tp_dealloc + # function, not by __del__) + self.on_del = 1 + self.on_close = 2 + self.on_flush = 3 + def __del__(self): + record.append(self.on_del) + try: + f = super(MyIO, self).__del__ + except AttributeError: + pass + else: + f() + def close(self): + record.append(self.on_close) + super(MyIO, self).close() + def flush(self): + record.append(self.on_flush) + super(MyIO, self).flush() + f = MyIO() + del f + support.gc_collect() self.assertEqual(record, [1, 2, 3]) + def test_IOBase_destructor(self): + self._check_base_destructor(self.IOBase) + + def test_RawIOBase_destructor(self): + self._check_base_destructor(self.RawIOBase) + + def test_BufferedIOBase_destructor(self): + self._check_base_destructor(self.BufferedIOBase) + + def test_TextIOBase_destructor(self): + self._check_base_destructor(self.TextIOBase) + def test_close_flushes(self): - f = io.open(test_support.TESTFN, "wb") - f.write(b"xxx") - f.close() - f = io.open(test_support.TESTFN, "rb") - self.assertEqual(f.read(), b"xxx") - f.close() + with self.open(support.TESTFN, "wb") as f: + f.write(b"xxx") + with self.open(support.TESTFN, "rb") as f: + self.assertEqual(f.read(), b"xxx") - def XXXtest_array_writes(self): - # XXX memory view not available yet - a = array.array('i', range(10)) - n = len(memoryview(a)) - f = io.open(test_support.TESTFN, "wb", 0) - self.assertEqual(f.write(a), n) - f.close() - f = io.open(test_support.TESTFN, "wb") - self.assertEqual(f.write(a), n) - f.close() + def test_array_writes(self): + a = array.array(b'i', range(10)) + n = len(a.tostring()) + with self.open(support.TESTFN, "wb", 0) as f: + 
self.assertEqual(f.write(a), n) + with self.open(support.TESTFN, "wb") as f: + self.assertEqual(f.write(a), n) def test_closefd(self): - self.assertRaises(ValueError, io.open, test_support.TESTFN, 'w', + self.assertRaises(ValueError, self.open, support.TESTFN, 'w', closefd=False) - def testReadClosed(self): - with io.open(test_support.TESTFN, "w") as f: + def test_read_closed(self): + with self.open(support.TESTFN, "w") as f: f.write("egg\n") - with io.open(test_support.TESTFN, "r") as f: - file = io.open(f.fileno(), "r", closefd=False) + with self.open(support.TESTFN, "r") as f: + file = self.open(f.fileno(), "r", closefd=False) self.assertEqual(file.read(), "egg\n") file.seek(0) file.close() @@ -295,86 +495,203 @@ class IOTest(unittest.TestCase): def test_no_closefd_with_filename(self): # can't use closefd in combination with a file name - self.assertRaises(ValueError, - io.open, test_support.TESTFN, "r", closefd=False) + self.assertRaises(ValueError, self.open, support.TESTFN, "r", closefd=False) def test_closefd_attr(self): - with io.open(test_support.TESTFN, "wb") as f: + with self.open(support.TESTFN, "wb") as f: f.write(b"egg\n") - with io.open(test_support.TESTFN, "r") as f: + with self.open(support.TESTFN, "r") as f: self.assertEqual(f.buffer.raw.closefd, True) - file = io.open(f.fileno(), "r", closefd=False) + file = self.open(f.fileno(), "r", closefd=False) self.assertEqual(file.buffer.raw.closefd, False) + def test_garbage_collection(self): + # FileIO objects are collected, and collecting them flushes + # all data to disk. + f = self.FileIO(support.TESTFN, "wb") + f.write(b"abcxxx") + f.f = f + wr = weakref.ref(f) + del f + support.gc_collect() + self.assert_(wr() is None, wr) + with open(support.TESTFN, "rb") as f: + self.assertEqual(f.read(), b"abcxxx") -class MemorySeekTestMixin: + def test_unbounded_file(self): + # Issue #1174606: reading from an unbounded stream such as /dev/zero. 
+ zero = "/dev/zero" + if not os.path.exists(zero): + self.skipTest("{0} does not exist".format(zero)) + if sys.maxsize > 0x7FFFFFFF: + self.skipTest("test can only run in a 32-bit address space") + if support.real_max_memuse < support._2G: + self.skipTest("test requires at least 2GB of memory") + with open(zero, "rb", buffering=0) as f: + self.assertRaises(OverflowError, f.read) + with open(zero, "rb") as f: + self.assertRaises(OverflowError, f.read) + with open(zero, "r") as f: + self.assertRaises(OverflowError, f.read) - def testInit(self): - buf = self.buftype("1234567890") - bytesIo = self.ioclass(buf) +class CIOTest(IOTest): + pass - def testRead(self): - buf = self.buftype("1234567890") - bytesIo = self.ioclass(buf) +class PyIOTest(IOTest): + test_array_writes = unittest.skip( + "len(array.array) returns number of elements rather than bytelength" + )(IOTest.test_array_writes) - self.assertEquals(buf[:1], bytesIo.read(1)) - self.assertEquals(buf[1:5], bytesIo.read(4)) - self.assertEquals(buf[5:], bytesIo.read(900)) - self.assertEquals(self.EOF, bytesIo.read()) - def testReadNoArgs(self): - buf = self.buftype("1234567890") - bytesIo = self.ioclass(buf) +class CommonBufferedTests: + # Tests common to BufferedReader, BufferedWriter and BufferedRandom - self.assertEquals(buf, bytesIo.read()) - self.assertEquals(self.EOF, bytesIo.read()) + def test_detach(self): + raw = self.MockRawIO() + buf = self.tp(raw) + self.assertIs(buf.detach(), raw) + self.assertRaises(ValueError, buf.detach) - def testSeek(self): - buf = self.buftype("1234567890") - bytesIo = self.ioclass(buf) + def test_fileno(self): + rawio = self.MockRawIO() + bufio = self.tp(rawio) - bytesIo.read(5) - bytesIo.seek(0) - self.assertEquals(buf, bytesIo.read()) + self.assertEquals(42, bufio.fileno()) - bytesIo.seek(3) - self.assertEquals(buf[3:], bytesIo.read()) - self.assertRaises(TypeError, bytesIo.seek, 0.0) + def test_no_fileno(self): + # XXX will we always have fileno() function? 
If so, kill + # this test. Else, write it. + pass - def testTell(self): - buf = self.buftype("1234567890") - bytesIo = self.ioclass(buf) + def test_invalid_args(self): + rawio = self.MockRawIO() + bufio = self.tp(rawio) + # Invalid whence + self.assertRaises(ValueError, bufio.seek, 0, -1) + self.assertRaises(ValueError, bufio.seek, 0, 3) - self.assertEquals(0, bytesIo.tell()) - bytesIo.seek(5) - self.assertEquals(5, bytesIo.tell()) - bytesIo.seek(10000) - self.assertEquals(10000, bytesIo.tell()) + def test_override_destructor(self): + tp = self.tp + record = [] + class MyBufferedIO(tp): + def __del__(self): + record.append(1) + try: + f = super(MyBufferedIO, self).__del__ + except AttributeError: + pass + else: + f() + def close(self): + record.append(2) + super(MyBufferedIO, self).close() + def flush(self): + record.append(3) + super(MyBufferedIO, self).flush() + rawio = self.MockRawIO() + bufio = MyBufferedIO(rawio) + writable = bufio.writable() + del bufio + support.gc_collect() + if writable: + self.assertEqual(record, [1, 2, 3]) + else: + self.assertEqual(record, [1, 2]) + def test_context_manager(self): + # Test usability as a context manager + rawio = self.MockRawIO() + bufio = self.tp(rawio) + def _with(): + with bufio: + pass + _with() + # bufio should now be closed, and using it a second time should raise + # a ValueError. + self.assertRaises(ValueError, _with) -class BytesIOTest(MemorySeekTestMixin, unittest.TestCase): - @staticmethod - def buftype(s): - return s.encode("utf-8") - ioclass = io.BytesIO - EOF = b"" + def test_error_through_destructor(self): + # Test that the exception state is not modified by a destructor, + # even if close() fails. 
+ rawio = self.CloseFailureIO() + def f(): + self.tp(rawio).xyzzy + with support.captured_output("stderr") as s: + self.assertRaises(AttributeError, f) + s = s.getvalue().strip() + if s: + # The destructor *may* have printed an unraisable error, check it + self.assertEqual(len(s.splitlines()), 1) + self.assert_(s.startswith("Exception IOError: "), s) + self.assert_(s.endswith(" ignored"), s) + def test_repr(self): + raw = self.MockRawIO() + b = self.tp(raw) + clsname = "%s.%s" % (self.tp.__module__, self.tp.__name__) + self.assertEqual(repr(b), "<%s>" % clsname) + raw.name = "dummy" + self.assertEqual(repr(b), "<%s name=u'dummy'>" % clsname) + raw.name = b"dummy" + self.assertEqual(repr(b), "<%s name='dummy'>" % clsname) -class StringIOTest(MemorySeekTestMixin, unittest.TestCase): - buftype = str - ioclass = io.StringIO - EOF = "" +class BufferedReaderTest(unittest.TestCase, CommonBufferedTests): + read_mode = "rb" -class BufferedReaderTest(unittest.TestCase): + def test_constructor(self): + rawio = self.MockRawIO([b"abc"]) + bufio = self.tp(rawio) + bufio.__init__(rawio) + bufio.__init__(rawio, buffer_size=1024) + bufio.__init__(rawio, buffer_size=16) + self.assertEquals(b"abc", bufio.read()) + self.assertRaises(ValueError, bufio.__init__, rawio, buffer_size=0) + self.assertRaises(ValueError, bufio.__init__, rawio, buffer_size=-16) + self.assertRaises(ValueError, bufio.__init__, rawio, buffer_size=-1) + rawio = self.MockRawIO([b"abc"]) + bufio.__init__(rawio) + self.assertEquals(b"abc", bufio.read()) - def testRead(self): - rawio = MockRawIO((b"abc", b"d", b"efg")) - bufio = io.BufferedReader(rawio) + def test_read(self): + rawio = self.MockRawIO((b"abc", b"d", b"efg")) + bufio = self.tp(rawio) + self.assertEquals(b"abcdef", bufio.read(6)) + # Invalid args + self.assertRaises(ValueError, bufio.read, -2) - self.assertEquals(b"abcdef", bufio.read(6)) + def test_read1(self): + rawio = self.MockRawIO((b"abc", b"d", b"efg")) + bufio = self.tp(rawio) + 
self.assertEquals(b"a", bufio.read(1)) + self.assertEquals(b"b", bufio.read1(1)) + self.assertEquals(rawio._reads, 1) + self.assertEquals(b"c", bufio.read1(100)) + self.assertEquals(rawio._reads, 1) + self.assertEquals(b"d", bufio.read1(100)) + self.assertEquals(rawio._reads, 2) + self.assertEquals(b"efg", bufio.read1(100)) + self.assertEquals(rawio._reads, 3) + self.assertEquals(b"", bufio.read1(100)) + # Invalid args + self.assertRaises(ValueError, bufio.read1, -1) - def testBuffering(self): + def test_readinto(self): + rawio = self.MockRawIO((b"abc", b"d", b"efg")) + bufio = self.tp(rawio) + b = bytearray(2) + self.assertEquals(bufio.readinto(b), 2) + self.assertEquals(b, b"ab") + self.assertEquals(bufio.readinto(b), 2) + self.assertEquals(b, b"cd") + self.assertEquals(bufio.readinto(b), 2) + self.assertEquals(b, b"ef") + self.assertEquals(bufio.readinto(b), 1) + self.assertEquals(b, b"gf") + self.assertEquals(bufio.readinto(b), 0) + self.assertEquals(b, b"gf") + + def test_buffering(self): data = b"abcdefghi" dlen = len(data) @@ -385,61 +702,52 @@ class BufferedReaderTest(unittest.TestCa ] for bufsize, buf_read_sizes, raw_read_sizes in tests: - rawio = MockFileIO(data) - bufio = io.BufferedReader(rawio, buffer_size=bufsize) + rawio = self.MockFileIO(data) + bufio = self.tp(rawio, buffer_size=bufsize) pos = 0 for nbytes in buf_read_sizes: self.assertEquals(bufio.read(nbytes), data[pos:pos+nbytes]) pos += nbytes + # this is mildly implementation-dependent self.assertEquals(rawio.read_history, raw_read_sizes) - def testReadNonBlocking(self): + def test_read_non_blocking(self): # Inject some None's in there to simulate EWOULDBLOCK - rawio = MockRawIO((b"abc", b"d", None, b"efg", None, None)) - bufio = io.BufferedReader(rawio) + rawio = self.MockRawIO((b"abc", b"d", None, b"efg", None, None, None)) + bufio = self.tp(rawio) self.assertEquals(b"abcd", bufio.read(6)) self.assertEquals(b"e", bufio.read(1)) self.assertEquals(b"fg", bufio.read()) + self.assertEquals(b"", 
bufio.peek(1)) self.assert_(None is bufio.read()) self.assertEquals(b"", bufio.read()) - def testReadToEof(self): - rawio = MockRawIO((b"abc", b"d", b"efg")) - bufio = io.BufferedReader(rawio) + def test_read_past_eof(self): + rawio = self.MockRawIO((b"abc", b"d", b"efg")) + bufio = self.tp(rawio) self.assertEquals(b"abcdefg", bufio.read(9000)) - def testReadNoArgs(self): - rawio = MockRawIO((b"abc", b"d", b"efg")) - bufio = io.BufferedReader(rawio) + def test_read_all(self): + rawio = self.MockRawIO((b"abc", b"d", b"efg")) + bufio = self.tp(rawio) self.assertEquals(b"abcdefg", bufio.read()) - def testFileno(self): - rawio = MockRawIO((b"abc", b"d", b"efg")) - bufio = io.BufferedReader(rawio) - - self.assertEquals(42, bufio.fileno()) - - def testFilenoNoFileno(self): - # XXX will we always have fileno() function? If so, kill - # this test. Else, write it. - pass - - def testThreads(self): + def test_threads(self): try: # Write out many bytes with exactly the same number of 0's, # 1's... 255's. This will help us check that concurrent reading # doesn't duplicate or forget contents. 
N = 1000 - l = range(256) * N + l = list(range(256)) * N random.shuffle(l) s = bytes(bytearray(l)) - with io.open(test_support.TESTFN, "wb") as f: + with io.open(support.TESTFN, "wb") as f: f.write(s) - with io.open(test_support.TESTFN, "rb", buffering=0) as raw: - bufio = io.BufferedReader(raw, 8) + with io.open(support.TESTFN, self.read_mode, buffering=0) as raw: + bufio = self.tp(raw, 8) errors = [] results = [] def f(): @@ -467,82 +775,242 @@ class BufferedReaderTest(unittest.TestCa c = bytes(bytearray([i])) self.assertEqual(s.count(c), N) finally: - test_support.unlink(test_support.TESTFN) + support.unlink(support.TESTFN) + def test_misbehaved_io(self): + rawio = self.MisbehavedRawIO((b"abc", b"d", b"efg")) + bufio = self.tp(rawio) + self.assertRaises(IOError, bufio.seek, 0) + self.assertRaises(IOError, bufio.tell) +class CBufferedReaderTest(BufferedReaderTest): + tp = io.BufferedReader -class BufferedWriterTest(unittest.TestCase): + def test_constructor(self): + BufferedReaderTest.test_constructor(self) + # The allocation can succeed on 32-bit builds, e.g. with more + # than 2GB RAM and a 64-bit kernel. + if sys.maxsize > 0x7FFFFFFF: + rawio = self.MockRawIO() + bufio = self.tp(rawio) + self.assertRaises((OverflowError, MemoryError, ValueError), + bufio.__init__, rawio, sys.maxsize) - def testWrite(self): + def test_initialization(self): + rawio = self.MockRawIO([b"abc"]) + bufio = self.tp(rawio) + self.assertRaises(ValueError, bufio.__init__, rawio, buffer_size=0) + self.assertRaises(ValueError, bufio.read) + self.assertRaises(ValueError, bufio.__init__, rawio, buffer_size=-16) + self.assertRaises(ValueError, bufio.read) + self.assertRaises(ValueError, bufio.__init__, rawio, buffer_size=-1) + self.assertRaises(ValueError, bufio.read) + + def test_misbehaved_io_read(self): + rawio = self.MisbehavedRawIO((b"abc", b"d", b"efg")) + bufio = self.tp(rawio) + # _pyio.BufferedReader seems to implement reading different, so that + # checking this is not so easy. 
+ self.assertRaises(IOError, bufio.read, 10) + + def test_garbage_collection(self): + # C BufferedReader objects are collected. + # The Python version has __del__, so it ends into gc.garbage instead + rawio = self.FileIO(support.TESTFN, "w+b") + f = self.tp(rawio) + f.f = f + wr = weakref.ref(f) + del f + support.gc_collect() + self.assert_(wr() is None, wr) + +class PyBufferedReaderTest(BufferedReaderTest): + tp = pyio.BufferedReader + + +class BufferedWriterTest(unittest.TestCase, CommonBufferedTests): + write_mode = "wb" + + def test_constructor(self): + rawio = self.MockRawIO() + bufio = self.tp(rawio) + bufio.__init__(rawio) + bufio.__init__(rawio, buffer_size=1024) + bufio.__init__(rawio, buffer_size=16) + self.assertEquals(3, bufio.write(b"abc")) + bufio.flush() + self.assertRaises(ValueError, bufio.__init__, rawio, buffer_size=0) + self.assertRaises(ValueError, bufio.__init__, rawio, buffer_size=-16) + self.assertRaises(ValueError, bufio.__init__, rawio, buffer_size=-1) + bufio.__init__(rawio) + self.assertEquals(3, bufio.write(b"ghi")) + bufio.flush() + self.assertEquals(b"".join(rawio._write_stack), b"abcghi") + + def test_detach_flush(self): + raw = self.MockRawIO() + buf = self.tp(raw) + buf.write(b"howdy!") + self.assertFalse(raw._write_stack) + buf.detach() + self.assertEqual(raw._write_stack, [b"howdy!"]) + + def test_write(self): # Write to the buffered IO but don't overflow the buffer. 
- writer = MockRawIO() - bufio = io.BufferedWriter(writer, 8) - + writer = self.MockRawIO() + bufio = self.tp(writer, 8) bufio.write(b"abc") - self.assertFalse(writer._write_stack) - def testWriteOverflow(self): - writer = MockRawIO() - bufio = io.BufferedWriter(writer, 8) + def test_write_overflow(self): + writer = self.MockRawIO() + bufio = self.tp(writer, 8) + contents = b"abcdefghijklmnop" + for n in range(0, len(contents), 3): + bufio.write(contents[n:n+3]) + flushed = b"".join(writer._write_stack) + # At least (total - 8) bytes were implicitly flushed, perhaps more + # depending on the implementation. + self.assert_(flushed.startswith(contents[:-8]), flushed) - bufio.write(b"abc") - bufio.write(b"defghijkl") + def check_writes(self, intermediate_func): + # Lots of writes, test the flushed output is as expected. + contents = bytes(range(256)) * 1000 + n = 0 + writer = self.MockRawIO() + bufio = self.tp(writer, 13) + # Generator of write sizes: repeat each N 15 times then proceed to N+1 + def gen_sizes(): + for size in count(1): + for i in range(15): + yield size + sizes = gen_sizes() + while n < len(contents): + size = min(next(sizes), len(contents) - n) + self.assertEquals(bufio.write(contents[n:n+size]), size) + intermediate_func(bufio) + n += size + bufio.flush() + self.assertEquals(contents, + b"".join(writer._write_stack)) - self.assertEquals(b"abcdefghijkl", writer._write_stack[0]) + def test_writes(self): + self.check_writes(lambda bufio: None) - def testWriteNonBlocking(self): - raw = MockNonBlockWriterIO((9, 2, 22, -6, 10, 12, 12)) - bufio = io.BufferedWriter(raw, 8, 16) + def test_writes_and_flushes(self): + self.check_writes(lambda bufio: bufio.flush()) - bufio.write(b"asdf") - bufio.write(b"asdfa") - self.assertEquals(b"asdfasdfa", raw._write_stack[0]) + def test_writes_and_seeks(self): + def _seekabs(bufio): + pos = bufio.tell() + bufio.seek(pos + 1, 0) + bufio.seek(pos - 1, 0) + bufio.seek(pos, 0) + self.check_writes(_seekabs) + def 
_seekrel(bufio): + pos = bufio.seek(0, 1) + bufio.seek(+1, 1) + bufio.seek(-1, 1) + bufio.seek(pos, 0) + self.check_writes(_seekrel) - bufio.write(b"asdfasdfasdf") - self.assertEquals(b"asdfasdfasdf", raw._write_stack[1]) - bufio.write(b"asdfasdfasdf") - self.assertEquals(b"dfasdfasdf", raw._write_stack[2]) - self.assertEquals(b"asdfasdfasdf", raw._write_stack[3]) + def test_writes_and_truncates(self): + self.check_writes(lambda bufio: bufio.truncate(bufio.tell())) - bufio.write(b"asdfasdfasdf") + def test_write_non_blocking(self): + raw = self.MockNonBlockWriterIO() + bufio = self.tp(raw, 8) - # XXX I don't like this test. It relies too heavily on how the - # algorithm actually works, which we might change. Refactor - # later. + self.assertEquals(bufio.write(b"abcd"), 4) + self.assertEquals(bufio.write(b"efghi"), 5) + # 1 byte will be written, the rest will be buffered + raw.block_on(b"k") + self.assertEquals(bufio.write(b"jklmn"), 5) - def testFileno(self): - rawio = MockRawIO((b"abc", b"d", b"efg")) - bufio = io.BufferedWriter(rawio) + # 8 bytes will be written, 8 will be buffered and the rest will be lost + raw.block_on(b"0") + try: + bufio.write(b"opqrwxyz0123456789") + except self.BlockingIOError as e: + written = e.characters_written + else: + self.fail("BlockingIOError should have been raised") + self.assertEquals(written, 16) + self.assertEquals(raw.pop_written(), + b"abcdefghijklmnopqrwxyz") - self.assertEquals(42, bufio.fileno()) + self.assertEquals(bufio.write(b"ABCDEFGHI"), 9) + s = raw.pop_written() + # Previously buffered bytes were flushed + self.assertTrue(s.startswith(b"01234567A"), s) - def testFlush(self): - writer = MockRawIO() - bufio = io.BufferedWriter(writer, 8) + def test_write_and_rewind(self): + raw = io.BytesIO() + bufio = self.tp(raw, 4) + self.assertEqual(bufio.write(b"abcdef"), 6) + self.assertEqual(bufio.tell(), 6) + bufio.seek(0, 0) + self.assertEqual(bufio.write(b"XY"), 2) + bufio.seek(6, 0) + self.assertEqual(raw.getvalue(), 
b"XYcdef") + self.assertEqual(bufio.write(b"123456"), 6) + bufio.flush() + self.assertEqual(raw.getvalue(), b"XYcdef123456") + def test_flush(self): + writer = self.MockRawIO() + bufio = self.tp(writer, 8) bufio.write(b"abc") bufio.flush() - self.assertEquals(b"abc", writer._write_stack[0]) - def testThreads(self): - # BufferedWriter should not raise exceptions or crash - # when called from multiple threads. + def test_destructor(self): + writer = self.MockRawIO() + bufio = self.tp(writer, 8) + bufio.write(b"abc") + del bufio + support.gc_collect() + self.assertEquals(b"abc", writer._write_stack[0]) + + def test_truncate(self): + # Truncate implicitly flushes the buffer. + with io.open(support.TESTFN, self.write_mode, buffering=0) as raw: + bufio = self.tp(raw, 8) + bufio.write(b"abcdef") + self.assertEqual(bufio.truncate(3), 3) + self.assertEqual(bufio.tell(), 3) + with io.open(support.TESTFN, "rb", buffering=0) as f: + self.assertEqual(f.read(), b"abc") + + def test_threads(self): try: + # Write out many bytes from many threads and test they were + # all flushed. + N = 1000 + contents = bytes(range(256)) * N + sizes = cycle([1, 19]) + n = 0 + queue = deque() + while n < len(contents): + size = next(sizes) + queue.append(contents[n:n+size]) + n += size + del contents # We use a real file object because it allows us to # exercise situations where the GIL is released before # writing the buffer to the raw streams. This is in addition # to concurrency issues due to switching threads in the middle # of Python code. 
- with io.open(test_support.TESTFN, "wb", buffering=0) as raw: - bufio = io.BufferedWriter(raw, 8) + with io.open(support.TESTFN, self.write_mode, buffering=0) as raw: + bufio = self.tp(raw, 8) errors = [] def f(): try: - # Write enough bytes to flush the buffer - s = b"a" * 19 - for i in range(50): + while True: + try: + s = queue.popleft() + except IndexError: + return bufio.write(s) except Exception as e: errors.append(e) @@ -555,37 +1023,218 @@ class BufferedWriterTest(unittest.TestCa t.join() self.assertFalse(errors, "the following exceptions were caught: %r" % errors) + bufio.close() + with io.open(support.TESTFN, "rb") as f: + s = f.read() + for i in range(256): + self.assertEquals(s.count(bytes([i])), N) finally: - test_support.unlink(test_support.TESTFN) + support.unlink(support.TESTFN) + def test_misbehaved_io(self): + rawio = self.MisbehavedRawIO() + bufio = self.tp(rawio, 5) + self.assertRaises(IOError, bufio.seek, 0) + self.assertRaises(IOError, bufio.tell) + self.assertRaises(IOError, bufio.write, b"abcdef") + + def test_max_buffer_size_deprecation(self): + with support.check_warnings() as w: + warnings.simplefilter("always", DeprecationWarning) + self.tp(self.MockRawIO(), 8, 12) + self.assertEqual(len(w.warnings), 1) + warning = w.warnings[0] + self.assertTrue(warning.category is DeprecationWarning) + self.assertEqual(str(warning.message), + "max_buffer_size is deprecated") + + +class CBufferedWriterTest(BufferedWriterTest): + tp = io.BufferedWriter + + def test_constructor(self): + BufferedWriterTest.test_constructor(self) + # The allocation can succeed on 32-bit builds, e.g. with more + # than 2GB RAM and a 64-bit kernel. 
+ if sys.maxsize > 0x7FFFFFFF: + rawio = self.MockRawIO() + bufio = self.tp(rawio) + self.assertRaises((OverflowError, MemoryError, ValueError), + bufio.__init__, rawio, sys.maxsize) + + def test_initialization(self): + rawio = self.MockRawIO() + bufio = self.tp(rawio) + self.assertRaises(ValueError, bufio.__init__, rawio, buffer_size=0) + self.assertRaises(ValueError, bufio.write, b"def") + self.assertRaises(ValueError, bufio.__init__, rawio, buffer_size=-16) + self.assertRaises(ValueError, bufio.write, b"def") + self.assertRaises(ValueError, bufio.__init__, rawio, buffer_size=-1) + self.assertRaises(ValueError, bufio.write, b"def") + + def test_garbage_collection(self): + # C BufferedWriter objects are collected, and collecting them flushes + # all data to disk. + # The Python version has __del__, so it ends into gc.garbage instead + rawio = self.FileIO(support.TESTFN, "w+b") + f = self.tp(rawio) + f.write(b"123xxx") + f.x = f + wr = weakref.ref(f) + del f + support.gc_collect() + self.assert_(wr() is None, wr) + with open(support.TESTFN, "rb") as f: + self.assertEqual(f.read(), b"123xxx") + + +class PyBufferedWriterTest(BufferedWriterTest): + tp = pyio.BufferedWriter class BufferedRWPairTest(unittest.TestCase): - def testRWPair(self): - r = MockRawIO(()) - w = MockRawIO() - pair = io.BufferedRWPair(r, w) + def test_constructor(self): + pair = self.tp(self.MockRawIO(), self.MockRawIO()) self.assertFalse(pair.closed) - # XXX More Tests + def test_detach(self): + pair = self.tp(self.MockRawIO(), self.MockRawIO()) + self.assertRaises(self.UnsupportedOperation, pair.detach) + def test_constructor_max_buffer_size_deprecation(self): + with support.check_warnings() as w: + warnings.simplefilter("always", DeprecationWarning) + self.tp(self.MockRawIO(), self.MockRawIO(), 8, 12) + self.assertEqual(len(w.warnings), 1) + warning = w.warnings[0] + self.assertTrue(warning.category is DeprecationWarning) + self.assertEqual(str(warning.message), + "max_buffer_size is 
deprecated") -class BufferedRandomTest(unittest.TestCase): + def test_constructor_with_not_readable(self): + class NotReadable(MockRawIO): + def readable(self): + return False - def testReadAndWrite(self): - raw = MockRawIO((b"asdf", b"ghjk")) - rw = io.BufferedRandom(raw, 8, 12) + self.assertRaises(IOError, self.tp, NotReadable(), self.MockRawIO()) + + def test_constructor_with_not_writeable(self): + class NotWriteable(MockRawIO): + def writable(self): + return False + + self.assertRaises(IOError, self.tp, self.MockRawIO(), NotWriteable()) + + def test_read(self): + pair = self.tp(self.BytesIO(b"abcdef"), self.MockRawIO()) + + self.assertEqual(pair.read(3), b"abc") + self.assertEqual(pair.read(1), b"d") + self.assertEqual(pair.read(), b"ef") + + def test_read1(self): + # .read1() is delegated to the underlying reader object, so this test + # can be shallow. + pair = self.tp(self.BytesIO(b"abcdef"), self.MockRawIO()) + + self.assertEqual(pair.read1(3), b"abc") + + def test_readinto(self): + pair = self.tp(self.BytesIO(b"abcdef"), self.MockRawIO()) + + data = bytearray(5) + self.assertEqual(pair.readinto(data), 5) + self.assertEqual(data, b"abcde") + + def test_write(self): + w = self.MockRawIO() + pair = self.tp(self.MockRawIO(), w) + + pair.write(b"abc") + pair.flush() + pair.write(b"def") + pair.flush() + self.assertEqual(w._write_stack, [b"abc", b"def"]) + + def test_peek(self): + pair = self.tp(self.BytesIO(b"abcdef"), self.MockRawIO()) + + self.assertTrue(pair.peek(3).startswith(b"abc")) + self.assertEqual(pair.read(3), b"abc") + + def test_readable(self): + pair = self.tp(self.MockRawIO(), self.MockRawIO()) + self.assertTrue(pair.readable()) + + def test_writeable(self): + pair = self.tp(self.MockRawIO(), self.MockRawIO()) + self.assertTrue(pair.writable()) + + def test_seekable(self): + # BufferedRWPairs are never seekable, even if their readers and writers + # are. 
+ pair = self.tp(self.MockRawIO(), self.MockRawIO()) + self.assertFalse(pair.seekable()) + + # .flush() is delegated to the underlying writer object and has been + # tested in the test_write method. + + def test_close_and_closed(self): + pair = self.tp(self.MockRawIO(), self.MockRawIO()) + self.assertFalse(pair.closed) + pair.close() + self.assertTrue(pair.closed) + + def test_isatty(self): + class SelectableIsAtty(MockRawIO): + def __init__(self, isatty): + MockRawIO.__init__(self) + self._isatty = isatty + + def isatty(self): + return self._isatty + + pair = self.tp(SelectableIsAtty(False), SelectableIsAtty(False)) + self.assertFalse(pair.isatty()) + + pair = self.tp(SelectableIsAtty(True), SelectableIsAtty(False)) + self.assertTrue(pair.isatty()) + + pair = self.tp(SelectableIsAtty(False), SelectableIsAtty(True)) + self.assertTrue(pair.isatty()) + + pair = self.tp(SelectableIsAtty(True), SelectableIsAtty(True)) + self.assertTrue(pair.isatty()) + +class CBufferedRWPairTest(BufferedRWPairTest): + tp = io.BufferedRWPair + +class PyBufferedRWPairTest(BufferedRWPairTest): + tp = pyio.BufferedRWPair + + +class BufferedRandomTest(BufferedReaderTest, BufferedWriterTest): + read_mode = "rb+" + write_mode = "wb+" + + def test_constructor(self): + BufferedReaderTest.test_constructor(self) + BufferedWriterTest.test_constructor(self) + + def test_read_and_write(self): + raw = self.MockRawIO((b"asdf", b"ghjk")) + rw = self.tp(raw, 8) self.assertEqual(b"as", rw.read(2)) rw.write(b"ddd") rw.write(b"eee") self.assertFalse(raw._write_stack) # Buffer writes - self.assertEqual(b"ghjk", rw.read()) # This read forces write flush + self.assertEqual(b"ghjk", rw.read()) self.assertEquals(b"dddeee", raw._write_stack[0]) - def testSeekAndTell(self): - raw = io.BytesIO(b"asdfghjkl") - rw = io.BufferedRandom(raw) + def test_seek_and_tell(self): + raw = self.BytesIO(b"asdfghjkl") + rw = self.tp(raw) self.assertEquals(b"as", rw.read(2)) self.assertEquals(2, rw.tell()) @@ -603,6 +1252,115 @@ 
class BufferedRandomTest(unittest.TestCa self.assertEquals(b"fl", rw.read(11)) self.assertRaises(TypeError, rw.seek, 0.0) + def check_flush_and_read(self, read_func): + raw = self.BytesIO(b"abcdefghi") + bufio = self.tp(raw) + + self.assertEquals(b"ab", read_func(bufio, 2)) + bufio.write(b"12") + self.assertEquals(b"ef", read_func(bufio, 2)) + self.assertEquals(6, bufio.tell()) + bufio.flush() + self.assertEquals(6, bufio.tell()) + self.assertEquals(b"ghi", read_func(bufio)) + raw.seek(0, 0) + raw.write(b"XYZ") + # flush() resets the read buffer + bufio.flush() + bufio.seek(0, 0) + self.assertEquals(b"XYZ", read_func(bufio, 3)) + + def test_flush_and_read(self): + self.check_flush_and_read(lambda bufio, *args: bufio.read(*args)) + + def test_flush_and_readinto(self): + def _readinto(bufio, n=-1): + b = bytearray(n if n >= 0 else 9999) + n = bufio.readinto(b) + return bytes(b[:n]) + self.check_flush_and_read(_readinto) + + def test_flush_and_peek(self): + def _peek(bufio, n=-1): + # This relies on the fact that the buffer can contain the whole + # raw stream, otherwise peek() can return less. 
+ b = bufio.peek(n) + if n != -1: + b = b[:n] + bufio.seek(len(b), 1) + return b + self.check_flush_and_read(_peek) + + def test_flush_and_write(self): + raw = self.BytesIO(b"abcdefghi") + bufio = self.tp(raw) + + bufio.write(b"123") + bufio.flush() + bufio.write(b"45") + bufio.flush() + bufio.seek(0, 0) + self.assertEquals(b"12345fghi", raw.getvalue()) + self.assertEquals(b"12345fghi", bufio.read()) + + def test_threads(self): + BufferedReaderTest.test_threads(self) + BufferedWriterTest.test_threads(self) + + def test_writes_and_peek(self): + def _peek(bufio): + bufio.peek(1) + self.check_writes(_peek) + def _peek(bufio): + pos = bufio.tell() + bufio.seek(-1, 1) + bufio.peek(1) + bufio.seek(pos, 0) + self.check_writes(_peek) + + def test_writes_and_reads(self): + def _read(bufio): + bufio.seek(-1, 1) + bufio.read(1) + self.check_writes(_read) + + def test_writes_and_read1s(self): + def _read1(bufio): + bufio.seek(-1, 1) + bufio.read1(1) + self.check_writes(_read1) + + def test_writes_and_readintos(self): + def _read(bufio): + bufio.seek(-1, 1) + bufio.readinto(bytearray(1)) + self.check_writes(_read) + + def test_misbehaved_io(self): + BufferedReaderTest.test_misbehaved_io(self) + BufferedWriterTest.test_misbehaved_io(self) + +class CBufferedRandomTest(CBufferedReaderTest, CBufferedWriterTest, BufferedRandomTest): + tp = io.BufferedRandom + + def test_constructor(self): + BufferedRandomTest.test_constructor(self) + # The allocation can succeed on 32-bit builds, e.g. with more + # than 2GB RAM and a 64-bit kernel. 
+ if sys.maxsize > 0x7FFFFFFF: + rawio = self.MockRawIO() + bufio = self.tp(rawio) + self.assertRaises((OverflowError, MemoryError, ValueError), + bufio.__init__, rawio, sys.maxsize) + + def test_garbage_collection(self): + CBufferedReaderTest.test_garbage_collection(self) + CBufferedWriterTest.test_garbage_collection(self) + +class PyBufferedRandomTest(BufferedRandomTest): + tp = pyio.BufferedRandom + + # To fully exercise seek/tell, the StatefulIncrementalDecoder has these # properties: # - A single output character can correspond to many bytes of input. @@ -736,7 +1494,7 @@ class StatefulIncrementalDecoderTest(uni 'm--------------.') ] - def testDecoder(self): + def test_decoder(self): # Try a few one-shot test cases. for input, eof, output in self.test_cases: d = StatefulIncrementalDecoder() @@ -752,98 +1510,115 @@ class TextIOWrapperTest(unittest.TestCas def setUp(self): self.testdata = b"AAA\r\nBBB\rCCC\r\nDDD\nEEE\r\n" self.normalized = b"AAA\nBBB\nCCC\nDDD\nEEE\n".decode("ascii") + support.unlink(support.TESTFN) def tearDown(self): - test_support.unlink(test_support.TESTFN) + support.unlink(support.TESTFN) - def testLineBuffering(self): - r = io.BytesIO() - b = io.BufferedWriter(r, 1000) - t = io.TextIOWrapper(b, newline="\n", line_buffering=True) - t.write(u"X") + def test_constructor(self): + r = self.BytesIO(b"\xc3\xa9\n\n") + b = self.BufferedReader(r, 1000) + t = self.TextIOWrapper(b) + t.__init__(b, encoding="latin1", newline="\r\n") + self.assertEquals(t.encoding, "latin1") + self.assertEquals(t.line_buffering, False) + t.__init__(b, encoding="utf8", line_buffering=True) + self.assertEquals(t.encoding, "utf8") + self.assertEquals(t.line_buffering, True) + self.assertEquals("\xe9\n", t.readline()) + self.assertRaises(TypeError, t.__init__, b, newline=42) + self.assertRaises(ValueError, t.__init__, b, newline='xyzzy') + + def test_detach(self): + r = self.BytesIO() + b = self.BufferedWriter(r) + t = self.TextIOWrapper(b) + self.assertIs(t.detach(), b) 
+ + t = self.TextIOWrapper(b, encoding="ascii") + t.write("howdy") + self.assertFalse(r.getvalue()) + t.detach() + self.assertEqual(r.getvalue(), b"howdy") + self.assertRaises(ValueError, t.detach) + + def test_repr(self): + raw = self.BytesIO("hello".encode("utf-8")) + b = self.BufferedReader(raw) + t = self.TextIOWrapper(b, encoding="utf-8") + modname = self.TextIOWrapper.__module__ + self.assertEqual(repr(t), + "<%s.TextIOWrapper encoding='utf-8'>" % modname) + raw.name = "dummy" + self.assertEqual(repr(t), + "<%s.TextIOWrapper name=u'dummy' encoding='utf-8'>" % modname) + raw.name = b"dummy" + self.assertEqual(repr(t), + "<%s.TextIOWrapper name='dummy' encoding='utf-8'>" % modname) + + def test_line_buffering(self): + r = self.BytesIO() + b = self.BufferedWriter(r, 1000) + t = self.TextIOWrapper(b, newline="\n", line_buffering=True) + t.write("X") self.assertEquals(r.getvalue(), b"") # No flush happened - t.write(u"Y\nZ") + t.write("Y\nZ") self.assertEquals(r.getvalue(), b"XY\nZ") # All got flushed - t.write(u"A\rB") + t.write("A\rB") self.assertEquals(r.getvalue(), b"XY\nZA\rB") - def testEncodingErrorsReading(self): + def test_encoding(self): + # Check the encoding attribute is always set, and valid + b = self.BytesIO() + t = self.TextIOWrapper(b, encoding="utf8") + self.assertEqual(t.encoding, "utf8") + t = self.TextIOWrapper(b) + self.assert_(t.encoding is not None) + codecs.lookup(t.encoding) + + def test_encoding_errors_reading(self): # (1) default - b = io.BytesIO(b"abc\n\xff\n") - t = io.TextIOWrapper(b, encoding="ascii") + b = self.BytesIO(b"abc\n\xff\n") + t = self.TextIOWrapper(b, encoding="ascii") self.assertRaises(UnicodeError, t.read) # (2) explicit strict - b = io.BytesIO(b"abc\n\xff\n") - t = io.TextIOWrapper(b, encoding="ascii", errors="strict") + b = self.BytesIO(b"abc\n\xff\n") + t = self.TextIOWrapper(b, encoding="ascii", errors="strict") self.assertRaises(UnicodeError, t.read) # (3) ignore - b = io.BytesIO(b"abc\n\xff\n") - t = 
io.TextIOWrapper(b, encoding="ascii", errors="ignore") + b = self.BytesIO(b"abc\n\xff\n") + t = self.TextIOWrapper(b, encoding="ascii", errors="ignore") self.assertEquals(t.read(), "abc\n\n") # (4) replace - b = io.BytesIO(b"abc\n\xff\n") - t = io.TextIOWrapper(b, encoding="ascii", errors="replace") - self.assertEquals(t.read(), u"abc\n\ufffd\n") + b = self.BytesIO(b"abc\n\xff\n") + t = self.TextIOWrapper(b, encoding="ascii", errors="replace") + self.assertEquals(t.read(), "abc\n\ufffd\n") - def testEncodingErrorsWriting(self): + def test_encoding_errors_writing(self): # (1) default - b = io.BytesIO() - t = io.TextIOWrapper(b, encoding="ascii") - self.assertRaises(UnicodeError, t.write, u"\xff") + b = self.BytesIO() + t = self.TextIOWrapper(b, encoding="ascii") + self.assertRaises(UnicodeError, t.write, "\xff") # (2) explicit strict - b = io.BytesIO() - t = io.TextIOWrapper(b, encoding="ascii", errors="strict") - self.assertRaises(UnicodeError, t.write, u"\xff") + b = self.BytesIO() + t = self.TextIOWrapper(b, encoding="ascii", errors="strict") + self.assertRaises(UnicodeError, t.write, "\xff") # (3) ignore - b = io.BytesIO() - t = io.TextIOWrapper(b, encoding="ascii", errors="ignore", + b = self.BytesIO() + t = self.TextIOWrapper(b, encoding="ascii", errors="ignore", newline="\n") - t.write(u"abc\xffdef\n") + t.write("abc\xffdef\n") t.flush() self.assertEquals(b.getvalue(), b"abcdef\n") # (4) replace - b = io.BytesIO() - t = io.TextIOWrapper(b, encoding="ascii", errors="replace", + b = self.BytesIO() + t = self.TextIOWrapper(b, encoding="ascii", errors="replace", newline="\n") - t.write(u"abc\xffdef\n") + t.write("abc\xffdef\n") t.flush() self.assertEquals(b.getvalue(), b"abc?def\n") - def testNewlinesInput(self): - testdata = b"AAA\nBBB\nCCC\rDDD\rEEE\r\nFFF\r\nGGG" - normalized = testdata.replace(b"\r\n", b"\n").replace(b"\r", b"\n") - for newline, expected in [ - (None, normalized.decode("ascii").splitlines(True)), - ("", 
testdata.decode("ascii").splitlines(True)), - ("\n", ["AAA\n", "BBB\n", "CCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]), - ("\r\n", ["AAA\nBBB\nCCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]), - ("\r", ["AAA\nBBB\nCCC\r", "DDD\r", "EEE\r", "\nFFF\r", "\nGGG"]), - ]: - buf = io.BytesIO(testdata) - txt = io.TextIOWrapper(buf, encoding="ascii", newline=newline) - self.assertEquals(txt.readlines(), expected) - txt.seek(0) - self.assertEquals(txt.read(), "".join(expected)) - - def testNewlinesOutput(self): - testdict = { - "": b"AAA\nBBB\nCCC\nX\rY\r\nZ", - "\n": b"AAA\nBBB\nCCC\nX\rY\r\nZ", - "\r": b"AAA\rBBB\rCCC\rX\rY\r\rZ", - "\r\n": b"AAA\r\nBBB\r\nCCC\r\nX\rY\r\r\nZ", - } - tests = [(None, testdict[os.linesep])] + sorted(testdict.items()) - for newline, expected in tests: - buf = io.BytesIO() - txt = io.TextIOWrapper(buf, encoding="ascii", newline=newline) - txt.write("AAA\nB") - txt.write("BB\nCCC\n") - txt.write("X\rY\r\nZ") - txt.flush() - self.assertEquals(buf.closed, False) - self.assertEquals(buf.getvalue(), expected) - - def testNewlines(self): + def test_newlines(self): input_lines = [ "unix\n", "windows\r\n", "os9\r", "last\n", "nonl" ] tests = [ @@ -867,8 +1642,8 @@ class TextIOWrapperTest(unittest.TestCas for do_reads in (False, True): for bufsize in range(1, 10): for newline, exp_lines in tests: - bufio = io.BufferedReader(io.BytesIO(data), bufsize) - textio = io.TextIOWrapper(bufio, newline=newline, + bufio = self.BufferedReader(self.BytesIO(data), bufsize) + textio = self.TextIOWrapper(bufio, newline=newline, encoding=encoding) if do_reads: got_lines = [] @@ -885,75 +1660,117 @@ class TextIOWrapperTest(unittest.TestCas self.assertEquals(got_line, exp_line) self.assertEquals(len(got_lines), len(exp_lines)) - def testNewlinesInput(self): - testdata = b"AAA\nBBB\nCCC\rDDD\rEEE\r\nFFF\r\nGGG" + def test_newlines_input(self): + testdata = b"AAA\nBB\x00B\nCCC\rDDD\rEEE\r\nFFF\r\nGGG" normalized = testdata.replace(b"\r\n", b"\n").replace(b"\r", b"\n") for newline, expected 
in [ (None, normalized.decode("ascii").splitlines(True)), ("", testdata.decode("ascii").splitlines(True)), - ("\n", ["AAA\n", "BBB\n", "CCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]), - ("\r\n", ["AAA\nBBB\nCCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]), - ("\r", ["AAA\nBBB\nCCC\r", "DDD\r", "EEE\r", "\nFFF\r", "\nGGG"]), + ("\n", ["AAA\n", "BB\x00B\n", "CCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]), + ("\r\n", ["AAA\nBB\x00B\nCCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]), + ("\r", ["AAA\nBB\x00B\nCCC\r", "DDD\r", "EEE\r", "\nFFF\r", "\nGGG"]), ]: - buf = io.BytesIO(testdata) - txt = io.TextIOWrapper(buf, encoding="ascii", newline=newline) + buf = self.BytesIO(testdata) + txt = self.TextIOWrapper(buf, encoding="ascii", newline=newline) self.assertEquals(txt.readlines(), expected) txt.seek(0) self.assertEquals(txt.read(), "".join(expected)) - def testNewlinesOutput(self): - data = u"AAA\nBBB\rCCC\n" - data_lf = b"AAA\nBBB\rCCC\n" - data_cr = b"AAA\rBBB\rCCC\r" - data_crlf = b"AAA\r\nBBB\rCCC\r\n" - save_linesep = os.linesep - try: - for os.linesep, newline, expected in [ - ("\n", None, data_lf), - ("\r\n", None, data_crlf), - ("\n", "", data_lf), - ("\r\n", "", data_lf), - ("\n", "\n", data_lf), - ("\r\n", "\n", data_lf), - ("\n", "\r", data_cr), - ("\r\n", "\r", data_cr), - ("\n", "\r\n", data_crlf), - ("\r\n", "\r\n", data_crlf), - ]: - buf = io.BytesIO() - txt = io.TextIOWrapper(buf, encoding="ascii", newline=newline) - txt.write(data) - txt.close() - self.assertEquals(buf.closed, True) - self.assertRaises(ValueError, buf.getvalue) - finally: - os.linesep = save_linesep + def test_newlines_output(self): + testdict = { + "": b"AAA\nBBB\nCCC\nX\rY\r\nZ", + "\n": b"AAA\nBBB\nCCC\nX\rY\r\nZ", + "\r": b"AAA\rBBB\rCCC\rX\rY\r\rZ", + "\r\n": b"AAA\r\nBBB\r\nCCC\r\nX\rY\r\r\nZ", + } + tests = [(None, testdict[os.linesep])] + sorted(testdict.items()) + for newline, expected in tests: + buf = self.BytesIO() + txt = self.TextIOWrapper(buf, encoding="ascii", newline=newline) + txt.write("AAA\nB") + 
txt.write("BB\nCCC\n") + txt.write("X\rY\r\nZ") + txt.flush() + self.assertEquals(buf.closed, False) + self.assertEquals(buf.getvalue(), expected) + + def test_destructor(self): + l = [] + base = self.BytesIO + class MyBytesIO(base): + def close(self): + l.append(self.getvalue()) + base.close(self) + b = MyBytesIO() + t = self.TextIOWrapper(b, encoding="ascii") + t.write("abc") + del t + support.gc_collect() + self.assertEquals([b"abc"], l) + + def test_override_destructor(self): + record = [] + class MyTextIO(self.TextIOWrapper): + def __del__(self): + record.append(1) + try: + f = super(MyTextIO, self).__del__ + except AttributeError: + pass + else: + f() + def close(self): + record.append(2) + super(MyTextIO, self).close() + def flush(self): + record.append(3) + super(MyTextIO, self).flush() + b = self.BytesIO() + t = MyTextIO(b, encoding="ascii") + del t + support.gc_collect() + self.assertEqual(record, [1, 2, 3]) + + def test_error_through_destructor(self): + # Test that the exception state is not modified by a destructor, + # even if close() fails. 
+ rawio = self.CloseFailureIO() + def f(): + self.TextIOWrapper(rawio).xyzzy + with support.captured_output("stderr") as s: + self.assertRaises(AttributeError, f) + s = s.getvalue().strip() + if s: + # The destructor *may* have printed an unraisable error, check it + self.assertEqual(len(s.splitlines()), 1) + self.assert_(s.startswith("Exception IOError: "), s) + self.assert_(s.endswith(" ignored"), s) # Systematic tests of the text I/O API - def testBasicIO(self): + def test_basic_io(self): for chunksize in (1, 2, 3, 4, 5, 15, 16, 17, 31, 32, 33, 63, 64, 65): for enc in "ascii", "latin1", "utf8" :# , "utf-16-be", "utf-16-le": - f = io.open(test_support.TESTFN, "w+", encoding=enc) + f = self.open(support.TESTFN, "w+", encoding=enc) f._CHUNK_SIZE = chunksize - self.assertEquals(f.write(u"abc"), 3) + self.assertEquals(f.write("abc"), 3) f.close() - f = io.open(test_support.TESTFN, "r+", encoding=enc) + f = self.open(support.TESTFN, "r+", encoding=enc) f._CHUNK_SIZE = chunksize self.assertEquals(f.tell(), 0) - self.assertEquals(f.read(), u"abc") + self.assertEquals(f.read(), "abc") cookie = f.tell() self.assertEquals(f.seek(0), 0) - self.assertEquals(f.read(2), u"ab") - self.assertEquals(f.read(1), u"c") - self.assertEquals(f.read(1), u"") - self.assertEquals(f.read(), u"") + self.assertEquals(f.read(2), "ab") + self.assertEquals(f.read(1), "c") + self.assertEquals(f.read(1), "") + self.assertEquals(f.read(), "") self.assertEquals(f.tell(), cookie) self.assertEquals(f.seek(0), 0) self.assertEquals(f.seek(0, 2), cookie) - self.assertEquals(f.write(u"def"), 3) + self.assertEquals(f.write("def"), 3) self.assertEquals(f.seek(cookie), cookie) - self.assertEquals(f.read(), u"def") + self.assertEquals(f.read(), "def") if enc.startswith("utf"): self.multi_line_test(f, enc) f.close() @@ -961,13 +1778,13 @@ class TextIOWrapperTest(unittest.TestCas def multi_line_test(self, f, enc): f.seek(0) f.truncate() - sample = u"s\xff\u0fff\uffff" + sample = "s\xff\u0fff\uffff" wlines = [] 
for size in (0, 1, 2, 3, 4, 5, 30, 31, 32, 33, 62, 63, 64, 65, 1000): chars = [] for i in range(size): chars.append(sample[i % len(sample)]) - line = u"".join(chars) + u"\n" + line = "".join(chars) + "\n" wlines.append((f.tell(), line)) f.write(line) f.seek(0) @@ -980,28 +1797,28 @@ class TextIOWrapperTest(unittest.TestCas rlines.append((pos, line)) self.assertEquals(rlines, wlines) - def testTelling(self): - f = io.open(test_support.TESTFN, "w+", encoding="utf8") + def test_telling(self): + f = self.open(support.TESTFN, "w+", encoding="utf8") p0 = f.tell() - f.write(u"\xff\n") + f.write("\xff\n") p1 = f.tell() - f.write(u"\xff\n") + f.write("\xff\n") p2 = f.tell() f.seek(0) self.assertEquals(f.tell(), p0) - self.assertEquals(f.readline(), u"\xff\n") + self.assertEquals(f.readline(), "\xff\n") self.assertEquals(f.tell(), p1) - self.assertEquals(f.readline(), u"\xff\n") + self.assertEquals(f.readline(), "\xff\n") self.assertEquals(f.tell(), p2) f.seek(0) for line in f: - self.assertEquals(line, u"\xff\n") + self.assertEquals(line, "\xff\n") self.assertRaises(IOError, f.tell) self.assertEquals(f.tell(), p2) f.close() - def testSeeking(self): - chunk_size = io.TextIOWrapper._CHUNK_SIZE + def test_seeking(self): + chunk_size = _default_chunk_size() prefix_size = chunk_size - 2 u_prefix = "a" * prefix_size prefix = bytes(u_prefix.encode("utf-8")) @@ -1009,43 +1826,46 @@ class TextIOWrapperTest(unittest.TestCas u_suffix = "\u8888\n" suffix = bytes(u_suffix.encode("utf-8")) line = prefix + suffix - f = io.open(test_support.TESTFN, "wb") + f = self.open(support.TESTFN, "wb") f.write(line*2) f.close() - f = io.open(test_support.TESTFN, "r", encoding="utf-8") + f = self.open(support.TESTFN, "r", encoding="utf-8") s = f.read(prefix_size) - self.assertEquals(s, unicode(prefix, "ascii")) + self.assertEquals(s, prefix.decode("ascii")) self.assertEquals(f.tell(), prefix_size) self.assertEquals(f.readline(), u_suffix) - def testSeekingToo(self): + def test_seeking_too(self): # 
Regression test for a specific bug data = b'\xe0\xbf\xbf\n' - f = io.open(test_support.TESTFN, "wb") + f = self.open(support.TESTFN, "wb") f.write(data) f.close() - f = io.open(test_support.TESTFN, "r", encoding="utf-8") + f = self.open(support.TESTFN, "r", encoding="utf-8") f._CHUNK_SIZE # Just test that it exists f._CHUNK_SIZE = 2 f.readline() f.tell() - def testSeekAndTell(self): - """Test seek/tell using the StatefulIncrementalDecoder.""" + def test_seek_and_tell(self): + #Test seek/tell using the StatefulIncrementalDecoder. + # Make test faster by doing smaller seeks + CHUNK_SIZE = 128 - def testSeekAndTellWithData(data, min_pos=0): + def test_seek_and_tell_with_data(data, min_pos=0): """Tell/seek to various points within a data stream and ensure that the decoded data returned by read() is consistent.""" - f = io.open(test_support.TESTFN, 'wb') + f = self.open(support.TESTFN, 'wb') f.write(data) f.close() - f = io.open(test_support.TESTFN, encoding='test_decoder') + f = self.open(support.TESTFN, encoding='test_decoder') + f._CHUNK_SIZE = CHUNK_SIZE decoded = f.read() f.close() for i in range(min_pos, len(decoded) + 1): # seek positions for j in [1, 5, len(decoded) - i]: # read lengths - f = io.open(test_support.TESTFN, encoding='test_decoder') + f = self.open(support.TESTFN, encoding='test_decoder') self.assertEquals(f.read(i), decoded[:i]) cookie = f.tell() self.assertEquals(f.read(j), decoded[i:i + j]) @@ -1060,23 +1880,22 @@ class TextIOWrapperTest(unittest.TestCas try: # Try each test case. for input, _, _ in StatefulIncrementalDecoderTest.test_cases: - testSeekAndTellWithData(input) + test_seek_and_tell_with_data(input) # Position each test case so that it crosses a chunk boundary. - CHUNK_SIZE = io.TextIOWrapper._CHUNK_SIZE for input, _, _ in StatefulIncrementalDecoderTest.test_cases: offset = CHUNK_SIZE - len(input)//2 prefix = b'.'*offset # Don't bother seeking into the prefix (takes too long). 
min_pos = offset*2 - testSeekAndTellWithData(prefix + input, min_pos) + test_seek_and_tell_with_data(prefix + input, min_pos) # Ensure our test decoder won't interfere with subsequent tests. finally: StatefulIncrementalDecoder.codecEnabled = 0 - def testEncodedWrites(self): - data = u"1234567890" + def test_encoded_writes(self): + data = "1234567890" tests = ("utf-16", "utf-16-le", "utf-16-be", @@ -1084,54 +1903,26 @@ class TextIOWrapperTest(unittest.TestCas "utf-32-le", "utf-32-be") for encoding in tests: - buf = io.BytesIO() - f = io.TextIOWrapper(buf, encoding=encoding) + buf = self.BytesIO() + f = self.TextIOWrapper(buf, encoding=encoding) # Check if the BOM is written only once (see issue1753). f.write(data) f.write(data) f.seek(0) self.assertEquals(f.read(), data * 2) + f.seek(0) + self.assertEquals(f.read(), data * 2) self.assertEquals(buf.getvalue(), (data * 2).encode(encoding)) - def timingTest(self): - timer = time.time - enc = "utf8" - line = "\0\x0f\xff\u0fff\uffff\U000fffff\U0010ffff"*3 + "\n" - nlines = 10000 - nchars = len(line) - nbytes = len(line.encode(enc)) - for chunk_size in (32, 64, 128, 256): - f = io.open(test_support.TESTFN, "w+", encoding=enc) - f._CHUNK_SIZE = chunk_size - t0 = timer() - for i in range(nlines): - f.write(line) - f.flush() - t1 = timer() - f.seek(0) - for line in f: - pass - t2 = timer() - f.seek(0) - while f.readline(): - pass - t3 = timer() - f.seek(0) - while f.readline(): - f.tell() - t4 = timer() - f.close() - if test_support.verbose: - print("\nTiming test: %d lines of %d characters (%d bytes)" % - (nlines, nchars, nbytes)) - print("File chunk size: %6s" % f._CHUNK_SIZE) - print("Writing: %6.3f seconds" % (t1-t0)) - print("Reading using iteration: %6.3f seconds" % (t2-t1)) - print("Reading using readline(): %6.3f seconds" % (t3-t2)) - print("Using readline()+tell(): %6.3f seconds" % (t4-t3)) + def test_unreadable(self): + class UnReadable(self.BytesIO): + def readable(self): + return False + txt = 
self.TextIOWrapper(UnReadable()) + self.assertRaises(IOError, txt.read) - def testReadOneByOne(self): - txt = io.TextIOWrapper(io.BytesIO(b"AA\r\nBB")) + def test_read_one_by_one(self): + txt = self.TextIOWrapper(self.BytesIO(b"AA\r\nBB")) reads = "" while True: c = txt.read(1) @@ -1141,9 +1932,9 @@ class TextIOWrapperTest(unittest.TestCas self.assertEquals(reads, "AA\nBB") # read in amounts equal to TextIOWrapper._CHUNK_SIZE which is 128. - def testReadByChunk(self): + def test_read_by_chunk(self): # make sure "\r\n" straddles 128 char boundary. - txt = io.TextIOWrapper(io.BytesIO(b"A" * 127 + b"\r\nB")) + txt = self.TextIOWrapper(self.BytesIO(b"A" * 127 + b"\r\nB")) reads = "" while True: c = txt.read(128) @@ -1153,7 +1944,7 @@ class TextIOWrapperTest(unittest.TestCas self.assertEquals(reads, "A"*127+"\nB") def test_issue1395_1(self): - txt = io.TextIOWrapper(io.BytesIO(self.testdata), encoding="ascii") + txt = self.TextIOWrapper(self.BytesIO(self.testdata), encoding="ascii") # read one char at a time reads = "" @@ -1165,7 +1956,7 @@ class TextIOWrapperTest(unittest.TestCas self.assertEquals(reads, self.normalized) def test_issue1395_2(self): - txt = io.TextIOWrapper(io.BytesIO(self.testdata), encoding="ascii") + txt = self.TextIOWrapper(self.BytesIO(self.testdata), encoding="ascii") txt._CHUNK_SIZE = 4 reads = "" @@ -1177,7 +1968,7 @@ class TextIOWrapperTest(unittest.TestCas self.assertEquals(reads, self.normalized) def test_issue1395_3(self): - txt = io.TextIOWrapper(io.BytesIO(self.testdata), encoding="ascii") + txt = self.TextIOWrapper(self.BytesIO(self.testdata), encoding="ascii") txt._CHUNK_SIZE = 4 reads = txt.read(4) @@ -1188,7 +1979,7 @@ class TextIOWrapperTest(unittest.TestCas self.assertEquals(reads, self.normalized) def test_issue1395_4(self): - txt = io.TextIOWrapper(io.BytesIO(self.testdata), encoding="ascii") + txt = self.TextIOWrapper(self.BytesIO(self.testdata), encoding="ascii") txt._CHUNK_SIZE = 4 reads = txt.read(4) @@ -1196,7 +1987,7 @@ class 
TextIOWrapperTest(unittest.TestCas self.assertEquals(reads, self.normalized) def test_issue1395_5(self): - txt = io.TextIOWrapper(io.BytesIO(self.testdata), encoding="ascii") + txt = self.TextIOWrapper(self.BytesIO(self.testdata), encoding="ascii") txt._CHUNK_SIZE = 4 reads = txt.read(4) @@ -1206,12 +1997,84 @@ class TextIOWrapperTest(unittest.TestCas self.assertEquals(txt.read(4), "BBB\n") def test_issue2282(self): - buffer = io.BytesIO(self.testdata) - txt = io.TextIOWrapper(buffer, encoding="ascii") + buffer = self.BytesIO(self.testdata) + txt = self.TextIOWrapper(buffer, encoding="ascii") self.assertEqual(buffer.seekable(), txt.seekable()) - def check_newline_decoder_utf8(self, decoder): + @unittest.skip("Issue #6213 with incremental encoders") + def test_append_bom(self): + # The BOM is not written again when appending to a non-empty file + filename = support.TESTFN + for charset in ('utf-8-sig', 'utf-16', 'utf-32'): + with self.open(filename, 'w', encoding=charset) as f: + f.write('aaa') + pos = f.tell() + with self.open(filename, 'rb') as f: + self.assertEquals(f.read(), 'aaa'.encode(charset)) + + with self.open(filename, 'a', encoding=charset) as f: + f.write('xxx') + with self.open(filename, 'rb') as f: + self.assertEquals(f.read(), 'aaaxxx'.encode(charset)) + + @unittest.skip("Issue #6213 with incremental encoders") + def test_seek_bom(self): + # Same test, but when seeking manually + filename = support.TESTFN + for charset in ('utf-8-sig', 'utf-16', 'utf-32'): + with self.open(filename, 'w', encoding=charset) as f: + f.write('aaa') + pos = f.tell() + with self.open(filename, 'r+', encoding=charset) as f: + f.seek(pos) + f.write('zzz') + f.seek(0) + f.write('bbb') + with self.open(filename, 'rb') as f: + self.assertEquals(f.read(), 'bbbzzz'.encode(charset)) + + def test_errors_property(self): + with self.open(support.TESTFN, "w") as f: + self.assertEqual(f.errors, "strict") + with self.open(support.TESTFN, "w", errors="replace") as f: + 
self.assertEqual(f.errors, "replace") + + +class CTextIOWrapperTest(TextIOWrapperTest): + + def test_initialization(self): + r = self.BytesIO(b"\xc3\xa9\n\n") + b = self.BufferedReader(r, 1000) + t = self.TextIOWrapper(b) + self.assertRaises(TypeError, t.__init__, b, newline=42) + self.assertRaises(ValueError, t.read) + self.assertRaises(ValueError, t.__init__, b, newline='xyzzy') + self.assertRaises(ValueError, t.read) + + def test_garbage_collection(self): + # C TextIOWrapper objects are collected, and collecting them flushes + # all data to disk. + # The Python version has __del__, so it ends in gc.garbage instead. + rawio = io.FileIO(support.TESTFN, "wb") + b = self.BufferedWriter(rawio) + t = self.TextIOWrapper(b, encoding="ascii") + t.write("456def") + t.x = t + wr = weakref.ref(t) + del t + support.gc_collect() + self.assert_(wr() is None, wr) + with open(support.TESTFN, "rb") as f: + self.assertEqual(f.read(), b"456def") + +class PyTextIOWrapperTest(TextIOWrapperTest): + pass + + +class IncrementalNewlineDecoderTest(unittest.TestCase): + + def check_newline_decoding_utf8(self, decoder): # UTF-8 specific tests for a newline decoder def _check_decode(b, s, **kwargs): # We exercise getstate() / setstate() as well as decode() @@ -1253,12 +2116,20 @@ class TextIOWrapperTest(unittest.TestCas _check_decode(b'\xe8\xa2\x88\r', "\u8888") _check_decode(b'\n', "\n") - def check_newline_decoder(self, decoder, encoding): + def check_newline_decoding(self, decoder, encoding): result = [] - encoder = codecs.getincrementalencoder(encoding)() - def _decode_bytewise(s): - for b in encoder.encode(s): - result.append(decoder.decode(b)) + if encoding is not None: + encoder = codecs.getincrementalencoder(encoding)() + def _decode_bytewise(s): + # Decode one byte at a time + for b in encoder.encode(s): + result.append(decoder.decode(b)) + else: + encoder = None + def _decode_bytewise(s): + # Decode one char at a time + for c in s: + result.append(decoder.decode(c)) 
self.assertEquals(decoder.newlines, None) _decode_bytewise("abc\n\r") self.assertEquals(decoder.newlines, '\n') @@ -1271,22 +2142,47 @@ class TextIOWrapperTest(unittest.TestCas _decode_bytewise("abc\r") self.assertEquals("".join(result), "abc\n\nabcabc\nabcabc") decoder.reset() - self.assertEquals(decoder.decode("abc".encode(encoding)), "abc") + input = "abc" + if encoder is not None: + encoder.reset() + input = encoder.encode(input) + self.assertEquals(decoder.decode(input), "abc") self.assertEquals(decoder.newlines, None) def test_newline_decoder(self): encodings = ( - 'utf-8', 'latin-1', + # None meaning the IncrementalNewlineDecoder takes unicode input + # rather than bytes input + None, 'utf-8', 'latin-1', 'utf-16', 'utf-16-le', 'utf-16-be', 'utf-32', 'utf-32-le', 'utf-32-be', ) for enc in encodings: - decoder = codecs.getincrementaldecoder(enc)() - decoder = io.IncrementalNewlineDecoder(decoder, translate=True) - self.check_newline_decoder(decoder, enc) + decoder = enc and codecs.getincrementaldecoder(enc)() + decoder = self.IncrementalNewlineDecoder(decoder, translate=True) + self.check_newline_decoding(decoder, enc) decoder = codecs.getincrementaldecoder("utf-8")() - decoder = io.IncrementalNewlineDecoder(decoder, translate=True) - self.check_newline_decoder_utf8(decoder) + decoder = self.IncrementalNewlineDecoder(decoder, translate=True) + self.check_newline_decoding_utf8(decoder) + + def test_newline_bytes(self): + # Issue 5433: Excessive optimization in IncrementalNewlineDecoder + def _check(dec): + self.assertEquals(dec.newlines, None) + self.assertEquals(dec.decode("\u0D00"), "\u0D00") + self.assertEquals(dec.newlines, None) + self.assertEquals(dec.decode("\u0A00"), "\u0A00") + self.assertEquals(dec.newlines, None) + dec = self.IncrementalNewlineDecoder(None, translate=False) + _check(dec) + dec = self.IncrementalNewlineDecoder(None, translate=True) + _check(dec) + +class CIncrementalNewlineDecoderTest(IncrementalNewlineDecoderTest): + pass + +class 
PyIncrementalNewlineDecoderTest(IncrementalNewlineDecoderTest): + pass # XXX Tests for open() @@ -1294,40 +2190,39 @@ class TextIOWrapperTest(unittest.TestCas class MiscIOTest(unittest.TestCase): def tearDown(self): - test_support.unlink(test_support.TESTFN) + support.unlink(support.TESTFN) - def testImport__all__(self): - for name in io.__all__: - obj = getattr(io, name, None) + def test___all__(self): + for name in self.io.__all__: + obj = getattr(self.io, name, None) self.assertTrue(obj is not None, name) if name == "open": continue - elif "error" in name.lower(): + elif "error" in name.lower() or name == "UnsupportedOperation": self.assertTrue(issubclass(obj, Exception), name) elif not name.startswith("SEEK_"): - self.assertTrue(issubclass(obj, io.IOBase)) - + self.assertTrue(issubclass(obj, self.IOBase)) def test_attributes(self): - f = io.open(test_support.TESTFN, "wb", buffering=0) + f = self.open(support.TESTFN, "wb", buffering=0) self.assertEquals(f.mode, "wb") f.close() - f = io.open(test_support.TESTFN, "U") - self.assertEquals(f.name, test_support.TESTFN) - self.assertEquals(f.buffer.name, test_support.TESTFN) - self.assertEquals(f.buffer.raw.name, test_support.TESTFN) + f = self.open(support.TESTFN, "U") + self.assertEquals(f.name, support.TESTFN) + self.assertEquals(f.buffer.name, support.TESTFN) + self.assertEquals(f.buffer.raw.name, support.TESTFN) self.assertEquals(f.mode, "U") self.assertEquals(f.buffer.mode, "rb") self.assertEquals(f.buffer.raw.mode, "rb") f.close() - f = io.open(test_support.TESTFN, "w+") + f = self.open(support.TESTFN, "w+") self.assertEquals(f.mode, "w+") self.assertEquals(f.buffer.mode, "rb+") # Does it really matter? 
self.assertEquals(f.buffer.raw.mode, "rb+") - g = io.open(f.fileno(), "wb", closefd=False) + g = self.open(f.fileno(), "wb", closefd=False) self.assertEquals(g.mode, "wb") self.assertEquals(g.raw.mode, "wb") self.assertEquals(g.name, f.fileno()) @@ -1335,13 +2230,138 @@ class MiscIOTest(unittest.TestCase): f.close() g.close() + def test_io_after_close(self): + for kwargs in [ + {"mode": "w"}, + {"mode": "wb"}, + {"mode": "w", "buffering": 1}, + {"mode": "w", "buffering": 2}, + {"mode": "wb", "buffering": 0}, + {"mode": "r"}, + {"mode": "rb"}, + {"mode": "r", "buffering": 1}, + {"mode": "r", "buffering": 2}, + {"mode": "rb", "buffering": 0}, + {"mode": "w+"}, + {"mode": "w+b"}, + {"mode": "w+", "buffering": 1}, + {"mode": "w+", "buffering": 2}, + {"mode": "w+b", "buffering": 0}, + ]: + f = self.open(support.TESTFN, **kwargs) + f.close() + self.assertRaises(ValueError, f.flush) + self.assertRaises(ValueError, f.fileno) + self.assertRaises(ValueError, f.isatty) + self.assertRaises(ValueError, f.__iter__) + if hasattr(f, "peek"): + self.assertRaises(ValueError, f.peek, 1) + self.assertRaises(ValueError, f.read) + if hasattr(f, "read1"): + self.assertRaises(ValueError, f.read1, 1024) + if hasattr(f, "readinto"): + self.assertRaises(ValueError, f.readinto, bytearray(1024)) + self.assertRaises(ValueError, f.readline) + self.assertRaises(ValueError, f.readlines) + self.assertRaises(ValueError, f.seek, 0) + self.assertRaises(ValueError, f.tell) + self.assertRaises(ValueError, f.truncate) + self.assertRaises(ValueError, f.write, + b"" if "b" in kwargs['mode'] else "") + self.assertRaises(ValueError, f.writelines, []) + self.assertRaises(ValueError, next, f) + + def test_blockingioerror(self): + # Various BlockingIOError issues + self.assertRaises(TypeError, self.BlockingIOError) + self.assertRaises(TypeError, self.BlockingIOError, 1) + self.assertRaises(TypeError, self.BlockingIOError, 1, 2, 3, 4) + self.assertRaises(TypeError, self.BlockingIOError, 1, "", None) + b = 
self.BlockingIOError(1, "") + self.assertEqual(b.characters_written, 0) + class C(unicode): + pass + c = C("") + b = self.BlockingIOError(1, c) + c.b = b + b.c = c + wr = weakref.ref(c) + del c, b + support.gc_collect() + self.assert_(wr() is None, wr) + + def test_abcs(self): + # Test the visible base classes are ABCs. + self.assertTrue(isinstance(self.IOBase, abc.ABCMeta)) + self.assertTrue(isinstance(self.RawIOBase, abc.ABCMeta)) + self.assertTrue(isinstance(self.BufferedIOBase, abc.ABCMeta)) + self.assertTrue(isinstance(self.TextIOBase, abc.ABCMeta)) + + def _check_abc_inheritance(self, abcmodule): + with self.open(support.TESTFN, "wb", buffering=0) as f: + self.assertTrue(isinstance(f, abcmodule.IOBase)) + self.assertTrue(isinstance(f, abcmodule.RawIOBase)) + self.assertFalse(isinstance(f, abcmodule.BufferedIOBase)) + self.assertFalse(isinstance(f, abcmodule.TextIOBase)) + with self.open(support.TESTFN, "wb") as f: + self.assertTrue(isinstance(f, abcmodule.IOBase)) + self.assertFalse(isinstance(f, abcmodule.RawIOBase)) + self.assertTrue(isinstance(f, abcmodule.BufferedIOBase)) + self.assertFalse(isinstance(f, abcmodule.TextIOBase)) + with self.open(support.TESTFN, "w") as f: + self.assertTrue(isinstance(f, abcmodule.IOBase)) + self.assertFalse(isinstance(f, abcmodule.RawIOBase)) + self.assertFalse(isinstance(f, abcmodule.BufferedIOBase)) + self.assertTrue(isinstance(f, abcmodule.TextIOBase)) + + def test_abc_inheritance(self): + # Test implementations inherit from their respective ABCs + self._check_abc_inheritance(self) + + def test_abc_inheritance_official(self): + # Test implementations inherit from the official ABCs of the + # baseline "io" module. 
+ self._check_abc_inheritance(io) + +class CMiscIOTest(MiscIOTest): + io = io + +class PyMiscIOTest(MiscIOTest): + io = pyio def test_main(): - test_support.run_unittest(IOTest, BytesIOTest, StringIOTest, - BufferedReaderTest, BufferedWriterTest, - BufferedRWPairTest, BufferedRandomTest, - StatefulIncrementalDecoderTest, - TextIOWrapperTest, MiscIOTest) + tests = (CIOTest, PyIOTest, + CBufferedReaderTest, PyBufferedReaderTest, + CBufferedWriterTest, PyBufferedWriterTest, + CBufferedRWPairTest, PyBufferedRWPairTest, + CBufferedRandomTest, PyBufferedRandomTest, + StatefulIncrementalDecoderTest, + CIncrementalNewlineDecoderTest, PyIncrementalNewlineDecoderTest, + CTextIOWrapperTest, PyTextIOWrapperTest, + CMiscIOTest, PyMiscIOTest, + ) + + # Put the namespaces of the IO module we are testing and some useful mock + # classes in the __dict__ of each test. + mocks = (MockRawIO, MisbehavedRawIO, MockFileIO, CloseFailureIO, + MockNonBlockWriterIO) + all_members = io.__all__ + ["IncrementalNewlineDecoder"] + c_io_ns = dict((name, getattr(io, name)) for name in all_members) + py_io_ns = dict((name, getattr(pyio, name)) for name in all_members) + globs = globals() + c_io_ns.update((x.__name__, globs["C" + x.__name__]) for x in mocks) + py_io_ns.update((x.__name__, globs["Py" + x.__name__]) for x in mocks) + # Avoid turning open into a bound method. + py_io_ns["open"] = pyio.OpenWrapper + for test in tests: + if test.__name__.startswith("C"): + for name, obj in c_io_ns.items(): + setattr(test, name, obj) + elif test.__name__.startswith("Py"): + for name, obj in py_io_ns.items(): + setattr(test, name, obj) + + support.run_unittest(*tests) if __name__ == "__main__": - unittest.main() + test_main() diff -r c5c04a1ff3b4 -r b3fbeeb23fd6 Lib/test/test_largefile.py --- a/Lib/test/test_largefile.py +++ b/Lib/test/test_largefile.py @@ -1,12 +1,16 @@ """Test largefile support on system where this makes sense. 
""" +from __future__ import print_function + import os import stat import sys import unittest from test.test_support import run_unittest, TESTFN, verbose, requires, \ unlink +import io # C implementation of io +import _pyio as pyio # Python implementation of io try: import signal @@ -18,10 +22,10 @@ except (ImportError, AttributeError): pass # create >2GB file (2GB = 2147483648 bytes) -size = 2500000000L +size = 2500000000 -class TestCase(unittest.TestCase): +class LargeFileTest(unittest.TestCase): """Test that each file function works as expected for a large (i.e. > 2GB, do we have to check > 4GB) files. @@ -33,28 +37,28 @@ class TestCase(unittest.TestCase): def test_seek(self): if verbose: - print 'create large file via seek (may be sparse file) ...' - with open(TESTFN, 'wb') as f: - f.write('z') + print('create large file via seek (may be sparse file) ...') + with self.open(TESTFN, 'wb') as f: + f.write(b'z') f.seek(0) f.seek(size) - f.write('a') + f.write(b'a') f.flush() if verbose: - print 'check file size with os.fstat' + print('check file size with os.fstat') self.assertEqual(os.fstat(f.fileno())[stat.ST_SIZE], size+1) def test_osstat(self): if verbose: - print 'check file size with os.stat' + print('check file size with os.stat') self.assertEqual(os.stat(TESTFN)[stat.ST_SIZE], size+1) def test_seek_read(self): if verbose: - print 'play around with seek() and read() with the built largefile' - with open(TESTFN, 'rb') as f: + print('play around with seek() and read() with the built largefile') + with self.open(TESTFN, 'rb') as f: self.assertEqual(f.tell(), 0) - self.assertEqual(f.read(1), 'z') + self.assertEqual(f.read(1), b'z') self.assertEqual(f.tell(), 1) f.seek(0) self.assertEqual(f.tell(), 0) @@ -77,15 +81,15 @@ class TestCase(unittest.TestCase): f.seek(size) self.assertEqual(f.tell(), size) # the 'a' that was written at the end of file above - self.assertEqual(f.read(1), 'a') + self.assertEqual(f.read(1), b'a') f.seek(-size-1, 1) - 
self.assertEqual(f.read(1), 'z') + self.assertEqual(f.read(1), b'z') self.assertEqual(f.tell(), 1) def test_lseek(self): if verbose: - print 'play around with os.lseek() with the built largefile' - with open(TESTFN, 'rb') as f: + print('play around with os.lseek() with the built largefile') + with self.open(TESTFN, 'rb') as f: self.assertEqual(os.lseek(f.fileno(), 0, 0), 0) self.assertEqual(os.lseek(f.fileno(), 42, 0), 42) self.assertEqual(os.lseek(f.fileno(), 42, 1), 84) @@ -95,16 +99,16 @@ class TestCase(unittest.TestCase): self.assertEqual(os.lseek(f.fileno(), -size-1, 2), 0) self.assertEqual(os.lseek(f.fileno(), size, 0), size) # the 'a' that was written at the end of file above - self.assertEqual(f.read(1), 'a') + self.assertEqual(f.read(1), b'a') def test_truncate(self): if verbose: - print 'try truncate' - with open(TESTFN, 'r+b') as f: + print('try truncate') + with self.open(TESTFN, 'r+b') as f: # this is already decided before start running the test suite # but we do it anyway for extra protection if not hasattr(f, 'truncate'): - raise unittest.SkipTest, "open().truncate() not available on this system" + raise unittest.SkipTest("open().truncate() not available on this system") f.seek(0, 2) # else we've lost track of the true size self.assertEqual(f.tell(), size+1) @@ -120,17 +124,25 @@ class TestCase(unittest.TestCase): newsize -= 1 f.seek(42) f.truncate(newsize) - self.assertEqual(f.tell(), 42) # else pointer moved + self.assertEqual(f.tell(), newsize) # else wasn't truncated f.seek(0, 2) - self.assertEqual(f.tell(), newsize) # else wasn't truncated - + self.assertEqual(f.tell(), newsize) # XXX truncate(larger than true size) is ill-defined # across platform; cut it waaaaay back f.seek(0) f.truncate(1) - self.assertEqual(f.tell(), 0) # else pointer moved + self.assertEqual(f.tell(), 1) # else pointer moved + f.seek(0) self.assertEqual(len(f.read()), 1) # else wasn't truncated + def test_seekable(self): + # Issue #5016; seekable() can return False when 
the current position + # is negative when truncated to an int. + for pos in (2**31-1, 2**31, 2**31+1): + with self.open(TESTFN, 'rb') as f: + f.seek(pos) + self.assert_(f.seekable()) + def test_main(): # On Windows and Mac OSX this test comsumes large resources; It @@ -144,34 +156,39 @@ def test_main(): # Only run if the current filesystem supports large files. # (Skip this test on Windows, since we now always support # large files.) - f = open(TESTFN, 'wb') + f = open(TESTFN, 'wb', buffering=0) try: # 2**31 == 2147483648 - f.seek(2147483649L) + f.seek(2147483649) # Seeking is not enough of a test: you must write and # flush, too! - f.write("x") + f.write(b'x') f.flush() except (IOError, OverflowError): f.close() unlink(TESTFN) - raise unittest.SkipTest, "filesystem does not have largefile support" + raise unittest.SkipTest("filesystem does not have largefile support") else: f.close() suite = unittest.TestSuite() - suite.addTest(TestCase('test_seek')) - suite.addTest(TestCase('test_osstat')) - suite.addTest(TestCase('test_seek_read')) - suite.addTest(TestCase('test_lseek')) - with open(TESTFN, 'w') as f: - if hasattr(f, 'truncate'): - suite.addTest(TestCase('test_truncate')) - unlink(TESTFN) + for _open, prefix in [(io.open, 'C'), (pyio.open, 'Py')]: + class TestCase(LargeFileTest): + pass + TestCase.open = staticmethod(_open) + TestCase.__name__ = prefix + LargeFileTest.__name__ + suite.addTest(TestCase('test_seek')) + suite.addTest(TestCase('test_osstat')) + suite.addTest(TestCase('test_seek_read')) + suite.addTest(TestCase('test_lseek')) + with _open(TESTFN, 'wb') as f: + if hasattr(f, 'truncate'): + suite.addTest(TestCase('test_truncate')) + suite.addTest(TestCase('test_seekable')) + unlink(TESTFN) try: run_unittest(suite) finally: unlink(TESTFN) - if __name__ == '__main__': test_main() diff -r c5c04a1ff3b4 -r b3fbeeb23fd6 Lib/test/test_memoryio.py --- a/Lib/test/test_memoryio.py +++ b/Lib/test/test_memoryio.py @@ -4,23 +4,66 @@ BytesIO -- for bytes """ from 
__future__ import unicode_literals +from __future__ import print_function import unittest -from test import test_support +from test import test_support as support import io +import _pyio as pyio import sys -import array -try: - import _bytesio - has_c_implementation = True -except ImportError: - has_c_implementation = False +class MemorySeekTestMixin: + + def testInit(self): + buf = self.buftype("1234567890") + bytesIo = self.ioclass(buf) + + def testRead(self): + buf = self.buftype("1234567890") + bytesIo = self.ioclass(buf) + + self.assertEquals(buf[:1], bytesIo.read(1)) + self.assertEquals(buf[1:5], bytesIo.read(4)) + self.assertEquals(buf[5:], bytesIo.read(900)) + self.assertEquals(self.EOF, bytesIo.read()) + + def testReadNoArgs(self): + buf = self.buftype("1234567890") + bytesIo = self.ioclass(buf) + + self.assertEquals(buf, bytesIo.read()) + self.assertEquals(self.EOF, bytesIo.read()) + + def testSeek(self): + buf = self.buftype("1234567890") + bytesIo = self.ioclass(buf) + + bytesIo.read(5) + bytesIo.seek(0) + self.assertEquals(buf, bytesIo.read()) + + bytesIo.seek(3) + self.assertEquals(buf[3:], bytesIo.read()) + self.assertRaises(TypeError, bytesIo.seek, 0.0) + + def testTell(self): + buf = self.buftype("1234567890") + bytesIo = self.ioclass(buf) + + self.assertEquals(0, bytesIo.tell()) + bytesIo.seek(5) + self.assertEquals(5, bytesIo.tell()) + bytesIo.seek(10000) + self.assertEquals(10000, bytesIo.tell()) class MemoryTestMixin: + def test_detach(self): + buf = self.ioclass() + self.assertRaises(self.UnsupportedOperation, buf.detach) + def write_ops(self, f, t): self.assertEqual(f.write(t("blah.")), 5) self.assertEqual(f.seek(0), 0) @@ -151,7 +194,7 @@ class MemoryTestMixin: self.assertEqual(memio.readline(), self.EOF) memio.seek(0) self.assertEqual(type(memio.readline()), type(buf)) - self.assertEqual(memio.readline(None), buf) + self.assertEqual(memio.readline(), buf) self.assertRaises(TypeError, memio.readline, '') memio.close() 
self.assertRaises(ValueError, memio.readline) @@ -197,7 +240,7 @@ class MemoryTestMixin: self.assertEqual(i, 10) memio = self.ioclass(buf * 2) memio.close() - self.assertRaises(ValueError, memio.next) + self.assertRaises(ValueError, next, memio) def test_getvalue(self): buf = self.buftype("1234567890") @@ -299,11 +342,14 @@ class MemoryTestMixin: self.assertEqual(test2(), buf) -class PyBytesIOTest(MemoryTestMixin, unittest.TestCase): +class PyBytesIOTest(MemoryTestMixin, MemorySeekTestMixin, unittest.TestCase): + + UnsupportedOperation = pyio.UnsupportedOperation + @staticmethod def buftype(s): return s.encode("ascii") - ioclass = io._BytesIO + ioclass = pyio.BytesIO EOF = b"" def test_read1(self): @@ -333,7 +379,8 @@ class PyBytesIOTest(MemoryTestMixin, uni self.assertEqual(memio.readinto(b), 0) self.assertEqual(b, b"") self.assertRaises(TypeError, memio.readinto, '') - a = array.array(b'b', map(ord, b"hello world")) + import array + a = array.array(b'b', b"hello world") memio = self.ioclass(buf) memio.readinto(a) self.assertEqual(a.tostring(), b"1234567890d") @@ -365,19 +412,41 @@ class PyBytesIOTest(MemoryTestMixin, uni def test_bytes_array(self): buf = b"1234567890" - - a = array.array(b'b', map(ord, buf)) + import array + a = array.array(b'b', buf) memio = self.ioclass(a) self.assertEqual(memio.getvalue(), buf) self.assertEqual(memio.write(a), 10) self.assertEqual(memio.getvalue(), buf) -class PyStringIOTest(MemoryTestMixin, unittest.TestCase): +class PyStringIOTest(MemoryTestMixin, MemorySeekTestMixin, unittest.TestCase): buftype = unicode - ioclass = io.StringIO + ioclass = pyio.StringIO + UnsupportedOperation = pyio.UnsupportedOperation EOF = "" + # TextIO-specific behaviour. + + def test_newlines_property(self): + memio = self.ioclass(newline=None) + # The C StringIO decodes newlines in write() calls, but the Python + # implementation only does when reading. This function forces them to + # be decoded for testing. 
+ def force_decode(): + memio.seek(0) + memio.read() + self.assertEqual(memio.newlines, None) + memio.write("a\n") + force_decode() + self.assertEqual(memio.newlines, "\n") + memio.write("b\r\n") + force_decode() + self.assertEqual(memio.newlines, ("\n", "\r\n")) + memio.write("c\rd") + force_decode() + self.assertEqual(memio.newlines, ("\r", "\n", "\r\n")) + def test_relative_seek(self): memio = self.ioclass() @@ -388,29 +457,107 @@ class PyStringIOTest(MemoryTestMixin, un self.assertRaises(IOError, memio.seek, 1, 1) self.assertRaises(IOError, memio.seek, 1, 2) + def test_textio_properties(self): + memio = self.ioclass() + + # These are just dummy values but we nevertheless check them for fear + # of unexpected breakage. + self.assertIsNone(memio.encoding) + self.assertIsNone(memio.errors) + self.assertFalse(memio.line_buffering) + + def test_newline_none(self): + # newline=None + memio = self.ioclass("a\nb\r\nc\rd", newline=None) + self.assertEqual(list(memio), ["a\n", "b\n", "c\n", "d"]) + memio.seek(0) + self.assertEqual(memio.read(1), "a") + self.assertEqual(memio.read(2), "\nb") + self.assertEqual(memio.read(2), "\nc") + self.assertEqual(memio.read(1), "\n") + memio = self.ioclass(newline=None) + self.assertEqual(2, memio.write("a\n")) + self.assertEqual(3, memio.write("b\r\n")) + self.assertEqual(3, memio.write("c\rd")) + memio.seek(0) + self.assertEqual(memio.read(), "a\nb\nc\nd") + memio = self.ioclass("a\r\nb", newline=None) + self.assertEqual(memio.read(3), "a\nb") + + def test_newline_empty(self): + # newline="" + memio = self.ioclass("a\nb\r\nc\rd", newline="") + self.assertEqual(list(memio), ["a\n", "b\r\n", "c\r", "d"]) + memio.seek(0) + self.assertEqual(memio.read(4), "a\nb\r") + self.assertEqual(memio.read(2), "\nc") + self.assertEqual(memio.read(1), "\r") + memio = self.ioclass(newline="") + self.assertEqual(2, memio.write("a\n")) + self.assertEqual(2, memio.write("b\r")) + self.assertEqual(2, memio.write("\nc")) + self.assertEqual(2, 
memio.write("\rd")) + memio.seek(0) + self.assertEqual(list(memio), ["a\n", "b\r\n", "c\r", "d"]) + + def test_newline_lf(self): + # newline="\n" + memio = self.ioclass("a\nb\r\nc\rd") + self.assertEqual(list(memio), ["a\n", "b\r\n", "c\rd"]) + + def test_newline_cr(self): + # newline="\r" + memio = self.ioclass("a\nb\r\nc\rd", newline="\r") + memio.seek(0) + self.assertEqual(memio.read(), "a\rb\r\rc\rd") + memio.seek(0) + self.assertEqual(list(memio), ["a\r", "b\r", "\r", "c\r", "d"]) + + def test_newline_crlf(self): + # newline="\r\n" + memio = self.ioclass("a\nb\r\nc\rd", newline="\r\n") + memio.seek(0) + self.assertEqual(memio.read(), "a\r\nb\r\r\nc\rd") + memio.seek(0) + self.assertEqual(list(memio), ["a\r\n", "b\r\r\n", "c\rd"]) + + def test_issue5265(self): + # StringIO can duplicate newlines in universal newlines mode + memio = self.ioclass("a\r\nb\r\n", newline=None) + self.assertEqual(memio.read(5), "a\nb\n") + + +class CBytesIOTest(PyBytesIOTest): + ioclass = io.BytesIO + UnsupportedOperation = io.UnsupportedOperation + + test_bytes_array = unittest.skip( + "array.array() does not have the new buffer API" + )(PyBytesIOTest.test_bytes_array) + + +class CStringIOTest(PyStringIOTest): + ioclass = io.StringIO + UnsupportedOperation = io.UnsupportedOperation + # XXX: For the Python version of io.StringIO, this is highly # dependent on the encoding used for the underlying buffer. 
- # def test_widechar(self): - # buf = self.buftype("\U0002030a\U00020347") - # memio = self.ioclass(buf) - # - # self.assertEqual(memio.getvalue(), buf) - # self.assertEqual(memio.write(buf), len(buf)) - # self.assertEqual(memio.tell(), len(buf)) - # self.assertEqual(memio.getvalue(), buf) - # self.assertEqual(memio.write(buf), len(buf)) - # self.assertEqual(memio.tell(), len(buf) * 2) - # self.assertEqual(memio.getvalue(), buf + buf) + def test_widechar(self): + buf = self.buftype("\U0002030a\U00020347") + memio = self.ioclass(buf) -if has_c_implementation: - class CBytesIOTest(PyBytesIOTest): - ioclass = io.BytesIO + self.assertEqual(memio.getvalue(), buf) + self.assertEqual(memio.write(buf), len(buf)) + self.assertEqual(memio.tell(), len(buf)) + self.assertEqual(memio.getvalue(), buf) + self.assertEqual(memio.write(buf), len(buf)) + self.assertEqual(memio.tell(), len(buf) * 2) + self.assertEqual(memio.getvalue(), buf + buf) + def test_main(): - tests = [PyBytesIOTest, PyStringIOTest] - if has_c_implementation: - tests.extend([CBytesIOTest]) - test_support.run_unittest(*tests) + tests = [PyBytesIOTest, PyStringIOTest, CBytesIOTest, CStringIOTest] + support.run_unittest(*tests) if __name__ == '__main__': test_main() diff -r c5c04a1ff3b4 -r b3fbeeb23fd6 Lib/test/test_support.py --- a/Lib/test/test_support.py +++ b/Lib/test/test_support.py @@ -29,7 +29,7 @@ __all__ = ["Error", "TestFailed", "Resou "run_with_locale", "set_memlimit", "bigmemtest", "bigaddrspacetest", "BasicTestRunner", "run_unittest", "run_doctest", "threading_setup", "threading_cleanup", "reap_children", "cpython_only", - "check_impl_detail", "get_attribute"] + "check_impl_detail", "get_attribute", "py3k_bytes"] class Error(Exception): """Base class for regression test exceptions.""" @@ -968,3 +968,18 @@ def reap_children(): break except: break + +def py3k_bytes(b): + """Emulate the py3k bytes() constructor. + + NOTE: This is only a best effort function. + """ + try: + # memoryview? 
+ return b.tobytes() + except AttributeError: + try: + # iterable of ints? + return b"".join(chr(x) for x in b) + except TypeError: + return bytes(b) diff -r c5c04a1ff3b4 -r b3fbeeb23fd6 Lib/test/test_univnewlines.py --- a/Lib/test/test_univnewlines.py +++ b/Lib/test/test_univnewlines.py @@ -1,20 +1,25 @@ # Tests universal newline support for both reading and parsing files. + +from __future__ import print_function +from __future__ import unicode_literals + +import io +import _pyio as pyio import unittest import os import sys -from test import test_support +from test import test_support as support if not hasattr(sys.stdin, 'newlines'): - raise unittest.SkipTest, \ - "This Python does not have universal newline support" + raise unittest.SkipTest( + "This Python does not have universal newline support") FATX = 'x' * (2**14) DATA_TEMPLATE = [ "line1=1", - "line2='this is a very long line designed to go past the magic " + - "hundred character limit that is inside fileobject.c and which " + - "is meant to speed up the common case, but we also want to test " + + "line2='this is a very long line designed to go past any default " + + "buffer limits that exist in io.py but we also want to test " + "the uncommon case, naturally.'", "def line3():pass", "line4 = '%s'" % FATX, @@ -28,48 +33,50 @@ DATA_CRLF = "\r\n".join(DATA_TEMPLATE) + # before end-of-file. 
DATA_MIXED = "\n".join(DATA_TEMPLATE) + "\r" DATA_SPLIT = [x + "\n" for x in DATA_TEMPLATE] -del x class TestGenericUnivNewlines(unittest.TestCase): # use a class variable DATA to define the data to write to the file # and a class variable NEWLINE to set the expected newlines value - READMODE = 'U' + READMODE = 'r' WRITEMODE = 'wb' def setUp(self): - with open(test_support.TESTFN, self.WRITEMODE) as fp: - fp.write(self.DATA) + data = self.DATA + if "b" in self.WRITEMODE: + data = data.encode("ascii") + with self.open(support.TESTFN, self.WRITEMODE) as fp: + fp.write(data) def tearDown(self): try: - os.unlink(test_support.TESTFN) + os.unlink(support.TESTFN) except: pass def test_read(self): - with open(test_support.TESTFN, self.READMODE) as fp: + with self.open(support.TESTFN, self.READMODE) as fp: data = fp.read() self.assertEqual(data, DATA_LF) - self.assertEqual(repr(fp.newlines), repr(self.NEWLINE)) + self.assertEqual(set(fp.newlines), set(self.NEWLINE)) def test_readlines(self): - with open(test_support.TESTFN, self.READMODE) as fp: + with self.open(support.TESTFN, self.READMODE) as fp: data = fp.readlines() self.assertEqual(data, DATA_SPLIT) - self.assertEqual(repr(fp.newlines), repr(self.NEWLINE)) + self.assertEqual(set(fp.newlines), set(self.NEWLINE)) def test_readline(self): - with open(test_support.TESTFN, self.READMODE) as fp: + with self.open(support.TESTFN, self.READMODE) as fp: data = [] d = fp.readline() while d: data.append(d) d = fp.readline() self.assertEqual(data, DATA_SPLIT) - self.assertEqual(repr(fp.newlines), repr(self.NEWLINE)) + self.assertEqual(set(fp.newlines), set(self.NEWLINE)) def test_seek(self): - with open(test_support.TESTFN, self.READMODE) as fp: + with self.open(support.TESTFN, self.READMODE) as fp: fp.readline() pos = fp.tell() data = fp.readlines() @@ -78,19 +85,6 @@ class TestGenericUnivNewlines(unittest.T data = fp.readlines() self.assertEqual(data, DATA_SPLIT[1:]) - def test_execfile(self): - namespace = {} - 
execfile(test_support.TESTFN, namespace) - func = namespace['line3'] - self.assertEqual(func.func_code.co_firstlineno, 3) - self.assertEqual(namespace['line4'], FATX) - - -class TestNativeNewlines(TestGenericUnivNewlines): - NEWLINE = None - DATA = DATA_LF - READMODE = 'r' - WRITEMODE = 'w' class TestCRNewlines(TestGenericUnivNewlines): NEWLINE = '\r' @@ -105,7 +99,7 @@ class TestCRLFNewlines(TestGenericUnivNe DATA = DATA_CRLF def test_tell(self): - with open(test_support.TESTFN, self.READMODE) as fp: + with self.open(support.TESTFN, self.READMODE) as fp: self.assertEqual(repr(fp.newlines), repr(None)) data = fp.readline() pos = fp.tell() @@ -117,13 +111,22 @@ class TestMixedNewlines(TestGenericUnivN def test_main(): - test_support.run_unittest( - TestNativeNewlines, - TestCRNewlines, - TestLFNewlines, - TestCRLFNewlines, - TestMixedNewlines - ) + base_tests = (TestCRNewlines, + TestLFNewlines, + TestCRLFNewlines, + TestMixedNewlines) + tests = [] + # Test the C and Python implementations. + for test in base_tests: + class CTest(test): + open = io.open + CTest.__name__ = str("C" + test.__name__) + class PyTest(test): + open = staticmethod(pyio.open) + PyTest.__name__ = str("Py" + test.__name__) + tests.append(CTest) + tests.append(PyTest) + support.run_unittest(*tests) if __name__ == '__main__': test_main() diff -r c5c04a1ff3b4 -r b3fbeeb23fd6 Modules/_bytesio.c --- a/Modules/_bytesio.c +++ /dev/null @@ -1,763 +0,0 @@ -#include "Python.h" - -typedef struct { - PyObject_HEAD - char *buf; - Py_ssize_t pos; - Py_ssize_t string_size; - size_t buf_size; -} BytesIOObject; - -#define CHECK_CLOSED(self) \ - if ((self)->buf == NULL) { \ - PyErr_SetString(PyExc_ValueError, \ - "I/O operation on closed file."); \ - return NULL; \ - } - -/* Internal routine to get a line from the buffer of a BytesIO - object. Returns the length between the current position to the - next newline character. 
*/ -static Py_ssize_t -get_line(BytesIOObject *self, char **output) -{ - char *n; - const char *str_end; - Py_ssize_t len; - - assert(self->buf != NULL); - - /* Move to the end of the line, up to the end of the string, s. */ - str_end = self->buf + self->string_size; - for (n = self->buf + self->pos; - n < str_end && *n != '\n'; - n++); - - /* Skip the newline character */ - if (n < str_end) - n++; - - /* Get the length from the current position to the end of the line. */ - len = n - (self->buf + self->pos); - *output = self->buf + self->pos; - - assert(len >= 0); - assert(self->pos < PY_SSIZE_T_MAX - len); - self->pos += len; - - return len; -} - -/* Internal routine for changing the size of the buffer of BytesIO objects. - The caller should ensure that the 'size' argument is non-negative. Returns - 0 on success, -1 otherwise. */ -static int -resize_buffer(BytesIOObject *self, size_t size) -{ - /* Here, unsigned types are used to avoid dealing with signed integer - overflow, which is undefined in C. */ - size_t alloc = self->buf_size; - char *new_buf = NULL; - - assert(self->buf != NULL); - - /* For simplicity, stay in the range of the signed type. Anyway, Python - doesn't allow strings to be longer than this. */ - if (size > PY_SSIZE_T_MAX) - goto overflow; - - if (size < alloc / 2) { - /* Major downsize; resize down to exact size. */ - alloc = size + 1; - } - else if (size < alloc) { - /* Within allocated size; quick exit */ - return 0; - } - else if (size <= alloc * 1.125) { - /* Moderate upsize; overallocate similar to list_resize() */ - alloc = size + (size >> 3) + (size < 9 ? 
3 : 6); - } - else { - /* Major upsize; resize up to exact size */ - alloc = size + 1; - } - - if (alloc > ((size_t)-1) / sizeof(char)) - goto overflow; - new_buf = (char *)PyMem_Realloc(self->buf, alloc * sizeof(char)); - if (new_buf == NULL) { - PyErr_NoMemory(); - return -1; - } - self->buf_size = alloc; - self->buf = new_buf; - - return 0; - - overflow: - PyErr_SetString(PyExc_OverflowError, - "new buffer size too large"); - return -1; -} - -/* Internal routine for writing a string of bytes to the buffer of a BytesIO - object. Returns the number of bytes wrote, or -1 on error. */ -static Py_ssize_t -write_bytes(BytesIOObject *self, const char *bytes, Py_ssize_t len) -{ - assert(self->buf != NULL); - assert(self->pos >= 0); - assert(len >= 0); - - if ((size_t)self->pos + len > self->buf_size) { - if (resize_buffer(self, (size_t)self->pos + len) < 0) - return -1; - } - - if (self->pos > self->string_size) { - /* In case of overseek, pad with null bytes the buffer region between - the end of stream and the current position. - - 0 lo string_size hi - | |<---used--->|<----------available----------->| - | | <--to pad-->|<---to write---> | - 0 buf position - */ - memset(self->buf + self->string_size, '\0', - (self->pos - self->string_size) * sizeof(char)); - } - - /* Copy the data to the internal buffer, overwriting some of the existing - data if self->pos < self->string_size. */ - memcpy(self->buf + self->pos, bytes, len); - self->pos += len; - - /* Set the new length of the internal string if it has changed. */ - if (self->string_size < self->pos) { - self->string_size = self->pos; - } - - return len; -} - -static PyObject * -bytesio_get_closed(BytesIOObject *self) -{ - if (self->buf == NULL) - Py_RETURN_TRUE; - else - Py_RETURN_FALSE; -} - -/* Generic getter for the writable, readable and seekable properties */ -static PyObject * -return_true(BytesIOObject *self) -{ - Py_RETURN_TRUE; -} - -PyDoc_STRVAR(flush_doc, -"flush() -> None. 
Does nothing."); - -static PyObject * -bytesio_flush(BytesIOObject *self) -{ - Py_RETURN_NONE; -} - -PyDoc_STRVAR(getval_doc, -"getvalue() -> bytes.\n" -"\n" -"Retrieve the entire contents of the BytesIO object."); - -static PyObject * -bytesio_getvalue(BytesIOObject *self) -{ - CHECK_CLOSED(self); - return PyString_FromStringAndSize(self->buf, self->string_size); -} - -PyDoc_STRVAR(isatty_doc, -"isatty() -> False.\n" -"\n" -"Always returns False since BytesIO objects are not connected\n" -"to a tty-like device."); - -static PyObject * -bytesio_isatty(BytesIOObject *self) -{ - CHECK_CLOSED(self); - Py_RETURN_FALSE; -} - -PyDoc_STRVAR(tell_doc, -"tell() -> current file position, an integer\n"); - -static PyObject * -bytesio_tell(BytesIOObject *self) -{ - CHECK_CLOSED(self); - return PyInt_FromSsize_t(self->pos); -} - -PyDoc_STRVAR(read_doc, -"read([size]) -> read at most size bytes, returned as a string.\n" -"\n" -"If the size argument is negative, read until EOF is reached.\n" -"Return an empty string at EOF."); - -static PyObject * -bytesio_read(BytesIOObject *self, PyObject *args) -{ - Py_ssize_t size, n; - char *output; - PyObject *arg = Py_None; - - CHECK_CLOSED(self); - - if (!PyArg_ParseTuple(args, "|O:read", &arg)) - return NULL; - - if (PyInt_Check(arg)) { - size = PyInt_AsSsize_t(arg); - if (size == -1 && PyErr_Occurred()) - return NULL; - } - else if (arg == Py_None) { - /* Read until EOF is reached, by default. 
*/ - size = -1; - } - else { - PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'", - Py_TYPE(arg)->tp_name); - return NULL; - } - - /* adjust invalid sizes */ - n = self->string_size - self->pos; - if (size < 0 || size > n) { - size = n; - if (size < 0) - size = 0; - } - - assert(self->buf != NULL); - output = self->buf + self->pos; - self->pos += size; - - return PyString_FromStringAndSize(output, size); -} - - -PyDoc_STRVAR(read1_doc, -"read1(size) -> read at most size bytes, returned as a string.\n" -"\n" -"If the size argument is negative or omitted, read until EOF is reached.\n" -"Return an empty string at EOF."); - -static PyObject * -bytesio_read1(BytesIOObject *self, PyObject *n) -{ - PyObject *arg, *res; - - arg = PyTuple_Pack(1, n); - if (arg == NULL) - return NULL; - res = bytesio_read(self, arg); - Py_DECREF(arg); - return res; -} - -PyDoc_STRVAR(readline_doc, -"readline([size]) -> next line from the file, as a string.\n" -"\n" -"Retain newline. A non-negative size argument limits the maximum\n" -"number of bytes to return (an incomplete line may be returned then).\n" -"Return an empty string at EOF.\n"); - -static PyObject * -bytesio_readline(BytesIOObject *self, PyObject *args) -{ - Py_ssize_t size, n; - char *output; - PyObject *arg = Py_None; - - CHECK_CLOSED(self); - - if (!PyArg_ParseTuple(args, "|O:readline", &arg)) - return NULL; - - if (PyInt_Check(arg)) { - size = PyInt_AsSsize_t(arg); - if (size == -1 && PyErr_Occurred()) - return NULL; - } - else if (arg == Py_None) { - /* No size limit, by default. 
*/ - size = -1; - } - else { - PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'", - Py_TYPE(arg)->tp_name); - return NULL; - } - - n = get_line(self, &output); - - if (size >= 0 && size < n) { - size = n - size; - n -= size; - self->pos -= size; - } - - return PyString_FromStringAndSize(output, n); -} - -PyDoc_STRVAR(readlines_doc, -"readlines([size]) -> list of strings, each a line from the file.\n" -"\n" -"Call readline() repeatedly and return a list of the lines so read.\n" -"The optional size argument, if given, is an approximate bound on the\n" -"total number of bytes in the lines returned.\n"); - -static PyObject * -bytesio_readlines(BytesIOObject *self, PyObject *args) -{ - Py_ssize_t maxsize, size, n; - PyObject *result, *line; - char *output; - PyObject *arg = Py_None; - - CHECK_CLOSED(self); - - if (!PyArg_ParseTuple(args, "|O:readlines", &arg)) - return NULL; - - if (PyInt_Check(arg)) { - maxsize = PyInt_AsSsize_t(arg); - if (maxsize == -1 && PyErr_Occurred()) - return NULL; - } - else if (arg == Py_None) { - /* No size limit, by default. */ - maxsize = -1; - } - else { - PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'", - Py_TYPE(arg)->tp_name); - return NULL; - } - - size = 0; - result = PyList_New(0); - if (!result) - return NULL; - - while ((n = get_line(self, &output)) != 0) { - line = PyString_FromStringAndSize(output, n); - if (!line) - goto on_error; - if (PyList_Append(result, line) == -1) { - Py_DECREF(line); - goto on_error; - } - Py_DECREF(line); - size += n; - if (maxsize > 0 && size >= maxsize) - break; - } - return result; - - on_error: - Py_DECREF(result); - return NULL; -} - -PyDoc_STRVAR(readinto_doc, -"readinto(bytearray) -> int. 
Read up to len(b) bytes into b.\n" -"\n" -"Returns number of bytes read (0 for EOF), or None if the object\n" -"is set not to block as has no data to read."); - -static PyObject * -bytesio_readinto(BytesIOObject *self, PyObject *buffer) -{ - void *raw_buffer; - Py_ssize_t len; - - CHECK_CLOSED(self); - - if (PyObject_AsWriteBuffer(buffer, &raw_buffer, &len) == -1) - return NULL; - - if (self->pos + len > self->string_size) - len = self->string_size - self->pos; - - memcpy(raw_buffer, self->buf + self->pos, len); - assert(self->pos + len < PY_SSIZE_T_MAX); - assert(len >= 0); - self->pos += len; - - return PyInt_FromSsize_t(len); -} - -PyDoc_STRVAR(truncate_doc, -"truncate([size]) -> int. Truncate the file to at most size bytes.\n" -"\n" -"Size defaults to the current file position, as returned by tell().\n" -"Returns the new size. Imply an absolute seek to the position size."); - -static PyObject * -bytesio_truncate(BytesIOObject *self, PyObject *args) -{ - Py_ssize_t size; - PyObject *arg = Py_None; - - CHECK_CLOSED(self); - - if (!PyArg_ParseTuple(args, "|O:truncate", &arg)) - return NULL; - - if (PyInt_Check(arg)) { - size = PyInt_AsSsize_t(arg); - if (size == -1 && PyErr_Occurred()) - return NULL; - } - else if (arg == Py_None) { - /* Truncate to current position if no argument is passed. 
*/ - size = self->pos; - } - else { - PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'", - Py_TYPE(arg)->tp_name); - return NULL; - } - - if (size < 0) { - PyErr_Format(PyExc_ValueError, - "negative size value %zd", size); - return NULL; - } - - if (size < self->string_size) { - self->string_size = size; - if (resize_buffer(self, size) < 0) - return NULL; - } - self->pos = size; - - return PyInt_FromSsize_t(size); -} - -static PyObject * -bytesio_iternext(BytesIOObject *self) -{ - char *next; - Py_ssize_t n; - - CHECK_CLOSED(self); - - n = get_line(self, &next); - - if (!next || n == 0) - return NULL; - - return PyString_FromStringAndSize(next, n); -} - -PyDoc_STRVAR(seek_doc, -"seek(pos, whence=0) -> int. Change stream position.\n" -"\n" -"Seek to byte offset pos relative to position indicated by whence:\n" -" 0 Start of stream (the default). pos should be >= 0;\n" -" 1 Current position - pos may be negative;\n" -" 2 End of stream - pos usually negative.\n" -"Returns the new absolute position."); - -static PyObject * -bytesio_seek(BytesIOObject *self, PyObject *args) -{ - PyObject *pos_obj, *mode_obj; - Py_ssize_t pos; - int mode = 0; - - CHECK_CLOSED(self); - - /* Special-case for 2.x to prevent floats from passing through. - This only needed to make a test in test_io succeed. */ - if (!PyArg_UnpackTuple(args, "seek", 1, 2, &pos_obj, &mode_obj)) - return NULL; - if (PyFloat_Check(pos_obj)) { - PyErr_SetString(PyExc_TypeError, - "position argument must be an integer"); - return NULL; - } - - if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode)) - return NULL; - - if (pos < 0 && mode == 0) { - PyErr_Format(PyExc_ValueError, - "negative seek value %zd", pos); - return NULL; - } - - /* mode 0: offset relative to beginning of the string. - mode 1: offset relative to current position. - mode 2: offset relative the end of the string. 
*/ - if (mode == 1) { - if (pos > PY_SSIZE_T_MAX - self->pos) { - PyErr_SetString(PyExc_OverflowError, - "new position too large"); - return NULL; - } - pos += self->pos; - } - else if (mode == 2) { - if (pos > PY_SSIZE_T_MAX - self->string_size) { - PyErr_SetString(PyExc_OverflowError, - "new position too large"); - return NULL; - } - pos += self->string_size; - } - else if (mode != 0) { - PyErr_Format(PyExc_ValueError, - "invalid whence (%i, should be 0, 1 or 2)", mode); - return NULL; - } - - if (pos < 0) - pos = 0; - self->pos = pos; - - return PyInt_FromSsize_t(self->pos); -} - -PyDoc_STRVAR(write_doc, -"write(bytes) -> int. Write bytes to file.\n" -"\n" -"Return the number of bytes written."); - -static PyObject * -bytesio_write(BytesIOObject *self, PyObject *obj) -{ - const char *bytes; - Py_ssize_t size; - Py_ssize_t n = 0; - - CHECK_CLOSED(self); - - /* Special-case in 2.x to prevent unicode objects to pass through. */ - if (PyUnicode_Check(obj)) { - PyErr_SetString(PyExc_TypeError, - "expecting a bytes object, got unicode"); - return NULL; - } - - if (PyObject_AsReadBuffer(obj, (void *)&bytes, &size) < 0) - return NULL; - - if (size != 0) { - n = write_bytes(self, bytes, size); - if (n < 0) - return NULL; - } - - return PyInt_FromSsize_t(n); -} - -PyDoc_STRVAR(writelines_doc, -"writelines(sequence_of_strings) -> None. Write strings to the file.\n" -"\n" -"Note that newlines are not added. The sequence can be any iterable\n" -"object producing strings. 
This is equivalent to calling write() for\n" -"each string."); - -static PyObject * -bytesio_writelines(BytesIOObject *self, PyObject *v) -{ - PyObject *it, *item; - PyObject *ret; - - CHECK_CLOSED(self); - - it = PyObject_GetIter(v); - if (it == NULL) - return NULL; - - while ((item = PyIter_Next(it)) != NULL) { - ret = bytesio_write(self, item); - Py_DECREF(item); - if (ret == NULL) { - Py_DECREF(it); - return NULL; - } - Py_DECREF(ret); - } - Py_DECREF(it); - - /* See if PyIter_Next failed */ - if (PyErr_Occurred()) - return NULL; - - Py_RETURN_NONE; -} - -PyDoc_STRVAR(close_doc, -"close() -> None. Disable all I/O operations."); - -static PyObject * -bytesio_close(BytesIOObject *self) -{ - if (self->buf != NULL) { - PyMem_Free(self->buf); - self->buf = NULL; - } - Py_RETURN_NONE; -} - -static void -bytesio_dealloc(BytesIOObject *self) -{ - if (self->buf != NULL) { - PyMem_Free(self->buf); - self->buf = NULL; - } - Py_TYPE(self)->tp_free(self); -} - -static PyObject * -bytesio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - BytesIOObject *self; - - assert(type != NULL && type->tp_alloc != NULL); - self = (BytesIOObject *)type->tp_alloc(type, 0); - if (self == NULL) - return NULL; - - self->string_size = 0; - self->pos = 0; - self->buf_size = 0; - self->buf = (char *)PyMem_Malloc(0); - if (self->buf == NULL) { - Py_DECREF(self); - return PyErr_NoMemory(); - } - - return (PyObject *)self; -} - -static int -bytesio_init(BytesIOObject *self, PyObject *args, PyObject *kwds) -{ - PyObject *initvalue = NULL; - - if (!PyArg_ParseTuple(args, "|O:BytesIO", &initvalue)) - return -1; - - /* In case, __init__ is called multiple times. 
*/ - self->string_size = 0; - self->pos = 0; - - if (initvalue && initvalue != Py_None) { - PyObject *res; - res = bytesio_write(self, initvalue); - if (res == NULL) - return -1; - Py_DECREF(res); - self->pos = 0; - } - - return 0; -} - -static PyGetSetDef bytesio_getsetlist[] = { - {"closed", (getter)bytesio_get_closed, NULL, - "True if the file is closed."}, - {0}, /* sentinel */ -}; - -static struct PyMethodDef bytesio_methods[] = { - {"readable", (PyCFunction)return_true, METH_NOARGS, NULL}, - {"seekable", (PyCFunction)return_true, METH_NOARGS, NULL}, - {"writable", (PyCFunction)return_true, METH_NOARGS, NULL}, - {"close", (PyCFunction)bytesio_close, METH_NOARGS, close_doc}, - {"flush", (PyCFunction)bytesio_flush, METH_NOARGS, flush_doc}, - {"isatty", (PyCFunction)bytesio_isatty, METH_NOARGS, isatty_doc}, - {"tell", (PyCFunction)bytesio_tell, METH_NOARGS, tell_doc}, - {"write", (PyCFunction)bytesio_write, METH_O, write_doc}, - {"writelines", (PyCFunction)bytesio_writelines, METH_O, writelines_doc}, - {"read1", (PyCFunction)bytesio_read1, METH_O, read1_doc}, - {"readinto", (PyCFunction)bytesio_readinto, METH_O, readinto_doc}, - {"readline", (PyCFunction)bytesio_readline, METH_VARARGS, readline_doc}, - {"readlines", (PyCFunction)bytesio_readlines, METH_VARARGS, readlines_doc}, - {"read", (PyCFunction)bytesio_read, METH_VARARGS, read_doc}, - {"getvalue", (PyCFunction)bytesio_getvalue, METH_VARARGS, getval_doc}, - {"seek", (PyCFunction)bytesio_seek, METH_VARARGS, seek_doc}, - {"truncate", (PyCFunction)bytesio_truncate, METH_VARARGS, truncate_doc}, - {NULL, NULL} /* sentinel */ -}; - -PyDoc_STRVAR(bytesio_doc, -"BytesIO([buffer]) -> object\n" -"\n" -"Create a buffered I/O implementation using an in-memory bytes\n" -"buffer, ready for reading and writing."); - -static PyTypeObject BytesIO_Type = { - PyVarObject_HEAD_INIT(NULL, 0) - "_bytesio._BytesIO", /*tp_name*/ - sizeof(BytesIOObject), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - (destructor)bytesio_dealloc, 
/*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_compare*/ - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ - bytesio_doc, /*tp_doc*/ - 0, /*tp_traverse*/ - 0, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - PyObject_SelfIter, /*tp_iter*/ - (iternextfunc)bytesio_iternext, /*tp_iternext*/ - bytesio_methods, /*tp_methods*/ - 0, /*tp_members*/ - bytesio_getsetlist, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - 0, /*tp_dictoffset*/ - (initproc)bytesio_init, /*tp_init*/ - 0, /*tp_alloc*/ - bytesio_new, /*tp_new*/ -}; - -PyMODINIT_FUNC -init_bytesio(void) -{ - PyObject *m; - - if (PyType_Ready(&BytesIO_Type) < 0) - return; - m = Py_InitModule("_bytesio", NULL); - if (m == NULL) - return; - Py_INCREF(&BytesIO_Type); - PyModule_AddObject(m, "_BytesIO", (PyObject *)&BytesIO_Type); -} diff -r c5c04a1ff3b4 -r b3fbeeb23fd6 Modules/_fileio.c --- a/Modules/_fileio.c +++ /dev/null @@ -1,935 +0,0 @@ -/* Author: Daniel Stutzbach */ - -#define PY_SSIZE_T_CLEAN -#include "Python.h" -#include -#include -#include -#include /* For offsetof */ - -/* - * Known likely problems: - * - * - Files larger then 2**32-1 - * - Files with unicode filenames - * - Passing numbers greater than 2**32-1 when an integer is expected - * - Making it work on Windows and other oddball platforms - * - * To Do: - * - * - autoconfify header file inclusion - */ - -#ifdef MS_WINDOWS -/* can simulate truncate with Win32 API functions; see file_truncate */ -#define HAVE_FTRUNCATE -#define WIN32_LEAN_AND_MEAN -#include -#endif - -typedef struct { - PyObject_HEAD - int fd; - unsigned readable : 1; - unsigned writable : 1; - int seekable : 2; /* -1 means unknown */ - int closefd : 1; - PyObject *weakreflist; -} 
PyFileIOObject; - -PyTypeObject PyFileIO_Type; - -#define PyFileIO_Check(op) (PyObject_TypeCheck((op), &PyFileIO_Type)) - -static PyObject * -portable_lseek(int fd, PyObject *posobj, int whence); - -/* Returns 0 on success, errno (which is < 0) on failure. */ -static int -internal_close(PyFileIOObject *self) -{ - int save_errno = 0; - if (self->fd >= 0) { - int fd = self->fd; - self->fd = -1; - Py_BEGIN_ALLOW_THREADS - if (close(fd) < 0) - save_errno = errno; - Py_END_ALLOW_THREADS - } - return save_errno; -} - -static PyObject * -fileio_close(PyFileIOObject *self) -{ - if (!self->closefd) { - self->fd = -1; - Py_RETURN_NONE; - } - errno = internal_close(self); - if (errno < 0) { - PyErr_SetFromErrno(PyExc_IOError); - return NULL; - } - - Py_RETURN_NONE; -} - -static PyObject * -fileio_new(PyTypeObject *type, PyObject *args, PyObject *kews) -{ - PyFileIOObject *self; - - assert(type != NULL && type->tp_alloc != NULL); - - self = (PyFileIOObject *) type->tp_alloc(type, 0); - if (self != NULL) { - self->fd = -1; - self->readable = 0; - self->writable = 0; - self->seekable = -1; - self->closefd = 1; - self->weakreflist = NULL; - } - - return (PyObject *) self; -} - -/* On Unix, open will succeed for directories. - In Python, there should be no file objects referring to - directories, so we need a check. 
*/ - -static int -dircheck(PyFileIOObject* self, char *name) -{ -#if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR) - struct stat buf; - if (self->fd < 0) - return 0; - if (fstat(self->fd, &buf) == 0 && S_ISDIR(buf.st_mode)) { - char *msg = strerror(EISDIR); - PyObject *exc; - internal_close(self); - - exc = PyObject_CallFunction(PyExc_IOError, "(iss)", - EISDIR, msg, name); - PyErr_SetObject(PyExc_IOError, exc); - Py_XDECREF(exc); - return -1; - } -#endif - return 0; -} - -static int -check_fd(int fd) -{ -#if defined(HAVE_FSTAT) - struct stat buf; - if (!_PyVerify_fd(fd) || (fstat(fd, &buf) < 0 && errno == EBADF)) { - PyObject *exc; - char *msg = strerror(EBADF); - exc = PyObject_CallFunction(PyExc_OSError, "(is)", - EBADF, msg); - PyErr_SetObject(PyExc_OSError, exc); - Py_XDECREF(exc); - return -1; - } -#endif - return 0; -} - - -static int -fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) -{ - PyFileIOObject *self = (PyFileIOObject *) oself; - static char *kwlist[] = {"file", "mode", "closefd", NULL}; - char *name = NULL; - char *mode = "r"; - char *s; -#ifdef MS_WINDOWS - Py_UNICODE *widename = NULL; -#endif - int ret = 0; - int rwa = 0, plus = 0, append = 0; - int flags = 0; - int fd = -1; - int closefd = 1; - - assert(PyFileIO_Check(oself)); - if (self->fd >= 0) { - /* Have to close the existing file first. 
*/ - if (internal_close(self) < 0) - return -1; - } - - if (PyArg_ParseTupleAndKeywords(args, kwds, "i|si:fileio", - kwlist, &fd, &mode, &closefd)) { - if (fd < 0) { - PyErr_SetString(PyExc_ValueError, - "Negative filedescriptor"); - return -1; - } - if (check_fd(fd)) - return -1; - } - else { - PyErr_Clear(); - -#ifdef MS_WINDOWS - if (GetVersion() < 0x80000000) { - /* On NT, so wide API available */ - PyObject *po; - if (PyArg_ParseTupleAndKeywords(args, kwds, "U|si:fileio", - kwlist, &po, &mode, &closefd) - ) { - widename = PyUnicode_AS_UNICODE(po); - } else { - /* Drop the argument parsing error as narrow - strings are also valid. */ - PyErr_Clear(); - } - } - if (widename == NULL) -#endif - { - if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:fileio", - kwlist, - Py_FileSystemDefaultEncoding, - &name, &mode, &closefd)) - return -1; - } - } - - s = mode; - while (*s) { - switch (*s++) { - case 'r': - if (rwa) { - bad_mode: - PyErr_SetString(PyExc_ValueError, - "Must have exactly one of read/write/append mode"); - goto error; - } - rwa = 1; - self->readable = 1; - break; - case 'w': - if (rwa) - goto bad_mode; - rwa = 1; - self->writable = 1; - flags |= O_CREAT | O_TRUNC; - break; - case 'a': - if (rwa) - goto bad_mode; - rwa = 1; - self->writable = 1; - flags |= O_CREAT; - append = 1; - break; - case 'b': - break; - case '+': - if (plus) - goto bad_mode; - self->readable = self->writable = 1; - plus = 1; - break; - default: - PyErr_Format(PyExc_ValueError, - "invalid mode: %.200s", mode); - goto error; - } - } - - if (!rwa) - goto bad_mode; - - if (self->readable && self->writable) - flags |= O_RDWR; - else if (self->readable) - flags |= O_RDONLY; - else - flags |= O_WRONLY; - -#ifdef O_BINARY - flags |= O_BINARY; -#endif - -#ifdef O_APPEND - if (append) - flags |= O_APPEND; -#endif - - if (fd >= 0) { - self->fd = fd; - self->closefd = closefd; - } - else { - self->closefd = 1; - if (!closefd) { - PyErr_SetString(PyExc_ValueError, - "Cannot use closefd=False 
with file name"); - goto error; - } - - Py_BEGIN_ALLOW_THREADS - errno = 0; -#ifdef MS_WINDOWS - if (widename != NULL) - self->fd = _wopen(widename, flags, 0666); - else -#endif - self->fd = open(name, flags, 0666); - Py_END_ALLOW_THREADS - if (self->fd < 0) { -#ifdef MS_WINDOWS - if (widename != NULL) - PyErr_SetFromErrnoWithUnicodeFilename(PyExc_IOError, widename); - else -#endif - PyErr_SetFromErrnoWithFilename(PyExc_IOError, name); - goto error; - } - if(dircheck(self, name) < 0) - goto error; - } - - if (append) { - /* For consistent behaviour, we explicitly seek to the - end of file (otherwise, it might be done only on the - first write()). */ - PyObject *pos = portable_lseek(self->fd, NULL, 2); - if (pos == NULL) - goto error; - Py_DECREF(pos); - } - - goto done; - - error: - ret = -1; - - done: - PyMem_Free(name); - return ret; -} - -static void -fileio_dealloc(PyFileIOObject *self) -{ - if (self->weakreflist != NULL) - PyObject_ClearWeakRefs((PyObject *) self); - - if (self->fd >= 0 && self->closefd) { - errno = internal_close(self); - if (errno < 0) { - PySys_WriteStderr("close failed: [Errno %d] %s\n", - errno, strerror(errno)); - } - } - - Py_TYPE(self)->tp_free((PyObject *)self); -} - -static PyObject * -err_closed(void) -{ - PyErr_SetString(PyExc_ValueError, "I/O operation on closed file"); - return NULL; -} - -static PyObject * -err_mode(char *action) -{ - PyErr_Format(PyExc_ValueError, "File not open for %s", action); - return NULL; -} - -static PyObject * -fileio_fileno(PyFileIOObject *self) -{ - if (self->fd < 0) - return err_closed(); - return PyInt_FromLong((long) self->fd); -} - -static PyObject * -fileio_readable(PyFileIOObject *self) -{ - if (self->fd < 0) - return err_closed(); - return PyBool_FromLong((long) self->readable); -} - -static PyObject * -fileio_writable(PyFileIOObject *self) -{ - if (self->fd < 0) - return err_closed(); - return PyBool_FromLong((long) self->writable); -} - -static PyObject * -fileio_seekable(PyFileIOObject 
*self) -{ - if (self->fd < 0) - return err_closed(); - if (self->seekable < 0) { - int ret; - Py_BEGIN_ALLOW_THREADS - ret = lseek(self->fd, 0, SEEK_CUR); - Py_END_ALLOW_THREADS - if (ret < 0) - self->seekable = 0; - else - self->seekable = 1; - } - return PyBool_FromLong((long) self->seekable); -} - -static PyObject * -fileio_readinto(PyFileIOObject *self, PyObject *args) -{ - Py_buffer pbuf; - Py_ssize_t n; - - if (self->fd < 0) - return err_closed(); - if (!self->readable) - return err_mode("reading"); - - if (!PyArg_ParseTuple(args, "w*", &pbuf)) - return NULL; - - Py_BEGIN_ALLOW_THREADS - errno = 0; - n = read(self->fd, pbuf.buf, pbuf.len); - Py_END_ALLOW_THREADS - PyBuffer_Release(&pbuf); - if (n < 0) { - if (errno == EAGAIN) - Py_RETURN_NONE; - PyErr_SetFromErrno(PyExc_IOError); - return NULL; - } - - return PyLong_FromSsize_t(n); -} - -#define DEFAULT_BUFFER_SIZE (8*1024) - -static PyObject * -fileio_readall(PyFileIOObject *self) -{ - PyObject *result; - Py_ssize_t total = 0; - int n; - - result = PyString_FromStringAndSize(NULL, DEFAULT_BUFFER_SIZE); - if (result == NULL) - return NULL; - - while (1) { - Py_ssize_t newsize = total + DEFAULT_BUFFER_SIZE; - if (PyString_GET_SIZE(result) < newsize) { - if (_PyString_Resize(&result, newsize) < 0) { - if (total == 0) { - Py_DECREF(result); - return NULL; - } - PyErr_Clear(); - break; - } - } - Py_BEGIN_ALLOW_THREADS - errno = 0; - n = read(self->fd, - PyString_AS_STRING(result) + total, - newsize - total); - Py_END_ALLOW_THREADS - if (n == 0) - break; - if (n < 0) { - if (total > 0) - break; - if (errno == EAGAIN) { - Py_DECREF(result); - Py_RETURN_NONE; - } - Py_DECREF(result); - PyErr_SetFromErrno(PyExc_IOError); - return NULL; - } - total += n; - } - - if (PyString_GET_SIZE(result) > total) { - if (_PyString_Resize(&result, total) < 0) { - /* This should never happen, but just in case */ - Py_DECREF(result); - return NULL; - } - } - return result; -} - -static PyObject * -fileio_read(PyFileIOObject *self, 
PyObject *args) -{ - char *ptr; - Py_ssize_t n; - Py_ssize_t size = -1; - PyObject *bytes; - - if (self->fd < 0) - return err_closed(); - if (!self->readable) - return err_mode("reading"); - - if (!PyArg_ParseTuple(args, "|n", &size)) - return NULL; - - if (size < 0) { - return fileio_readall(self); - } - - bytes = PyString_FromStringAndSize(NULL, size); - if (bytes == NULL) - return NULL; - ptr = PyString_AS_STRING(bytes); - - Py_BEGIN_ALLOW_THREADS - errno = 0; - n = read(self->fd, ptr, size); - Py_END_ALLOW_THREADS - - if (n < 0) { - if (errno == EAGAIN) - Py_RETURN_NONE; - PyErr_SetFromErrno(PyExc_IOError); - return NULL; - } - - if (n != size) { - if (_PyString_Resize(&bytes, n) < 0) { - Py_DECREF(bytes); - return NULL; - } - } - - return (PyObject *) bytes; -} - -static PyObject * -fileio_write(PyFileIOObject *self, PyObject *args) -{ - Py_buffer pbuf; - Py_ssize_t n; - - if (self->fd < 0) - return err_closed(); - if (!self->writable) - return err_mode("writing"); - - if (!PyArg_ParseTuple(args, "s*", &pbuf)) - return NULL; - - Py_BEGIN_ALLOW_THREADS - errno = 0; - n = write(self->fd, pbuf.buf, pbuf.len); - Py_END_ALLOW_THREADS - - PyBuffer_Release(&pbuf); - - if (n < 0) { - if (errno == EAGAIN) - Py_RETURN_NONE; - PyErr_SetFromErrno(PyExc_IOError); - return NULL; - } - - return PyLong_FromSsize_t(n); -} - -/* XXX Windows support below is likely incomplete */ - -#if defined(MS_WIN64) || defined(MS_WINDOWS) -typedef PY_LONG_LONG Py_off_t; -#else -typedef off_t Py_off_t; -#endif - -/* Cribbed from posix_lseek() */ -static PyObject * -portable_lseek(int fd, PyObject *posobj, int whence) -{ - Py_off_t pos, res; - -#ifdef SEEK_SET - /* Turn 0, 1, 2 into SEEK_{SET,CUR,END} */ - switch (whence) { -#if SEEK_SET != 0 - case 0: whence = SEEK_SET; break; -#endif -#if SEEK_CUR != 1 - case 1: whence = SEEK_CUR; break; -#endif -#if SEEK_END != 2 - case 2: whence = SEEK_END; break; -#endif - } -#endif /* SEEK_SET */ - - if (posobj == NULL) - pos = 0; - else { - 
if(PyFloat_Check(posobj)) { - PyErr_SetString(PyExc_TypeError, "an integer is required"); - return NULL; - } -#if defined(HAVE_LARGEFILE_SUPPORT) - pos = PyLong_AsLongLong(posobj); -#else - pos = PyLong_AsLong(posobj); -#endif - if (PyErr_Occurred()) - return NULL; - } - - Py_BEGIN_ALLOW_THREADS -#if defined(MS_WIN64) || defined(MS_WINDOWS) - res = _lseeki64(fd, pos, whence); -#else - res = lseek(fd, pos, whence); -#endif - Py_END_ALLOW_THREADS - if (res < 0) - return PyErr_SetFromErrno(PyExc_IOError); - -#if defined(HAVE_LARGEFILE_SUPPORT) - return PyLong_FromLongLong(res); -#else - return PyLong_FromLong(res); -#endif -} - -static PyObject * -fileio_seek(PyFileIOObject *self, PyObject *args) -{ - PyObject *posobj; - int whence = 0; - - if (self->fd < 0) - return err_closed(); - - if (!PyArg_ParseTuple(args, "O|i", &posobj, &whence)) - return NULL; - - return portable_lseek(self->fd, posobj, whence); -} - -static PyObject * -fileio_tell(PyFileIOObject *self, PyObject *args) -{ - if (self->fd < 0) - return err_closed(); - - return portable_lseek(self->fd, NULL, 1); -} - -#ifdef HAVE_FTRUNCATE -static PyObject * -fileio_truncate(PyFileIOObject *self, PyObject *args) -{ - PyObject *posobj = NULL; - Py_off_t pos; - int ret; - int fd; - - fd = self->fd; - if (fd < 0) - return err_closed(); - if (!self->writable) - return err_mode("writing"); - - if (!PyArg_ParseTuple(args, "|O", &posobj)) - return NULL; - - if (posobj == Py_None || posobj == NULL) { - /* Get the current position. */ - posobj = portable_lseek(fd, NULL, 1); - if (posobj == NULL) - return NULL; - } - else { - /* Move to the position to be truncated. */ - posobj = portable_lseek(fd, posobj, 0); - } - -#if defined(HAVE_LARGEFILE_SUPPORT) - pos = PyLong_AsLongLong(posobj); -#else - pos = PyLong_AsLong(posobj); -#endif - if (PyErr_Occurred()) - return NULL; - -#ifdef MS_WINDOWS - /* MS _chsize doesn't work if newsize doesn't fit in 32 bits, - so don't even try using it. */ - { - HANDLE hFile; - - /* Truncate. 
Note that this may grow the file! */ - Py_BEGIN_ALLOW_THREADS - errno = 0; - hFile = (HANDLE)_get_osfhandle(fd); - ret = hFile == (HANDLE)-1; - if (ret == 0) { - ret = SetEndOfFile(hFile) == 0; - if (ret) - errno = EACCES; - } - Py_END_ALLOW_THREADS - } -#else - Py_BEGIN_ALLOW_THREADS - errno = 0; - ret = ftruncate(fd, pos); - Py_END_ALLOW_THREADS -#endif /* !MS_WINDOWS */ - - if (ret != 0) { - PyErr_SetFromErrno(PyExc_IOError); - return NULL; - } - - return posobj; -} -#endif - -static char * -mode_string(PyFileIOObject *self) -{ - if (self->readable) { - if (self->writable) - return "rb+"; - else - return "rb"; - } - else - return "wb"; -} - -static PyObject * -fileio_repr(PyFileIOObject *self) -{ - if (self->fd < 0) - return PyString_FromFormat("_fileio._FileIO(-1)"); - - return PyString_FromFormat("_fileio._FileIO(%d, '%s')", - self->fd, mode_string(self)); -} - -static PyObject * -fileio_isatty(PyFileIOObject *self) -{ - long res; - - if (self->fd < 0) - return err_closed(); - Py_BEGIN_ALLOW_THREADS - res = isatty(self->fd); - Py_END_ALLOW_THREADS - return PyBool_FromLong(res); -} - - -PyDoc_STRVAR(fileio_doc, -"file(name: str[, mode: str]) -> file IO object\n" -"\n" -"Open a file. The mode can be 'r', 'w' or 'a' for reading (default),\n" -"writing or appending. The file will be created if it doesn't exist\n" -"when opened for writing or appending; it will be truncated when\n" -"opened for writing. Add a '+' to the mode to allow simultaneous\n" -"reading and writing."); - -PyDoc_STRVAR(read_doc, -"read(size: int) -> bytes. read at most size bytes, returned as bytes.\n" -"\n" -"Only makes one system call, so less data may be returned than requested\n" -"In non-blocking mode, returns None if no data is available.\n" -"On end-of-file, returns ''."); - -PyDoc_STRVAR(readall_doc, -"readall() -> bytes. read all data from the file, returned as bytes.\n" -"\n" -"In non-blocking mode, returns as much as is immediately available,\n" -"or None if no data is available. 
On end-of-file, returns ''."); - -PyDoc_STRVAR(write_doc, -"write(b: bytes) -> int. Write bytes b to file, return number written.\n" -"\n" -"Only makes one system call, so not all of the data may be written.\n" -"The number of bytes actually written is returned."); - -PyDoc_STRVAR(fileno_doc, -"fileno() -> int. \"file descriptor\".\n" -"\n" -"This is needed for lower-level file interfaces, such the fcntl module."); - -PyDoc_STRVAR(seek_doc, -"seek(offset: int[, whence: int]) -> None. Move to new file position.\n" -"\n" -"Argument offset is a byte count. Optional argument whence defaults to\n" -"0 (offset from start of file, offset should be >= 0); other values are 1\n" -"(move relative to current position, positive or negative), and 2 (move\n" -"relative to end of file, usually negative, although many platforms allow\n" -"seeking beyond the end of a file)." -"\n" -"Note that not all file objects are seekable."); - -#ifdef HAVE_FTRUNCATE -PyDoc_STRVAR(truncate_doc, -"truncate([size: int]) -> None. Truncate the file to at most size bytes.\n" -"\n" -"Size defaults to the current file position, as returned by tell()." -"The current file position is changed to the value of size."); -#endif - -PyDoc_STRVAR(tell_doc, -"tell() -> int. Current file position"); - -PyDoc_STRVAR(readinto_doc, -"readinto() -> Undocumented. Don't use this; it may go away."); - -PyDoc_STRVAR(close_doc, -"close() -> None. Close the file.\n" -"\n" -"A closed file cannot be used for further I/O operations. close() may be\n" -"called more than once without error. Changes the fileno to -1."); - -PyDoc_STRVAR(isatty_doc, -"isatty() -> bool. True if the file is connected to a tty device."); - -PyDoc_STRVAR(seekable_doc, -"seekable() -> bool. True if file supports random-access."); - -PyDoc_STRVAR(readable_doc, -"readable() -> bool. True if file was opened in a read mode."); - -PyDoc_STRVAR(writable_doc, -"writable() -> bool. 
True if file was opened in a write mode."); - -static PyMethodDef fileio_methods[] = { - {"read", (PyCFunction)fileio_read, METH_VARARGS, read_doc}, - {"readall", (PyCFunction)fileio_readall, METH_NOARGS, readall_doc}, - {"readinto", (PyCFunction)fileio_readinto, METH_VARARGS, readinto_doc}, - {"write", (PyCFunction)fileio_write, METH_VARARGS, write_doc}, - {"seek", (PyCFunction)fileio_seek, METH_VARARGS, seek_doc}, - {"tell", (PyCFunction)fileio_tell, METH_VARARGS, tell_doc}, -#ifdef HAVE_FTRUNCATE - {"truncate", (PyCFunction)fileio_truncate, METH_VARARGS, truncate_doc}, -#endif - {"close", (PyCFunction)fileio_close, METH_NOARGS, close_doc}, - {"seekable", (PyCFunction)fileio_seekable, METH_NOARGS, seekable_doc}, - {"readable", (PyCFunction)fileio_readable, METH_NOARGS, readable_doc}, - {"writable", (PyCFunction)fileio_writable, METH_NOARGS, writable_doc}, - {"fileno", (PyCFunction)fileio_fileno, METH_NOARGS, fileno_doc}, - {"isatty", (PyCFunction)fileio_isatty, METH_NOARGS, isatty_doc}, - {NULL, NULL} /* sentinel */ -}; - -/* 'closed' and 'mode' are attributes for backwards compatibility reasons. 
*/ - -static PyObject * -get_closed(PyFileIOObject *self, void *closure) -{ - return PyBool_FromLong((long)(self->fd < 0)); -} - -static PyObject * -get_closefd(PyFileIOObject *self, void *closure) -{ - return PyBool_FromLong((long)(self->closefd)); -} - -static PyObject * -get_mode(PyFileIOObject *self, void *closure) -{ - return PyString_FromString(mode_string(self)); -} - -static PyGetSetDef fileio_getsetlist[] = { - {"closed", (getter)get_closed, NULL, "True if the file is closed"}, - {"closefd", (getter)get_closefd, NULL, - "True if the file descriptor will be closed"}, - {"mode", (getter)get_mode, NULL, "String giving the file mode"}, - {0}, -}; - -PyTypeObject PyFileIO_Type = { - PyVarObject_HEAD_INIT(NULL, 0) - "_FileIO", - sizeof(PyFileIOObject), - 0, - (destructor)fileio_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_compare */ - (reprfunc)fileio_repr, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - PyObject_GenericGetAttr, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ - fileio_doc, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - offsetof(PyFileIOObject, weakreflist), /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - fileio_methods, /* tp_methods */ - 0, /* tp_members */ - fileio_getsetlist, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - fileio_init, /* tp_init */ - PyType_GenericAlloc, /* tp_alloc */ - fileio_new, /* tp_new */ - PyObject_Del, /* tp_free */ -}; - -static PyMethodDef module_methods[] = { - {NULL, NULL} -}; - -PyMODINIT_FUNC -init_fileio(void) -{ - PyObject *m; /* a module object */ - - m = Py_InitModule3("_fileio", module_methods, - "Fast implementation of io.FileIO."); - if (m 
== NULL) - return; - if (PyType_Ready(&PyFileIO_Type) < 0) - return; - Py_INCREF(&PyFileIO_Type); - PyModule_AddObject(m, "_FileIO", (PyObject *) &PyFileIO_Type); -} diff -r c5c04a1ff3b4 -r b3fbeeb23fd6 Modules/_io/_iomodule.c --- /dev/null +++ b/Modules/_io/_iomodule.c @@ -0,0 +1,735 @@ +/* + An implementation of the new I/O lib as defined by PEP 3116 - "New I/O" + + Classes defined here: UnsupportedOperation, BlockingIOError. + Functions defined here: open(). + + Mostly written by Amaury Forgeot d'Arc +*/ + +#define PY_SSIZE_T_CLEAN +#include "Python.h" +#include "structmember.h" +#include "_iomodule.h" + +#ifdef HAVE_SYS_TYPES_H +#include +#endif /* HAVE_SYS_TYPES_H */ + +#ifdef HAVE_SYS_STAT_H +#include +#endif /* HAVE_SYS_STAT_H */ + + +/* Various interned strings */ + +PyObject *_PyIO_str_close; +PyObject *_PyIO_str_closed; +PyObject *_PyIO_str_decode; +PyObject *_PyIO_str_encode; +PyObject *_PyIO_str_fileno; +PyObject *_PyIO_str_flush; +PyObject *_PyIO_str_getstate; +PyObject *_PyIO_str_isatty; +PyObject *_PyIO_str_newlines; +PyObject *_PyIO_str_nl; +PyObject *_PyIO_str_read; +PyObject *_PyIO_str_read1; +PyObject *_PyIO_str_readable; +PyObject *_PyIO_str_readinto; +PyObject *_PyIO_str_readline; +PyObject *_PyIO_str_reset; +PyObject *_PyIO_str_seek; +PyObject *_PyIO_str_seekable; +PyObject *_PyIO_str_setstate; +PyObject *_PyIO_str_tell; +PyObject *_PyIO_str_truncate; +PyObject *_PyIO_str_writable; +PyObject *_PyIO_str_write; + +PyObject *_PyIO_empty_str; +PyObject *_PyIO_empty_bytes; +PyObject *_PyIO_zero; + + +PyDoc_STRVAR(module_doc, +"The io module provides the Python interfaces to stream handling. The\n" +"builtin open function is defined in this module.\n" +"\n" +"At the top of the I/O hierarchy is the abstract base class IOBase. It\n" +"defines the basic interface to a stream. 
Note, however, that there is no\n" +"separation between reading and writing to streams; implementations are\n" +"allowed to throw an IOError if they do not support a given operation.\n" +"\n" +"Extending IOBase is RawIOBase which deals simply with the reading and\n" +"writing of raw bytes to a stream. FileIO subclasses RawIOBase to provide\n" +"an interface to OS files.\n" +"\n" +"BufferedIOBase deals with buffering on a raw byte stream (RawIOBase). Its\n" +"subclasses, BufferedWriter, BufferedReader, and BufferedRWPair buffer\n" +"streams that are readable, writable, and both respectively.\n" +"BufferedRandom provides a buffered interface to random access\n" +"streams. BytesIO is a simple stream of in-memory bytes.\n" +"\n" +"Another IOBase subclass, TextIOBase, deals with the encoding and decoding\n" +"of streams into text. TextIOWrapper, which extends it, is a buffered text\n" +"interface to a buffered raw stream (`BufferedIOBase`). Finally, StringIO\n" +"is an in-memory stream for text.\n" +"\n" +"Argument names are not part of the specification, and only the arguments\n" +"of open() are intended to be used as keyword arguments.\n" +"\n" +"data:\n" +"\n" +"DEFAULT_BUFFER_SIZE\n" +"\n" +" An int containing the default buffer size used by the module's buffered\n" +" I/O classes. 
open() uses the file's blksize (as obtained by os.stat) if\n" +" possible.\n" + ); + + +/* + * BlockingIOError extends IOError + */ + +static int +BlockingIOError_init(PyBlockingIOErrorObject *self, PyObject *args, + PyObject *kwds) +{ + PyObject *myerrno = NULL, *strerror = NULL; + PyObject *baseargs = NULL; + Py_ssize_t written = 0; + + assert(PyTuple_Check(args)); + + self->written = 0; + if (!PyArg_ParseTuple(args, "OO|n:BlockingIOError", + &myerrno, &strerror, &written)) + return -1; + + baseargs = PyTuple_Pack(2, myerrno, strerror); + if (baseargs == NULL) + return -1; + /* This will take care of initializing of myerrno and strerror members */ + if (((PyTypeObject *)PyExc_IOError)->tp_init( + (PyObject *)self, baseargs, kwds) == -1) { + Py_DECREF(baseargs); + return -1; + } + Py_DECREF(baseargs); + + self->written = written; + return 0; +} + +static PyMemberDef BlockingIOError_members[] = { + {"characters_written", T_PYSSIZET, offsetof(PyBlockingIOErrorObject, written), 0}, + {NULL} /* Sentinel */ +}; + +static PyTypeObject _PyExc_BlockingIOError = { + PyVarObject_HEAD_INIT(NULL, 0) + "BlockingIOError", /*tp_name*/ + sizeof(PyBlockingIOErrorObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + 0, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare */ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + PyDoc_STR("Exception raised when I/O would block " + "on a non-blocking I/O stream"), /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + BlockingIOError_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* 
tp_dictoffset */ + (initproc)BlockingIOError_init, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ +}; +PyObject *PyExc_BlockingIOError = (PyObject *)&_PyExc_BlockingIOError; + + +/* + * The main open() function + */ +PyDoc_STRVAR(open_doc, +"Open file and return a stream. Raise IOError upon failure.\n" +"\n" +"file is either a text or byte string giving the name (and the path\n" +"if the file isn't in the current working directory) of the file to\n" +"be opened or an integer file descriptor of the file to be\n" +"wrapped. (If a file descriptor is given, it is closed when the\n" +"returned I/O object is closed, unless closefd is set to False.)\n" +"\n" +"mode is an optional string that specifies the mode in which the file\n" +"is opened. It defaults to 'r' which means open for reading in text\n" +"mode. Other common values are 'w' for writing (truncating the file if\n" +"it already exists), and 'a' for appending (which on some Unix systems,\n" +"means that all writes append to the end of the file regardless of the\n" +"current seek position). In text mode, if encoding is not specified the\n" +"encoding used is platform dependent. (For reading and writing raw\n" +"bytes use binary mode and leave encoding unspecified.) The available\n" +"modes are:\n" +"\n" +"========= ===============================================================\n" +"Character Meaning\n" +"--------- ---------------------------------------------------------------\n" +"'r' open for reading (default)\n" +"'w' open for writing, truncating the file first\n" +"'a' open for writing, appending to the end of the file if it exists\n" +"'b' binary mode\n" +"'t' text mode (default)\n" +"'+' open a disk file for updating (reading and writing)\n" +"'U' universal newline mode (for backwards compatibility; unneeded\n" +" for new code)\n" +"========= ===============================================================\n" +"\n" +"The default mode is 'rt' (open for reading text). 
For binary random\n" +"access, the mode 'w+b' opens and truncates the file to 0 bytes, while\n" +"'r+b' opens the file without truncation.\n" +"\n" +"Python distinguishes between files opened in binary and text modes,\n" +"even when the underlying operating system doesn't. Files opened in\n" +"binary mode (appending 'b' to the mode argument) return contents as\n" +"bytes objects without any decoding. In text mode (the default, or when\n" +"'t' is appended to the mode argument), the contents of the file are\n" +"returned as strings, the bytes having been first decoded using a\n" +"platform-dependent encoding or using the specified encoding if given.\n" +"\n" +"buffering is an optional integer used to set the buffering policy. By\n" +"default full buffering is on. Pass 0 to switch buffering off (only\n" +"allowed in binary mode), 1 to set line buffering, and an integer > 1\n" +"for full buffering.\n" +"\n" +"encoding is the name of the encoding used to decode or encode the\n" +"file. This should only be used in text mode. The default encoding is\n" +"platform dependent, but any encoding supported by Python can be\n" +"passed. See the codecs module for the list of supported encodings.\n" +"\n" +"errors is an optional string that specifies how encoding errors are to\n" +"be handled---this argument should not be used in binary mode. Pass\n" +"'strict' to raise a ValueError exception if there is an encoding error\n" +"(the default of None has the same effect), or pass 'ignore' to ignore\n" +"errors. (Note that ignoring encoding errors can lead to data loss.)\n" +"See the documentation for codecs.register for a list of the permitted\n" +"encoding error strings.\n" +"\n" +"newline controls how universal newlines works (it only applies to text\n" +"mode). It can be None, '', '\\n', '\\r', and '\\r\\n'. It works as\n" +"follows:\n" +"\n" +"* On input, if newline is None, universal newlines mode is\n" +" enabled. 
Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n" +" these are translated into '\\n' before being returned to the\n" +" caller. If it is '', universal newline mode is enabled, but line\n" +" endings are returned to the caller untranslated. If it has any of\n" +" the other legal values, input lines are only terminated by the given\n" +" string, and the line ending is returned to the caller untranslated.\n" +"\n" +"* On output, if newline is None, any '\\n' characters written are\n" +" translated to the system default line separator, os.linesep. If\n" +" newline is '', no translation takes place. If newline is any of the\n" +" other legal values, any '\\n' characters written are translated to\n" +" the given string.\n" +"\n" +"If closefd is False, the underlying file descriptor will be kept open\n" +"when the file is closed. This does not work when a file name is given\n" +"and must be True in that case.\n" +"\n" +"open() returns a file object whose type depends on the mode, and\n" +"through which the standard file operations such as reading and writing\n" +"are performed. When open() is used to open a file in a text mode ('w',\n" +"'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open\n" +"a file in a binary mode, the returned class varies: in read binary\n" +"mode, it returns a BufferedReader; in write binary and append binary\n" +"modes, it returns a BufferedWriter, and in read/write mode, it returns\n" +"a BufferedRandom.\n" +"\n" +"It is also possible to use a string or bytearray as a file for both\n" +"reading and writing. 
For strings StringIO can be used like a file\n" +"opened in a text mode, and for bytes a BytesIO can be used like a file\n" +"opened in a binary mode.\n" + ); + +static PyObject * +io_open(PyObject *self, PyObject *args, PyObject *kwds) +{ + char *kwlist[] = {"file", "mode", "buffering", + "encoding", "errors", "newline", + "closefd", NULL}; + PyObject *file; + char *mode = "r"; + int buffering = -1, closefd = 1; + char *encoding = NULL, *errors = NULL, *newline = NULL; + unsigned i; + + int reading = 0, writing = 0, appending = 0, updating = 0; + int text = 0, binary = 0, universal = 0; + + char rawmode[5], *m; + int line_buffering, isatty; + + PyObject *raw, *modeobj = NULL, *buffer = NULL, *wrapper = NULL; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|sizzzi:open", kwlist, + &file, &mode, &buffering, + &encoding, &errors, &newline, + &closefd)) { + return NULL; + } + + if (!PyUnicode_Check(file) && + !PyBytes_Check(file) && + !PyNumber_Check(file)) { + PyObject *repr = PyObject_Repr(file); + if (repr != NULL) { + PyErr_Format(PyExc_TypeError, "invalid file: %s", + PyString_AS_STRING(repr)); + Py_DECREF(repr); + } + return NULL; + } + + /* Decode mode */ + for (i = 0; i < strlen(mode); i++) { + char c = mode[i]; + + switch (c) { + case 'r': + reading = 1; + break; + case 'w': + writing = 1; + break; + case 'a': + appending = 1; + break; + case '+': + updating = 1; + break; + case 't': + text = 1; + break; + case 'b': + binary = 1; + break; + case 'U': + universal = 1; + reading = 1; + break; + default: + goto invalid_mode; + } + + /* c must not be duplicated */ + if (strchr(mode+i+1, c)) { + invalid_mode: + PyErr_Format(PyExc_ValueError, "invalid mode: '%s'", mode); + return NULL; + } + + } + + m = rawmode; + if (reading) *(m++) = 'r'; + if (writing) *(m++) = 'w'; + if (appending) *(m++) = 'a'; + if (updating) *(m++) = '+'; + *m = '\0'; + + /* Parameters validation */ + if (universal) { + if (writing || appending) { + PyErr_SetString(PyExc_ValueError, + 
"can't use U and writing mode at once"); + return NULL; + } + reading = 1; + } + + if (text && binary) { + PyErr_SetString(PyExc_ValueError, + "can't have text and binary mode at once"); + return NULL; + } + + if (reading + writing + appending > 1) { + PyErr_SetString(PyExc_ValueError, + "must have exactly one of read/write/append mode"); + return NULL; + } + + if (binary && encoding != NULL) { + PyErr_SetString(PyExc_ValueError, + "binary mode doesn't take an encoding argument"); + return NULL; + } + + if (binary && errors != NULL) { + PyErr_SetString(PyExc_ValueError, + "binary mode doesn't take an errors argument"); + return NULL; + } + + if (binary && newline != NULL) { + PyErr_SetString(PyExc_ValueError, + "binary mode doesn't take a newline argument"); + return NULL; + } + + /* Create the Raw file stream */ + raw = PyObject_CallFunction((PyObject *)&PyFileIO_Type, + "Osi", file, rawmode, closefd); + if (raw == NULL) + return NULL; + + modeobj = PyUnicode_FromString(mode); + if (modeobj == NULL) + goto error; + + /* buffering */ + { + PyObject *res = PyObject_CallMethod(raw, "isatty", NULL); + if (res == NULL) + goto error; + isatty = PyLong_AsLong(res); + Py_DECREF(res); + if (isatty == -1 && PyErr_Occurred()) + goto error; + } + + if (buffering == 1 || (buffering < 0 && isatty)) { + buffering = -1; + line_buffering = 1; + } + else + line_buffering = 0; + + if (buffering < 0) { + buffering = DEFAULT_BUFFER_SIZE; +#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE + { + struct stat st; + long fileno; + PyObject *res = PyObject_CallMethod(raw, "fileno", NULL); + if (res == NULL) + goto error; + + fileno = PyInt_AsLong(res); + Py_DECREF(res); + if (fileno == -1 && PyErr_Occurred()) + goto error; + + if (fstat(fileno, &st) >= 0) + buffering = st.st_blksize; + } +#endif + } + if (buffering < 0) { + PyErr_SetString(PyExc_ValueError, + "invalid buffering size"); + goto error; + } + + /* if not buffering, returns the raw file object */ + if (buffering == 0) { + if (!binary) { + 
PyErr_SetString(PyExc_ValueError, + "can't have unbuffered text I/O"); + goto error; + } + + Py_DECREF(modeobj); + return raw; + } + + /* wraps into a buffered file */ + { + PyObject *Buffered_class; + + if (updating) + Buffered_class = (PyObject *)&PyBufferedRandom_Type; + else if (writing || appending) + Buffered_class = (PyObject *)&PyBufferedWriter_Type; + else if (reading) + Buffered_class = (PyObject *)&PyBufferedReader_Type; + else { + PyErr_Format(PyExc_ValueError, + "unknown mode: '%s'", mode); + goto error; + } + + buffer = PyObject_CallFunction(Buffered_class, "Oi", raw, buffering); + } + Py_CLEAR(raw); + if (buffer == NULL) + goto error; + + + /* if binary, returns the buffered file */ + if (binary) { + Py_DECREF(modeobj); + return buffer; + } + + /* wraps into a TextIOWrapper */ + wrapper = PyObject_CallFunction((PyObject *)&PyTextIOWrapper_Type, + "Osssi", + buffer, + encoding, errors, newline, + line_buffering); + Py_CLEAR(buffer); + if (wrapper == NULL) + goto error; + + if (PyObject_SetAttrString(wrapper, "mode", modeobj) < 0) + goto error; + Py_DECREF(modeobj); + return wrapper; + + error: + Py_XDECREF(raw); + Py_XDECREF(modeobj); + Py_XDECREF(buffer); + Py_XDECREF(wrapper); + return NULL; +} + +/* + * Private helpers for the io module. + */ + +Py_off_t +PyNumber_AsOff_t(PyObject *item, PyObject *err) +{ + Py_off_t result; + PyObject *runerr; + PyObject *value = PyNumber_Index(item); + if (value == NULL) + return -1; + + if (PyInt_Check(value)) { + /* We assume a long always fits in a Py_off_t... */ + result = (Py_off_t) PyInt_AS_LONG(value); + goto finish; + } + + /* We're done if PyLong_AsSsize_t() returns without error. 
*/ + result = PyLong_AsOff_t(value); + if (result != -1 || !(runerr = PyErr_Occurred())) + goto finish; + + /* Error handling code -- only manage OverflowError differently */ + if (!PyErr_GivenExceptionMatches(runerr, PyExc_OverflowError)) + goto finish; + + PyErr_Clear(); + /* If no error-handling desired then the default clipping + is sufficient. + */ + if (!err) { + assert(PyLong_Check(value)); + /* Whether or not it is less than or equal to + zero is determined by the sign of ob_size + */ + if (_PyLong_Sign(value) < 0) + result = PY_OFF_T_MIN; + else + result = PY_OFF_T_MAX; + } + else { + /* Otherwise replace the error with caller's error object. */ + PyErr_Format(err, + "cannot fit '%.200s' into an offset-sized integer", + item->ob_type->tp_name); + } + + finish: + Py_DECREF(value); + return result; +} + + +/* + * Module definition + */ + +PyObject *_PyIO_os_module = NULL; +PyObject *_PyIO_locale_module = NULL; +PyObject *_PyIO_unsupported_operation = NULL; + +static PyMethodDef module_methods[] = { + {"open", (PyCFunction)io_open, METH_VARARGS|METH_KEYWORDS, open_doc}, + {NULL, NULL} +}; + +PyMODINIT_FUNC +init_io(void) +{ + PyObject *m = Py_InitModule4("_io", module_methods, + module_doc, NULL, PYTHON_API_VERSION); + if (m == NULL) + return; + + /* put os in the module state */ + _PyIO_os_module = PyImport_ImportModule("os"); + if (_PyIO_os_module == NULL) + goto fail; + +#define ADD_TYPE(type, name) \ + if (PyType_Ready(type) < 0) \ + goto fail; \ + Py_INCREF(type); \ + if (PyModule_AddObject(m, name, (PyObject *)type) < 0) { \ + Py_DECREF(type); \ + goto fail; \ + } + + /* DEFAULT_BUFFER_SIZE */ + if (PyModule_AddIntMacro(m, DEFAULT_BUFFER_SIZE) < 0) + goto fail; + + /* UnsupportedOperation inherits from ValueError and IOError */ + _PyIO_unsupported_operation = PyObject_CallFunction( + (PyObject *)&PyType_Type, "s(OO){}", + "UnsupportedOperation", PyExc_ValueError, PyExc_IOError); + if (_PyIO_unsupported_operation == NULL) + goto fail; + 
Py_INCREF(_PyIO_unsupported_operation); + if (PyModule_AddObject(m, "UnsupportedOperation", + _PyIO_unsupported_operation) < 0) + goto fail; + + /* BlockingIOError */ + _PyExc_BlockingIOError.tp_base = (PyTypeObject *) PyExc_IOError; + ADD_TYPE(&_PyExc_BlockingIOError, "BlockingIOError"); + + /* Concrete base types of the IO ABCs. + (the ABCs themselves are declared through inheritance in io.py) + */ + ADD_TYPE(&PyIOBase_Type, "_IOBase"); + ADD_TYPE(&PyRawIOBase_Type, "_RawIOBase"); + ADD_TYPE(&PyBufferedIOBase_Type, "_BufferedIOBase"); + ADD_TYPE(&PyTextIOBase_Type, "_TextIOBase"); + + /* Implementation of concrete IO objects. */ + /* FileIO */ + PyFileIO_Type.tp_base = &PyRawIOBase_Type; + ADD_TYPE(&PyFileIO_Type, "FileIO"); + + /* BytesIO */ + PyBytesIO_Type.tp_base = &PyBufferedIOBase_Type; + ADD_TYPE(&PyBytesIO_Type, "BytesIO"); + + /* StringIO */ + PyStringIO_Type.tp_base = &PyTextIOBase_Type; + ADD_TYPE(&PyStringIO_Type, "StringIO"); + + /* BufferedReader */ + PyBufferedReader_Type.tp_base = &PyBufferedIOBase_Type; + ADD_TYPE(&PyBufferedReader_Type, "BufferedReader"); + + /* BufferedWriter */ + PyBufferedWriter_Type.tp_base = &PyBufferedIOBase_Type; + ADD_TYPE(&PyBufferedWriter_Type, "BufferedWriter"); + + /* BufferedRWPair */ + PyBufferedRWPair_Type.tp_base = &PyBufferedIOBase_Type; + ADD_TYPE(&PyBufferedRWPair_Type, "BufferedRWPair"); + + /* BufferedRandom */ + PyBufferedRandom_Type.tp_base = &PyBufferedIOBase_Type; + ADD_TYPE(&PyBufferedRandom_Type, "BufferedRandom"); + + /* TextIOWrapper */ + PyTextIOWrapper_Type.tp_base = &PyTextIOBase_Type; + ADD_TYPE(&PyTextIOWrapper_Type, "TextIOWrapper"); + + /* IncrementalNewlineDecoder */ + ADD_TYPE(&PyIncrementalNewlineDecoder_Type, "IncrementalNewlineDecoder"); + + /* Interned strings */ + if (!(_PyIO_str_close = PyString_InternFromString("close"))) + goto fail; + if (!(_PyIO_str_closed = PyString_InternFromString("closed"))) + goto fail; + if (!(_PyIO_str_decode = PyString_InternFromString("decode"))) + goto 
fail; + if (!(_PyIO_str_encode = PyString_InternFromString("encode"))) + goto fail; + if (!(_PyIO_str_fileno = PyString_InternFromString("fileno"))) + goto fail; + if (!(_PyIO_str_flush = PyString_InternFromString("flush"))) + goto fail; + if (!(_PyIO_str_getstate = PyString_InternFromString("getstate"))) + goto fail; + if (!(_PyIO_str_isatty = PyString_InternFromString("isatty"))) + goto fail; + if (!(_PyIO_str_newlines = PyString_InternFromString("newlines"))) + goto fail; + if (!(_PyIO_str_nl = PyString_InternFromString("\n"))) + goto fail; + if (!(_PyIO_str_read = PyString_InternFromString("read"))) + goto fail; + if (!(_PyIO_str_read1 = PyString_InternFromString("read1"))) + goto fail; + if (!(_PyIO_str_readable = PyString_InternFromString("readable"))) + goto fail; + if (!(_PyIO_str_readinto = PyString_InternFromString("readinto"))) + goto fail; + if (!(_PyIO_str_readline = PyString_InternFromString("readline"))) + goto fail; + if (!(_PyIO_str_reset = PyString_InternFromString("reset"))) + goto fail; + if (!(_PyIO_str_seek = PyString_InternFromString("seek"))) + goto fail; + if (!(_PyIO_str_seekable = PyString_InternFromString("seekable"))) + goto fail; + if (!(_PyIO_str_setstate = PyString_InternFromString("setstate"))) + goto fail; + if (!(_PyIO_str_tell = PyString_InternFromString("tell"))) + goto fail; + if (!(_PyIO_str_truncate = PyString_InternFromString("truncate"))) + goto fail; + if (!(_PyIO_str_write = PyString_InternFromString("write"))) + goto fail; + if (!(_PyIO_str_writable = PyString_InternFromString("writable"))) + goto fail; + + if (!(_PyIO_empty_str = PyUnicode_FromStringAndSize(NULL, 0))) + goto fail; + if (!(_PyIO_empty_bytes = PyBytes_FromStringAndSize(NULL, 0))) + goto fail; + if (!(_PyIO_zero = PyLong_FromLong(0L))) + goto fail; + + return; + + fail: + Py_CLEAR(_PyIO_os_module); + Py_CLEAR(_PyIO_unsupported_operation); + Py_DECREF(m); +} diff -r c5c04a1ff3b4 -r b3fbeeb23fd6 Modules/_io/_iomodule.h --- /dev/null +++ 
b/Modules/_io/_iomodule.h @@ -0,0 +1,146 @@ +/* + * Declarations shared between the different parts of the io module + */ + +/* ABCs */ +extern PyTypeObject PyIOBase_Type; +extern PyTypeObject PyRawIOBase_Type; +extern PyTypeObject PyBufferedIOBase_Type; +extern PyTypeObject PyTextIOBase_Type; + +/* Concrete classes */ +extern PyTypeObject PyFileIO_Type; +extern PyTypeObject PyBytesIO_Type; +extern PyTypeObject PyStringIO_Type; +extern PyTypeObject PyBufferedReader_Type; +extern PyTypeObject PyBufferedWriter_Type; +extern PyTypeObject PyBufferedRWPair_Type; +extern PyTypeObject PyBufferedRandom_Type; +extern PyTypeObject PyTextIOWrapper_Type; +extern PyTypeObject PyIncrementalNewlineDecoder_Type; + +/* These functions are used as METH_NOARGS methods, are normally called + * with args=NULL, and return a new reference. + * BUT when args=Py_True is passed, they return a borrowed reference. + */ +extern PyObject* _PyIOBase_checkReadable(PyObject *self, PyObject *args); +extern PyObject* _PyIOBase_checkWritable(PyObject *self, PyObject *args); +extern PyObject* _PyIOBase_checkSeekable(PyObject *self, PyObject *args); +extern PyObject* _PyIOBase_checkClosed(PyObject *self, PyObject *args); + +/* Helper for finalization. + This function will revive an object ready to be deallocated and try to + close() it. It returns 0 if the object can be destroyed, or -1 if it + is alive again. */ +extern int _PyIOBase_finalize(PyObject *self); + +/* Returns true if the given FileIO object is closed. + Doesn't check the argument type, so be careful! */ +extern int _PyFileIO_closed(PyObject *self); + +/* Shortcut to the core of the IncrementalNewlineDecoder.decode method */ +extern PyObject *_PyIncrementalNewlineDecoder_decode( + PyObject *self, PyObject *input, int final); + +/* Finds the first line ending between `start` and `end`. + If found, returns the index after the line ending and doesn't touch + `*consumed`. 
+ If not found, returns -1 and sets `*consumed` to the number of characters + which can be safely put aside until another search. + + NOTE: for performance reasons, `end` must point to a NUL character ('\0'). + Otherwise, the function will scan further and return garbage. */ +extern Py_ssize_t _PyIO_find_line_ending( + int translated, int universal, PyObject *readnl, + Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed); + + +#define DEFAULT_BUFFER_SIZE (8 * 1024) /* bytes */ + +typedef struct { + /* This is the equivalent of PyException_HEAD in 3.x */ + PyObject_HEAD + PyObject *dict; + PyObject *args; + PyObject *message; + + PyObject *myerrno; + PyObject *strerror; + PyObject *filename; /* Not used, but part of the IOError object */ + Py_ssize_t written; +} PyBlockingIOErrorObject; +PyAPI_DATA(PyObject *) PyExc_BlockingIOError; + +/* + * Offset type for positioning. + */ + +#if defined(MS_WIN64) || defined(MS_WINDOWS) + +/* Windows uses long long for offsets */ +typedef PY_LONG_LONG Py_off_t; +# define PyLong_AsOff_t PyLong_AsLongLong +# define PyLong_FromOff_t PyLong_FromLongLong +# define PY_OFF_T_MAX PY_LLONG_MAX +# define PY_OFF_T_MIN PY_LLONG_MIN + +#else + +/* Other platforms use off_t */ +typedef off_t Py_off_t; +#if (SIZEOF_OFF_T == SIZEOF_SIZE_T) +# define PyLong_AsOff_t PyLong_AsSsize_t +# define PyLong_FromOff_t PyLong_FromSsize_t +# define PY_OFF_T_MAX PY_SSIZE_T_MAX +# define PY_OFF_T_MIN PY_SSIZE_T_MIN +#elif (SIZEOF_OFF_T == SIZEOF_LONG_LONG) +# define PyLong_AsOff_t PyLong_AsLongLong +# define PyLong_FromOff_t PyLong_FromLongLong +# define PY_OFF_T_MAX PY_LLONG_MAX +# define PY_OFF_T_MIN PY_LLONG_MIN +#elif (SIZEOF_OFF_T == SIZEOF_LONG) +# define PyLong_AsOff_t PyLong_AsLong +# define PyLong_FromOff_t PyLong_FromLong +# define PY_OFF_T_MAX LONG_MAX +# define PY_OFF_T_MIN LONG_MIN +#else +# error off_t does not match either size_t, long, or long long! 
+#endif + +#endif + +extern Py_off_t PyNumber_AsOff_t(PyObject *item, PyObject *err); + +/* Implementation details */ + +extern PyObject *_PyIO_os_module; +extern PyObject *_PyIO_locale_module; +extern PyObject *_PyIO_unsupported_operation; + +extern PyObject *_PyIO_str_close; +extern PyObject *_PyIO_str_closed; +extern PyObject *_PyIO_str_decode; +extern PyObject *_PyIO_str_encode; +extern PyObject *_PyIO_str_fileno; +extern PyObject *_PyIO_str_flush; +extern PyObject *_PyIO_str_getstate; +extern PyObject *_PyIO_str_isatty; +extern PyObject *_PyIO_str_newlines; +extern PyObject *_PyIO_str_nl; +extern PyObject *_PyIO_str_read; +extern PyObject *_PyIO_str_read1; +extern PyObject *_PyIO_str_readable; +extern PyObject *_PyIO_str_readinto; +extern PyObject *_PyIO_str_readline; +extern PyObject *_PyIO_str_reset; +extern PyObject *_PyIO_str_seek; +extern PyObject *_PyIO_str_seekable; +extern PyObject *_PyIO_str_setstate; +extern PyObject *_PyIO_str_tell; +extern PyObject *_PyIO_str_truncate; +extern PyObject *_PyIO_str_writable; +extern PyObject *_PyIO_str_write; + +extern PyObject *_PyIO_empty_str; +extern PyObject *_PyIO_empty_bytes; +extern PyObject *_PyIO_zero; diff -r c5c04a1ff3b4 -r b3fbeeb23fd6 Modules/_io/bufferedio.c --- /dev/null +++ b/Modules/_io/bufferedio.c @@ -0,0 +1,2291 @@ +/* + An implementation of Buffered I/O as defined by PEP 3116 - "New I/O" + + Classes defined here: BufferedIOBase, BufferedReader, BufferedWriter, + BufferedRandom. + + Written by Amaury Forgeot d'Arc and Antoine Pitrou +*/ + +#define PY_SSIZE_T_CLEAN +#include "Python.h" +#include "structmember.h" +#include "pythread.h" +#include "_iomodule.h" + +/* + * BufferedIOBase class, inherits from IOBase. 
+ */ +PyDoc_STRVAR(BufferedIOBase_doc, + "Base class for buffered IO objects.\n" + "\n" + "The main difference with RawIOBase is that the read() method\n" + "supports omitting the size argument, and does not have a default\n" + "implementation that defers to readinto().\n" + "\n" + "In addition, read(), readinto() and write() may raise\n" + "BlockingIOError if the underlying raw stream is in non-blocking\n" + "mode and not ready; unlike their raw counterparts, they will never\n" + "return None.\n" + "\n" + "A typical implementation should not inherit from a RawIOBase\n" + "implementation, but wrap one.\n" + ); + +static PyObject * +BufferedIOBase_readinto(PyObject *self, PyObject *args) +{ + Py_buffer buf; + Py_ssize_t len; + PyObject *data; + + if (!PyArg_ParseTuple(args, "w*:readinto", &buf)) { + return NULL; + } + + data = PyObject_CallMethod(self, "read", "n", buf.len); + if (data == NULL) + goto error; + + if (!PyBytes_Check(data)) { + Py_DECREF(data); + PyErr_SetString(PyExc_TypeError, "read() should return bytes"); + goto error; + } + + len = Py_SIZE(data); + memcpy(buf.buf, PyBytes_AS_STRING(data), len); + + PyBuffer_Release(&buf); + Py_DECREF(data); + + return PyLong_FromSsize_t(len); + + error: + PyBuffer_Release(&buf); + return NULL; +} + +static PyObject * +BufferedIOBase_unsupported(const char *message) +{ + PyErr_SetString(_PyIO_unsupported_operation, message); + return NULL; +} + +PyDoc_STRVAR(BufferedIOBase_detach_doc, + "Disconnect this buffer from its underlying raw stream and return it.\n" + "\n" + "After the raw stream has been detached, the buffer is in an unusable\n" + "state.\n"); + +static PyObject * +BufferedIOBase_detach(PyObject *self) +{ + return BufferedIOBase_unsupported("detach"); +} + +PyDoc_STRVAR(BufferedIOBase_read_doc, + "Read and return up to n bytes.\n" + "\n" + "If the argument is omitted, None, or negative, reads and\n" + "returns all data until EOF.\n" + "\n" + "If the argument is positive, and the underlying raw stream 
is\n" + "not 'interactive', multiple raw reads may be issued to satisfy\n" + "the byte count (unless EOF is reached first). But for\n" + "interactive raw streams (as well as sockets and pipes), at most\n" + "one raw read will be issued, and a short result does not imply\n" + "that EOF is imminent.\n" + "\n" + "Returns an empty bytes object on EOF.\n" + "\n" + "Returns None if the underlying raw stream was open in non-blocking\n" + "mode and no data is available at the moment.\n"); + +static PyObject * +BufferedIOBase_read(PyObject *self, PyObject *args) +{ + return BufferedIOBase_unsupported("read"); +} + +PyDoc_STRVAR(BufferedIOBase_read1_doc, + "Read and return up to n bytes, with at most one read() call\n" + "to the underlying raw stream. A short result does not imply\n" + "that EOF is imminent.\n" + "\n" + "Returns an empty bytes object on EOF.\n"); + +static PyObject * +BufferedIOBase_read1(PyObject *self, PyObject *args) +{ + return BufferedIOBase_unsupported("read1"); +} + +PyDoc_STRVAR(BufferedIOBase_write_doc, + "Write the given buffer to the IO stream.\n" + "\n" + "Returns the number of bytes written, which is never less than\n" + "len(b).\n" + "\n" + "Raises BlockingIOError if the buffer is full and the\n" + "underlying raw stream cannot accept more data at the moment.\n"); + +static PyObject * +BufferedIOBase_write(PyObject *self, PyObject *args) +{ + return BufferedIOBase_unsupported("write"); +} + + +static PyMethodDef BufferedIOBase_methods[] = { + {"detach", (PyCFunction)BufferedIOBase_detach, METH_NOARGS, BufferedIOBase_detach_doc}, + {"read", BufferedIOBase_read, METH_VARARGS, BufferedIOBase_read_doc}, + {"read1", BufferedIOBase_read1, METH_VARARGS, BufferedIOBase_read1_doc}, + {"readinto", BufferedIOBase_readinto, METH_VARARGS, NULL}, + {"write", BufferedIOBase_write, METH_VARARGS, BufferedIOBase_write_doc}, + {NULL, NULL} +}; + +PyTypeObject PyBufferedIOBase_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_io._BufferedIOBase", /*tp_name*/ + 0, 
/*tp_basicsize*/ + 0, /*tp_itemsize*/ + 0, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare */ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + BufferedIOBase_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + BufferedIOBase_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + &PyIOBase_Type, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ +}; + + +typedef struct { + PyObject_HEAD + + PyObject *raw; + int ok; /* Initialized? */ + int detached; + int readable; + int writable; + + /* True if this is a vanilla Buffered object (rather than a user derived + class) *and* the raw stream is a vanilla FileIO object. */ + int fast_closed_checks; + + /* Absolute position inside the raw stream (-1 if unknown). */ + Py_off_t abs_pos; + + /* A static buffer of size `buffer_size` */ + char *buffer; + /* Current logical position in the buffer. */ + Py_off_t pos; + /* Position of the raw stream in the buffer. */ + Py_off_t raw_pos; + + /* Just after the last buffered byte in the buffer, or -1 if the buffer + isn't ready for reading. */ + Py_off_t read_end; + + /* Just after the last byte actually written */ + Py_off_t write_pos; + /* Just after the last byte waiting to be written, or -1 if the buffer + isn't ready for writing. 
*/ + Py_off_t write_end; + +#ifdef WITH_THREAD + PyThread_type_lock lock; +#endif + + Py_ssize_t buffer_size; + Py_ssize_t buffer_mask; + + PyObject *dict; + PyObject *weakreflist; +} BufferedObject; + +/* + Implementation notes: + + * BufferedReader, BufferedWriter and BufferedRandom try to share most + methods (this is helped by the members `readable` and `writable`, which + are initialized in the respective constructors) + * They also share a single buffer for reading and writing. This enables + interleaved reads and writes without flushing. It also makes the logic + a bit trickier to get right. + * The absolute position of the raw stream is cached, if possible, in the + `abs_pos` member. It must be updated every time an operation is done + on the raw stream. If not sure, it can be reinitialized by calling + _Buffered_raw_tell(), which queries the raw stream (_Buffered_raw_seek() + also does it). To read it, use RAW_TELL(). + * Three helpers, _BufferedReader_raw_read, _BufferedWriter_raw_write and + _BufferedWriter_flush_unlocked do a lot of useful housekeeping. + + NOTE: we should try to maintain block alignment of reads and writes to the + raw stream (according to the buffer size), but for now it is only done + in read() and friends. + + XXX: method naming is a bit messy. +*/ + +/* These macros protect the BufferedObject against concurrent operations. 
*/ + +#ifdef WITH_THREAD +#define ENTER_BUFFERED(self) \ + Py_BEGIN_ALLOW_THREADS \ + PyThread_acquire_lock(self->lock, 1); \ + Py_END_ALLOW_THREADS + +#define LEAVE_BUFFERED(self) \ + PyThread_release_lock(self->lock); +#else +#define ENTER_BUFFERED(self) +#define LEAVE_BUFFERED(self) +#endif + +#define CHECK_INITIALIZED(self) \ + if (self->ok <= 0) { \ + if (self->detached) { \ + PyErr_SetString(PyExc_ValueError, \ + "raw stream has been detached"); \ + } else { \ + PyErr_SetString(PyExc_ValueError, \ + "I/O operation on uninitialized object"); \ + } \ + return NULL; \ + } + +#define CHECK_INITIALIZED_INT(self) \ + if (self->ok <= 0) { \ + if (self->detached) { \ + PyErr_SetString(PyExc_ValueError, \ + "raw stream has been detached"); \ + } else { \ + PyErr_SetString(PyExc_ValueError, \ + "I/O operation on uninitialized object"); \ + } \ + return -1; \ + } + +#define IS_CLOSED(self) \ + (self->fast_closed_checks \ + ? _PyFileIO_closed(self->raw) \ + : BufferedIOMixin_closed(self)) + +#define CHECK_CLOSED(self, error_msg) \ + if (IS_CLOSED(self)) { \ + PyErr_SetString(PyExc_ValueError, error_msg); \ + return NULL; \ + } + + +#define VALID_READ_BUFFER(self) \ + (self->readable && self->read_end != -1) + +#define VALID_WRITE_BUFFER(self) \ + (self->writable && self->write_end != -1) + +#define ADJUST_POSITION(self, _new_pos) \ + do { \ + self->pos = _new_pos; \ + if (VALID_READ_BUFFER(self) && self->read_end < self->pos) \ + self->read_end = self->pos; \ + } while(0) + +#define READAHEAD(self) \ + ((self->readable && VALID_READ_BUFFER(self)) \ + ? (self->read_end - self->pos) : 0) + +#define RAW_OFFSET(self) \ + (((VALID_READ_BUFFER(self) || VALID_WRITE_BUFFER(self)) \ + && self->raw_pos >= 0) ? self->raw_pos - self->pos : 0) + +#define RAW_TELL(self) \ + (self->abs_pos != -1 ? self->abs_pos : _Buffered_raw_tell(self)) + +#define MINUS_LAST_BLOCK(self, size) \ + (self->buffer_mask ? 
\ + (size & ~self->buffer_mask) : \ + (self->buffer_size * (size / self->buffer_size))) + + +static void +BufferedObject_dealloc(BufferedObject *self) +{ + if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0) + return; + _PyObject_GC_UNTRACK(self); + self->ok = 0; + if (self->weakreflist != NULL) + PyObject_ClearWeakRefs((PyObject *)self); + Py_CLEAR(self->raw); + if (self->buffer) { + PyMem_Free(self->buffer); + self->buffer = NULL; + } +#ifdef WITH_THREAD + if (self->lock) { + PyThread_free_lock(self->lock); + self->lock = NULL; + } +#endif + Py_CLEAR(self->dict); + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static int +Buffered_traverse(BufferedObject *self, visitproc visit, void *arg) +{ + Py_VISIT(self->raw); + Py_VISIT(self->dict); + return 0; +} + +static int +Buffered_clear(BufferedObject *self) +{ + if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0) + return -1; + self->ok = 0; + Py_CLEAR(self->raw); + Py_CLEAR(self->dict); + return 0; +} + +/* + * _BufferedIOMixin methods + * This is not a class, just a collection of methods that will be reused + * by BufferedReader and BufferedWriter + */ + +/* Flush and close */ + +static PyObject * +BufferedIOMixin_flush(BufferedObject *self, PyObject *args) +{ + CHECK_INITIALIZED(self) + return PyObject_CallMethodObjArgs(self->raw, _PyIO_str_flush, NULL); +} + +static int +BufferedIOMixin_closed(BufferedObject *self) +{ + int closed; + PyObject *res; + CHECK_INITIALIZED_INT(self) + res = PyObject_GetAttr(self->raw, _PyIO_str_closed); + if (res == NULL) + return -1; + closed = PyObject_IsTrue(res); + Py_DECREF(res); + return closed; +} + +static PyObject * +BufferedIOMixin_closed_get(BufferedObject *self, void *context) +{ + CHECK_INITIALIZED(self) + return PyObject_GetAttr(self->raw, _PyIO_str_closed); +} + +static PyObject * +BufferedIOMixin_close(BufferedObject *self, PyObject *args) +{ + PyObject *res = NULL; + int r; + + CHECK_INITIALIZED(self) + ENTER_BUFFERED(self) + + r = 
BufferedIOMixin_closed(self); + if (r < 0) + goto end; + if (r > 0) { + res = Py_None; + Py_INCREF(res); + goto end; + } + /* flush() will most probably re-take the lock, so drop it first */ + LEAVE_BUFFERED(self) + res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL); + ENTER_BUFFERED(self) + if (res == NULL) { + /* If flush() fails, just give up */ + if (PyErr_ExceptionMatches(PyExc_IOError)) + PyErr_Clear(); + else + goto end; + } + Py_XDECREF(res); + + res = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_close, NULL); + +end: + LEAVE_BUFFERED(self) + return res; +} + +/* detach */ + +static PyObject * +BufferedIOMixin_detach(BufferedObject *self, PyObject *args) +{ + PyObject *raw, *res; + CHECK_INITIALIZED(self) + res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL); + if (res == NULL) + return NULL; + Py_DECREF(res); + raw = self->raw; + self->raw = NULL; + self->detached = 1; + self->ok = 0; + return raw; +} + +/* Inquiries */ + +static PyObject * +BufferedIOMixin_seekable(BufferedObject *self, PyObject *args) +{ + CHECK_INITIALIZED(self) + return PyObject_CallMethodObjArgs(self->raw, _PyIO_str_seekable, NULL); +} + +static PyObject * +BufferedIOMixin_readable(BufferedObject *self, PyObject *args) +{ + CHECK_INITIALIZED(self) + return PyObject_CallMethodObjArgs(self->raw, _PyIO_str_readable, NULL); +} + +static PyObject * +BufferedIOMixin_writable(BufferedObject *self, PyObject *args) +{ + CHECK_INITIALIZED(self) + return PyObject_CallMethodObjArgs(self->raw, _PyIO_str_writable, NULL); +} + +static PyObject * +BufferedIOMixin_name_get(BufferedObject *self, void *context) +{ + CHECK_INITIALIZED(self) + return PyObject_GetAttrString(self->raw, "name"); +} + +static PyObject * +BufferedIOMixin_mode_get(BufferedObject *self, void *context) +{ + CHECK_INITIALIZED(self) + return PyObject_GetAttrString(self->raw, "mode"); +} + +/* Lower-level APIs */ + +static PyObject * +BufferedIOMixin_fileno(BufferedObject *self, 
PyObject *args) +{ + CHECK_INITIALIZED(self) + return PyObject_CallMethodObjArgs(self->raw, _PyIO_str_fileno, NULL); +} + +static PyObject * +BufferedIOMixin_isatty(BufferedObject *self, PyObject *args) +{ + CHECK_INITIALIZED(self) + return PyObject_CallMethodObjArgs(self->raw, _PyIO_str_isatty, NULL); +} + + +/* Forward decls */ +static PyObject * +_BufferedWriter_flush_unlocked(BufferedObject *, int); +static Py_ssize_t +_BufferedReader_fill_buffer(BufferedObject *self); +static void +_BufferedReader_reset_buf(BufferedObject *self); +static void +_BufferedWriter_reset_buf(BufferedObject *self); +static PyObject * +_BufferedReader_peek_unlocked(BufferedObject *self, Py_ssize_t); +static PyObject * +_BufferedReader_read_all(BufferedObject *self); +static PyObject * +_BufferedReader_read_fast(BufferedObject *self, Py_ssize_t); +static PyObject * +_BufferedReader_read_generic(BufferedObject *self, Py_ssize_t); + + +/* + * Helpers + */ + +/* Returns the address of the `written` member if a BlockingIOError was + raised, NULL otherwise. The error is always re-raised. 
*/ +static Py_ssize_t * +_Buffered_check_blocking_error(void) +{ + PyObject *t, *v, *tb; + PyBlockingIOErrorObject *err; + + PyErr_Fetch(&t, &v, &tb); + if (v == NULL || !PyErr_GivenExceptionMatches(v, PyExc_BlockingIOError)) { + PyErr_Restore(t, v, tb); + return NULL; + } + err = (PyBlockingIOErrorObject *) v; + /* TODO: sanity check (err->written >= 0) */ + PyErr_Restore(t, v, tb); + return &err->written; +} + +static Py_off_t +_Buffered_raw_tell(BufferedObject *self) +{ + Py_off_t n; + PyObject *res; + res = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_tell, NULL); + if (res == NULL) + return -1; + n = PyNumber_AsOff_t(res, PyExc_ValueError); + Py_DECREF(res); + if (n < 0) { + if (!PyErr_Occurred()) + PyErr_Format(PyExc_IOError, + "Raw stream returned invalid position %zd", n); + return -1; + } + self->abs_pos = n; + return n; +} + +static Py_off_t +_Buffered_raw_seek(BufferedObject *self, Py_off_t target, int whence) +{ + PyObject *res, *posobj, *whenceobj; + Py_off_t n; + + posobj = PyLong_FromOff_t(target); + if (posobj == NULL) + return -1; + whenceobj = PyLong_FromLong(whence); + if (whenceobj == NULL) { + Py_DECREF(posobj); + return -1; + } + res = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_seek, + posobj, whenceobj, NULL); + Py_DECREF(posobj); + Py_DECREF(whenceobj); + if (res == NULL) + return -1; + n = PyNumber_AsOff_t(res, PyExc_ValueError); + Py_DECREF(res); + if (n < 0) { + if (!PyErr_Occurred()) + PyErr_Format(PyExc_IOError, + "Raw stream returned invalid position %zd", n); + return -1; + } + self->abs_pos = n; + return n; +} + +static int +_Buffered_init(BufferedObject *self) +{ + Py_ssize_t n; + if (self->buffer_size <= 0) { + PyErr_SetString(PyExc_ValueError, + "buffer size must be strictly positive"); + return -1; + } + if (self->buffer) + PyMem_Free(self->buffer); + self->buffer = PyMem_Malloc(self->buffer_size); + if (self->buffer == NULL) { + PyErr_NoMemory(); + return -1; + } +#ifdef WITH_THREAD + self->lock = 
PyThread_allocate_lock(); + if (self->lock == NULL) { + PyErr_SetString(PyExc_RuntimeError, "can't allocate read lock"); + return -1; + } +#endif + /* Find out whether buffer_size is a power of 2 */ + /* XXX is this optimization useful? */ + for (n = self->buffer_size - 1; n & 1; n >>= 1) + ; + if (n == 0) + self->buffer_mask = self->buffer_size - 1; + else + self->buffer_mask = 0; + if (_Buffered_raw_tell(self) == -1) + PyErr_Clear(); + return 0; +} + +/* + * Shared methods and wrappers + */ + +static PyObject * +Buffered_flush(BufferedObject *self, PyObject *args) +{ + PyObject *res; + + CHECK_INITIALIZED(self) + CHECK_CLOSED(self, "flush of closed file") + + ENTER_BUFFERED(self) + res = _BufferedWriter_flush_unlocked(self, 0); + if (res != NULL && self->readable) { + /* Rewind the raw stream so that its position corresponds to + the current logical position. */ + Py_off_t n; + n = _Buffered_raw_seek(self, -RAW_OFFSET(self), 1); + if (n == -1) + Py_CLEAR(res); + _BufferedReader_reset_buf(self); + } + LEAVE_BUFFERED(self) + + return res; +} + +static PyObject * +Buffered_peek(BufferedObject *self, PyObject *args) +{ + Py_ssize_t n = 0; + PyObject *res = NULL; + + CHECK_INITIALIZED(self) + if (!PyArg_ParseTuple(args, "|n:peek", &n)) { + return NULL; + } + + ENTER_BUFFERED(self) + + if (self->writable) { + res = _BufferedWriter_flush_unlocked(self, 1); + if (res == NULL) + goto end; + Py_CLEAR(res); + } + res = _BufferedReader_peek_unlocked(self, n); + +end: + LEAVE_BUFFERED(self) + return res; +} + +static PyObject * +Buffered_read(BufferedObject *self, PyObject *args) +{ + Py_ssize_t n = -1; + PyObject *res; + + CHECK_INITIALIZED(self) + if (!PyArg_ParseTuple(args, "|n:read", &n)) { + return NULL; + } + if (n < -1) { + PyErr_SetString(PyExc_ValueError, + "read length must be positive or -1"); + return NULL; + } + + CHECK_CLOSED(self, "read of closed file") + + if (n == -1) { + /* The number of bytes is unspecified, read until the end of stream */ + 
ENTER_BUFFERED(self) + res = _BufferedReader_read_all(self); + LEAVE_BUFFERED(self) + } + else { + res = _BufferedReader_read_fast(self, n); + if (res == Py_None) { + Py_DECREF(res); + ENTER_BUFFERED(self) + res = _BufferedReader_read_generic(self, n); + LEAVE_BUFFERED(self) + } + } + + return res; +} + +static PyObject * +Buffered_read1(BufferedObject *self, PyObject *args) +{ + Py_ssize_t n, have, r; + PyObject *res = NULL; + + CHECK_INITIALIZED(self) + if (!PyArg_ParseTuple(args, "n:read1", &n)) { + return NULL; + } + + if (n < 0) { + PyErr_SetString(PyExc_ValueError, + "read length must be positive"); + return NULL; + } + if (n == 0) + return PyBytes_FromStringAndSize(NULL, 0); + + ENTER_BUFFERED(self) + + if (self->writable) { + res = _BufferedWriter_flush_unlocked(self, 1); + if (res == NULL) + goto end; + Py_CLEAR(res); + } + + /* Return up to n bytes. If at least one byte is buffered, we + only return buffered bytes. Otherwise, we do one raw read. */ + + /* XXX: this mimicks the io.py implementation but is probably wrong. + If we need to read from the raw stream, then we could actually read + all `n` bytes asked by the caller (and possibly more, so as to fill + our buffer for the next reads). */ + + have = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); + if (have > 0) { + if (n > have) + n = have; + res = PyBytes_FromStringAndSize(self->buffer + self->pos, n); + if (res == NULL) + goto end; + self->pos += n; + goto end; + } + + /* Fill the buffer from the raw stream, and copy it to the result. 
*/ + _BufferedReader_reset_buf(self); + r = _BufferedReader_fill_buffer(self); + if (r == -1) + goto end; + if (r == -2) + r = 0; + if (n > r) + n = r; + res = PyBytes_FromStringAndSize(self->buffer, n); + if (res == NULL) + goto end; + self->pos = n; + +end: + LEAVE_BUFFERED(self) + return res; +} + +static PyObject * +Buffered_readinto(BufferedObject *self, PyObject *args) +{ + PyObject *res = NULL; + + CHECK_INITIALIZED(self) + + /* TODO: use raw.readinto() instead! */ + if (self->writable) { + ENTER_BUFFERED(self) + res = _BufferedWriter_flush_unlocked(self, 0); + LEAVE_BUFFERED(self) + if (res == NULL) + goto end; + Py_DECREF(res); + } + res = BufferedIOBase_readinto((PyObject *)self, args); + +end: + return res; +} + +static PyObject * +_Buffered_readline(BufferedObject *self, Py_ssize_t limit) +{ + PyObject *res = NULL; + PyObject *chunks = NULL; + Py_ssize_t n, written = 0; + const char *start, *s, *end; + + CHECK_CLOSED(self, "readline of closed file") + + /* First, try to find a line in the buffer. This can run unlocked because + the calls to the C API are simple enough that they can't trigger + any thread switch. 
*/ + n = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); + if (limit >= 0 && n > limit) + n = limit; + start = self->buffer + self->pos; + s = memchr(start, '\n', n); + if (s != NULL) { + res = PyBytes_FromStringAndSize(start, s - start + 1); + if (res != NULL) + self->pos += s - start + 1; + goto end_unlocked; + } + if (n == limit) { + res = PyBytes_FromStringAndSize(start, n); + if (res != NULL) + self->pos += n; + goto end_unlocked; + } + + ENTER_BUFFERED(self) + + /* Now we try to get some more from the raw stream */ + if (self->writable) { + res = _BufferedWriter_flush_unlocked(self, 1); + if (res == NULL) + goto end; + Py_CLEAR(res); + } + chunks = PyList_New(0); + if (chunks == NULL) + goto end; + if (n > 0) { + res = PyBytes_FromStringAndSize(start, n); + if (res == NULL) + goto end; + if (PyList_Append(chunks, res) < 0) { + Py_CLEAR(res); + goto end; + } + Py_CLEAR(res); + written += n; + if (limit >= 0) + limit -= n; + } + + for (;;) { + _BufferedReader_reset_buf(self); + n = _BufferedReader_fill_buffer(self); + if (n == -1) + goto end; + if (n <= 0) + break; + if (limit >= 0 && n > limit) + n = limit; + start = self->buffer; + end = start + n; + s = start; + while (s < end) { + if (*s++ == '\n') { + res = PyBytes_FromStringAndSize(start, s - start); + if (res == NULL) + goto end; + self->pos = s - start; + goto found; + } + } + res = PyBytes_FromStringAndSize(start, n); + if (res == NULL) + goto end; + if (n == limit) { + self->pos = n; + break; + } + if (PyList_Append(chunks, res) < 0) { + Py_CLEAR(res); + goto end; + } + Py_CLEAR(res); + written += n; + if (limit >= 0) + limit -= n; + } +found: + if (res != NULL && PyList_Append(chunks, res) < 0) { + Py_CLEAR(res); + goto end; + } + Py_CLEAR(res); + res = _PyBytes_Join(_PyIO_empty_bytes, chunks); + +end: + LEAVE_BUFFERED(self) +end_unlocked: + Py_XDECREF(chunks); + return res; +} + +static PyObject * +Buffered_readline(BufferedObject *self, PyObject *args) +{ + PyObject *limitobj = NULL; + 
Py_ssize_t limit = -1; + + CHECK_INITIALIZED(self) + + if (!PyArg_ParseTuple(args, "|O:readline", &limitobj)) { + return NULL; + } + if (limitobj) { + if (!PyNumber_Check(limitobj)) { + PyErr_Format(PyExc_TypeError, + "integer argument expected, got '%.200s'", + Py_TYPE(limitobj)->tp_name); + return NULL; + } + limit = PyNumber_AsSsize_t(limitobj, PyExc_OverflowError); + if (limit == -1 && PyErr_Occurred()) + return NULL; + } + return _Buffered_readline(self, limit); +} + + +static PyObject * +Buffered_tell(BufferedObject *self, PyObject *args) +{ + Py_off_t pos; + + CHECK_INITIALIZED(self) + pos = _Buffered_raw_tell(self); + if (pos == -1) + return NULL; + pos -= RAW_OFFSET(self); + /* TODO: sanity check (pos >= 0) */ + return PyLong_FromOff_t(pos); +} + +static PyObject * +Buffered_seek(BufferedObject *self, PyObject *args) +{ + Py_off_t target, n; + int whence = 0; + PyObject *targetobj, *res = NULL; + + CHECK_INITIALIZED(self) + if (!PyArg_ParseTuple(args, "O|i:seek", &targetobj, &whence)) { + return NULL; + } + if (whence < 0 || whence > 2) { + PyErr_Format(PyExc_ValueError, + "whence must be between 0 and 2, not %d", whence); + return NULL; + } + + CHECK_CLOSED(self, "seek of closed file") + + target = PyNumber_AsOff_t(targetobj, PyExc_ValueError); + if (target == -1 && PyErr_Occurred()) + return NULL; + + if (whence != 2 && self->readable) { + Py_off_t current, avail; + /* Check if seeking leaves us inside the current buffer, + so as to return quickly if possible. Also, we needn't take the + lock in this fast path. + Don't know how to do that when whence == 2, though. */ + /* NOTE: RAW_TELL() can release the GIL but the object is in a stable + state at this point. 
*/ + current = RAW_TELL(self); + avail = READAHEAD(self); + if (avail > 0) { + Py_off_t offset; + if (whence == 0) + offset = target - (current - RAW_OFFSET(self)); + else + offset = target; + if (offset >= -self->pos && offset <= avail) { + self->pos += offset; + return PyLong_FromOff_t(current - avail + offset); + } + } + } + + ENTER_BUFFERED(self) + + /* Fallback: invoke raw seek() method and clear buffer */ + if (self->writable) { + res = _BufferedWriter_flush_unlocked(self, 0); + if (res == NULL) + goto end; + Py_CLEAR(res); + _BufferedWriter_reset_buf(self); + } + + /* TODO: align on block boundary and read buffer if needed? */ + if (whence == 1) + target -= RAW_OFFSET(self); + n = _Buffered_raw_seek(self, target, whence); + if (n == -1) + goto end; + self->raw_pos = -1; + res = PyLong_FromOff_t(n); + if (res != NULL && self->readable) + _BufferedReader_reset_buf(self); + +end: + LEAVE_BUFFERED(self) + return res; +} + +static PyObject * +Buffered_truncate(BufferedObject *self, PyObject *args) +{ + PyObject *pos = Py_None; + PyObject *res = NULL; + + CHECK_INITIALIZED(self) + if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) { + return NULL; + } + + ENTER_BUFFERED(self) + + if (self->writable) { + res = _BufferedWriter_flush_unlocked(self, 0); + if (res == NULL) + goto end; + Py_CLEAR(res); + } + if (self->readable) { + if (pos == Py_None) { + /* Rewind the raw stream so that its position corresponds to + the current logical position. 
*/ + if (_Buffered_raw_seek(self, -RAW_OFFSET(self), 1) == -1) + goto end; + } + _BufferedReader_reset_buf(self); + } + res = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_truncate, pos, NULL); + if (res == NULL) + goto end; + /* Reset cached position */ + if (_Buffered_raw_tell(self) == -1) + PyErr_Clear(); + +end: + LEAVE_BUFFERED(self) + return res; +} + +static PyObject * +Buffered_iternext(BufferedObject *self) +{ + PyObject *line; + PyTypeObject *tp; + + CHECK_INITIALIZED(self); + + tp = Py_TYPE(self); + if (tp == &PyBufferedReader_Type || + tp == &PyBufferedRandom_Type) { + /* Skip method call overhead for speed */ + line = _Buffered_readline(self, -1); + } + else { + line = PyObject_CallMethodObjArgs((PyObject *)self, + _PyIO_str_readline, NULL); + if (line && !PyBytes_Check(line)) { + PyErr_Format(PyExc_IOError, + "readline() should have returned a bytes object, " + "not '%.200s'", Py_TYPE(line)->tp_name); + Py_DECREF(line); + return NULL; + } + } + + if (line == NULL) + return NULL; + + if (PyBytes_GET_SIZE(line) == 0) { + /* Reached EOF or would have blocked */ + Py_DECREF(line); + return NULL; + } + + return line; +} + +static PyObject * +Buffered_repr(BufferedObject *self) +{ + PyObject *nameobj, *res; + + nameobj = PyObject_GetAttrString((PyObject *) self, "name"); + if (nameobj == NULL) { + if (PyErr_ExceptionMatches(PyExc_AttributeError)) + PyErr_Clear(); + else + return NULL; + res = PyString_FromFormat("<%s>", Py_TYPE(self)->tp_name); + } + else { + PyObject *repr = PyObject_Repr(nameobj); + Py_DECREF(nameobj); + if (repr == NULL) + return NULL; + res = PyString_FromFormat("<%s name=%s>", + Py_TYPE(self)->tp_name, + PyString_AS_STRING(repr)); + Py_DECREF(repr); + } + return res; +} + +/* + * class BufferedReader + */ + +PyDoc_STRVAR(BufferedReader_doc, + "Create a new buffered reader using the given readable raw IO object."); + +static void _BufferedReader_reset_buf(BufferedObject *self) +{ + self->read_end = -1; +} + +static int 
+BufferedReader_init(BufferedObject *self, PyObject *args, PyObject *kwds) +{ + char *kwlist[] = {"raw", "buffer_size", NULL}; + Py_ssize_t buffer_size = DEFAULT_BUFFER_SIZE; + PyObject *raw; + + self->ok = 0; + self->detached = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|n:BufferedReader", kwlist, + &raw, &buffer_size)) { + return -1; + } + + if (_PyIOBase_checkReadable(raw, Py_True) == NULL) + return -1; + + Py_CLEAR(self->raw); + Py_INCREF(raw); + self->raw = raw; + self->buffer_size = buffer_size; + self->readable = 1; + self->writable = 0; + + if (_Buffered_init(self) < 0) + return -1; + _BufferedReader_reset_buf(self); + + self->fast_closed_checks = (Py_TYPE(self) == &PyBufferedReader_Type && + Py_TYPE(raw) == &PyFileIO_Type); + + self->ok = 1; + return 0; +} + +static Py_ssize_t +_BufferedReader_raw_read(BufferedObject *self, char *start, Py_ssize_t len) +{ + Py_buffer buf; + PyObject *memobj, *res; + Py_ssize_t n; + /* NOTE: the buffer needn't be released as its object is NULL. */ + if (PyBuffer_FillInfo(&buf, NULL, start, len, 0, PyBUF_CONTIG) == -1) + return -1; + memobj = PyMemoryView_FromBuffer(&buf); + if (memobj == NULL) + return -1; + res = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_readinto, memobj, NULL); + Py_DECREF(memobj); + if (res == NULL) + return -1; + if (res == Py_None) { + /* Non-blocking stream would have blocked. Special return code! 
*/ + Py_DECREF(res); + return -2; + } + n = PyNumber_AsSsize_t(res, PyExc_ValueError); + Py_DECREF(res); + if (n < 0 || n > len) { + PyErr_Format(PyExc_IOError, + "raw readinto() returned invalid length %zd " + "(should have been between 0 and %zd)", n, len); + return -1; + } + if (n > 0 && self->abs_pos != -1) + self->abs_pos += n; + return n; +} + +static Py_ssize_t +_BufferedReader_fill_buffer(BufferedObject *self) +{ + Py_ssize_t start, len, n; + if (VALID_READ_BUFFER(self)) + start = Py_SAFE_DOWNCAST(self->read_end, Py_off_t, Py_ssize_t); + else + start = 0; + len = self->buffer_size - start; + n = _BufferedReader_raw_read(self, self->buffer + start, len); + if (n <= 0) + return n; + self->read_end = start + n; + self->raw_pos = start + n; + return n; +} + +static PyObject * +_BufferedReader_read_all(BufferedObject *self) +{ + Py_ssize_t current_size; + PyObject *res, *data = NULL; + PyObject *chunks = PyList_New(0); + + if (chunks == NULL) + return NULL; + + /* First copy what we have in the current buffer. */ + current_size = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); + if (current_size) { + data = PyBytes_FromStringAndSize( + self->buffer + self->pos, current_size); + if (data == NULL) { + Py_DECREF(chunks); + return NULL; + } + } + _BufferedReader_reset_buf(self); + /* We're going past the buffer's bounds, flush it */ + if (self->writable) { + res = _BufferedWriter_flush_unlocked(self, 1); + if (res == NULL) { + Py_DECREF(chunks); + return NULL; + } + Py_CLEAR(res); + } + while (1) { + if (data) { + if (PyList_Append(chunks, data) < 0) { + Py_DECREF(data); + Py_DECREF(chunks); + return NULL; + } + Py_DECREF(data); + } + + /* Read until EOF or until read() would block. 
*/ + data = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_read, NULL); + if (data == NULL) { + Py_DECREF(chunks); + return NULL; + } + if (data != Py_None && !PyBytes_Check(data)) { + Py_DECREF(data); + Py_DECREF(chunks); + PyErr_SetString(PyExc_TypeError, "read() should return bytes"); + return NULL; + } + if (data == Py_None || PyBytes_GET_SIZE(data) == 0) { + if (current_size == 0) { + Py_DECREF(chunks); + return data; + } + else { + res = _PyBytes_Join(_PyIO_empty_bytes, chunks); + Py_DECREF(data); + Py_DECREF(chunks); + return res; + } + } + current_size += PyBytes_GET_SIZE(data); + if (self->abs_pos != -1) + self->abs_pos += PyBytes_GET_SIZE(data); + } +} + +/* Read n bytes from the buffer if it can, otherwise return None. + This function is simple enough that it can run unlocked. */ +static PyObject * +_BufferedReader_read_fast(BufferedObject *self, Py_ssize_t n) +{ + Py_ssize_t current_size; + + current_size = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); + if (n <= current_size) { + /* Fast path: the data to read is fully buffered. */ + PyObject *res = PyBytes_FromStringAndSize(self->buffer + self->pos, n); + if (res != NULL) + self->pos += n; + return res; + } + Py_RETURN_NONE; +} + +/* Generic read function: read from the stream until enough bytes are read, + * or until an EOF occurs or until read() would block. 
+ */ +static PyObject * +_BufferedReader_read_generic(BufferedObject *self, Py_ssize_t n) +{ + PyObject *res = NULL; + Py_ssize_t current_size, remaining, written; + char *out; + + current_size = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); + if (n <= current_size) + return _BufferedReader_read_fast(self, n); + + res = PyBytes_FromStringAndSize(NULL, n); + if (res == NULL) + goto error; + out = PyBytes_AS_STRING(res); + remaining = n; + written = 0; + if (current_size > 0) { + memcpy(out, self->buffer + self->pos, current_size); + remaining -= current_size; + written += current_size; + } + _BufferedReader_reset_buf(self); + while (remaining > 0) { + /* We want to read a whole block at the end into buffer. + If we had readv() we could do this in one pass. */ + Py_ssize_t r = MINUS_LAST_BLOCK(self, remaining); + if (r == 0) + break; + r = _BufferedReader_raw_read(self, out + written, r); + if (r == -1) + goto error; + if (r == 0 || r == -2) { + /* EOF occurred or read() would block. */ + if (r == 0 || written > 0) { + if (_PyBytes_Resize(&res, written)) + goto error; + return res; + } + Py_DECREF(res); + Py_INCREF(Py_None); + return Py_None; + } + remaining -= r; + written += r; + } + assert(remaining <= self->buffer_size); + self->pos = 0; + self->raw_pos = 0; + self->read_end = 0; + while (self->read_end < self->buffer_size) { + Py_ssize_t r = _BufferedReader_fill_buffer(self); + if (r == -1) + goto error; + if (r == 0 || r == -2) { + /* EOF occurred or read() would block. 
*/ + if (r == 0 || written > 0) { + if (_PyBytes_Resize(&res, written)) + goto error; + return res; + } + Py_DECREF(res); + Py_INCREF(Py_None); + return Py_None; + } + if (remaining > r) { + memcpy(out + written, self->buffer + self->pos, r); + written += r; + self->pos += r; + remaining -= r; + } + else if (remaining > 0) { + memcpy(out + written, self->buffer + self->pos, remaining); + written += remaining; + self->pos += remaining; + remaining = 0; + } + if (remaining == 0) + break; + } + + return res; + +error: + Py_XDECREF(res); + return NULL; +} + +static PyObject * +_BufferedReader_peek_unlocked(BufferedObject *self, Py_ssize_t n) +{ + Py_ssize_t have, r; + + have = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); + /* Constraints: + 1. we don't want to advance the file position. + 2. we don't want to lose block alignment, so we can't shift the buffer + to make some place. + Therefore, we either return `have` bytes (if > 0), or a full buffer. + */ + if (have > 0) { + return PyBytes_FromStringAndSize(self->buffer + self->pos, have); + } + + /* Fill the buffer from the raw stream, and copy it to the result. 
*/ + _BufferedReader_reset_buf(self); + r = _BufferedReader_fill_buffer(self); + if (r == -1) + return NULL; + if (r == -2) + r = 0; + self->pos = 0; + return PyBytes_FromStringAndSize(self->buffer, r); +} + +static PyMethodDef BufferedReader_methods[] = { + /* BufferedIOMixin methods */ + {"detach", (PyCFunction)BufferedIOMixin_detach, METH_NOARGS}, + {"flush", (PyCFunction)BufferedIOMixin_flush, METH_NOARGS}, + {"close", (PyCFunction)BufferedIOMixin_close, METH_NOARGS}, + {"seekable", (PyCFunction)BufferedIOMixin_seekable, METH_NOARGS}, + {"readable", (PyCFunction)BufferedIOMixin_readable, METH_NOARGS}, + {"writable", (PyCFunction)BufferedIOMixin_writable, METH_NOARGS}, + {"fileno", (PyCFunction)BufferedIOMixin_fileno, METH_NOARGS}, + {"isatty", (PyCFunction)BufferedIOMixin_isatty, METH_NOARGS}, + + {"read", (PyCFunction)Buffered_read, METH_VARARGS}, + {"peek", (PyCFunction)Buffered_peek, METH_VARARGS}, + {"read1", (PyCFunction)Buffered_read1, METH_VARARGS}, + {"readline", (PyCFunction)Buffered_readline, METH_VARARGS}, + {"seek", (PyCFunction)Buffered_seek, METH_VARARGS}, + {"tell", (PyCFunction)Buffered_tell, METH_NOARGS}, + {"truncate", (PyCFunction)Buffered_truncate, METH_VARARGS}, + {NULL, NULL} +}; + +static PyMemberDef BufferedReader_members[] = { + {"raw", T_OBJECT, offsetof(BufferedObject, raw), 0}, + {NULL} +}; + +static PyGetSetDef BufferedReader_getset[] = { + {"closed", (getter)BufferedIOMixin_closed_get, NULL, NULL}, + {"name", (getter)BufferedIOMixin_name_get, NULL, NULL}, + {"mode", (getter)BufferedIOMixin_mode_get, NULL, NULL}, + {NULL} +}; + + +PyTypeObject PyBufferedReader_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_io.BufferedReader", /*tp_name*/ + sizeof(BufferedObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)BufferedObject_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare */ + (reprfunc)Buffered_repr, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, 
/*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE + | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + BufferedReader_doc, /* tp_doc */ + (traverseproc)Buffered_traverse, /* tp_traverse */ + (inquiry)Buffered_clear, /* tp_clear */ + 0, /* tp_richcompare */ + offsetof(BufferedObject, weakreflist), /*tp_weaklistoffset*/ + 0, /* tp_iter */ + (iternextfunc)Buffered_iternext, /* tp_iternext */ + BufferedReader_methods, /* tp_methods */ + BufferedReader_members, /* tp_members */ + BufferedReader_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + offsetof(BufferedObject, dict), /* tp_dictoffset */ + (initproc)BufferedReader_init, /* tp_init */ + 0, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ +}; + + + +static int +complain_about_max_buffer_size(void) +{ + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "max_buffer_size is deprecated", 1) < 0) + return 0; + return 1; +} + +/* + * class BufferedWriter + */ +PyDoc_STRVAR(BufferedWriter_doc, + "A buffer for a writeable sequential RawIO object.\n" + "\n" + "The constructor creates a BufferedWriter for the given writeable raw\n" + "stream. If the buffer_size is not given, it defaults to\n" + "DEFAULT_BUFFER_SIZE. 
max_buffer_size isn't used anymore.\n" + ); + +static void +_BufferedWriter_reset_buf(BufferedObject *self) +{ + self->write_pos = 0; + self->write_end = -1; +} + +static int +BufferedWriter_init(BufferedObject *self, PyObject *args, PyObject *kwds) +{ + /* TODO: properly deprecate max_buffer_size */ + char *kwlist[] = {"raw", "buffer_size", "max_buffer_size", NULL}; + Py_ssize_t buffer_size = DEFAULT_BUFFER_SIZE; + Py_ssize_t max_buffer_size = -234; + PyObject *raw; + + self->ok = 0; + self->detached = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|nn:BufferedReader", kwlist, + &raw, &buffer_size, &max_buffer_size)) { + return -1; + } + + if (max_buffer_size != -234 && !complain_about_max_buffer_size()) + return -1; + + if (_PyIOBase_checkWritable(raw, Py_True) == NULL) + return -1; + + Py_CLEAR(self->raw); + Py_INCREF(raw); + self->raw = raw; + self->readable = 0; + self->writable = 1; + + self->buffer_size = buffer_size; + if (_Buffered_init(self) < 0) + return -1; + _BufferedWriter_reset_buf(self); + self->pos = 0; + + self->fast_closed_checks = (Py_TYPE(self) == &PyBufferedWriter_Type && + Py_TYPE(raw) == &PyFileIO_Type); + + self->ok = 1; + return 0; +} + +static Py_ssize_t +_BufferedWriter_raw_write(BufferedObject *self, char *start, Py_ssize_t len) +{ + Py_buffer buf; + PyObject *memobj, *res; + Py_ssize_t n; + /* NOTE: the buffer needn't be released as its object is NULL. 
*/ + if (PyBuffer_FillInfo(&buf, NULL, start, len, 1, PyBUF_CONTIG_RO) == -1) + return -1; + memobj = PyMemoryView_FromBuffer(&buf); + if (memobj == NULL) + return -1; + res = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_write, memobj, NULL); + Py_DECREF(memobj); + if (res == NULL) + return -1; + n = PyNumber_AsSsize_t(res, PyExc_ValueError); + Py_DECREF(res); + if (n < 0 || n > len) { + PyErr_Format(PyExc_IOError, + "raw write() returned invalid length %zd " + "(should have been between 0 and %zd)", n, len); + return -1; + } + if (n > 0 && self->abs_pos != -1) + self->abs_pos += n; + return n; +} + +/* `restore_pos` is 1 if we need to restore the raw stream position at + the end, 0 otherwise. */ +static PyObject * +_BufferedWriter_flush_unlocked(BufferedObject *self, int restore_pos) +{ + Py_ssize_t written = 0; + Py_off_t n, rewind; + + if (!VALID_WRITE_BUFFER(self) || self->write_pos == self->write_end) + goto end; + /* First, rewind */ + rewind = RAW_OFFSET(self) + (self->pos - self->write_pos); + if (rewind != 0) { + n = _Buffered_raw_seek(self, -rewind, 1); + if (n < 0) { + goto error; + } + self->raw_pos -= rewind; + } + while (self->write_pos < self->write_end) { + n = _BufferedWriter_raw_write(self, + self->buffer + self->write_pos, + Py_SAFE_DOWNCAST(self->write_end - self->write_pos, + Py_off_t, Py_ssize_t)); + if (n == -1) { + Py_ssize_t *w = _Buffered_check_blocking_error(); + if (w == NULL) + goto error; + self->write_pos += *w; + self->raw_pos = self->write_pos; + written += *w; + *w = written; + /* Already re-raised */ + goto error; + } + self->write_pos += n; + self->raw_pos = self->write_pos; + written += Py_SAFE_DOWNCAST(n, Py_off_t, Py_ssize_t); + } + + if (restore_pos) { + Py_off_t forward = rewind - written; + if (forward != 0) { + n = _Buffered_raw_seek(self, forward, 1); + if (n < 0) { + goto error; + } + self->raw_pos += forward; + } + } + _BufferedWriter_reset_buf(self); + +end: + Py_RETURN_NONE; + +error: + return NULL; +} + +static 
PyObject * +BufferedWriter_write(BufferedObject *self, PyObject *args) +{ + PyObject *res = NULL; + Py_buffer buf; + Py_ssize_t written, avail, remaining, n; + + CHECK_INITIALIZED(self) + if (!PyArg_ParseTuple(args, "s*:write", &buf)) { + return NULL; + } + + if (IS_CLOSED(self)) { + PyErr_SetString(PyExc_ValueError, "write to closed file"); + PyBuffer_Release(&buf); + return NULL; + } + + ENTER_BUFFERED(self) + + /* Fast path: the data to write can be fully buffered. */ + if (!VALID_READ_BUFFER(self) && !VALID_WRITE_BUFFER(self)) { + self->pos = 0; + self->raw_pos = 0; + } + avail = Py_SAFE_DOWNCAST(self->buffer_size - self->pos, Py_off_t, Py_ssize_t); + if (buf.len <= avail) { + memcpy(self->buffer + self->pos, buf.buf, buf.len); + if (!VALID_WRITE_BUFFER(self)) { + self->write_pos = self->pos; + } + ADJUST_POSITION(self, self->pos + buf.len); + if (self->pos > self->write_end) + self->write_end = self->pos; + written = buf.len; + goto end; + } + + /* First write the current buffer */ + res = _BufferedWriter_flush_unlocked(self, 0); + if (res == NULL) { + Py_ssize_t *w = _Buffered_check_blocking_error(); + if (w == NULL) + goto error; + if (self->readable) + _BufferedReader_reset_buf(self); + /* Make some place by shifting the buffer. */ + assert(VALID_WRITE_BUFFER(self)); + memmove(self->buffer, self->buffer + self->write_pos, + Py_SAFE_DOWNCAST(self->write_end - self->write_pos, + Py_off_t, Py_ssize_t)); + self->write_end -= self->write_pos; + self->raw_pos -= self->write_pos; + self->pos -= self->write_pos; + self->write_pos = 0; + avail = Py_SAFE_DOWNCAST(self->buffer_size - self->write_end, + Py_off_t, Py_ssize_t); + if (buf.len <= avail) { + /* Everything can be buffered */ + PyErr_Clear(); + memcpy(self->buffer + self->write_end, buf.buf, buf.len); + self->write_end += buf.len; + written = buf.len; + goto end; + } + /* Buffer as much as possible. 
*/ + memcpy(self->buffer + self->write_end, buf.buf, avail); + self->write_end += avail; + /* Already re-raised */ + *w = avail; + goto error; + } + Py_CLEAR(res); + + /* Then write buf itself. At this point the buffer has been emptied. */ + remaining = buf.len; + written = 0; + while (remaining > self->buffer_size) { + n = _BufferedWriter_raw_write( + self, (char *) buf.buf + written, buf.len - written); + if (n == -1) { + Py_ssize_t *w = _Buffered_check_blocking_error(); + if (w == NULL) + goto error; + written += *w; + remaining -= *w; + if (remaining > self->buffer_size) { + /* Can't buffer everything, still buffer as much as possible */ + memcpy(self->buffer, + (char *) buf.buf + written, self->buffer_size); + self->raw_pos = 0; + ADJUST_POSITION(self, self->buffer_size); + self->write_end = self->buffer_size; + *w = written + self->buffer_size; + /* Already re-raised */ + goto error; + } + PyErr_Clear(); + break; + } + written += n; + remaining -= n; + } + if (self->readable) + _BufferedReader_reset_buf(self); + if (remaining > 0) { + memcpy(self->buffer, (char *) buf.buf + written, remaining); + written += remaining; + } + self->write_pos = 0; + /* TODO: sanity check (remaining >= 0) */ + self->write_end = remaining; + ADJUST_POSITION(self, remaining); + self->raw_pos = 0; + +end: + res = PyLong_FromSsize_t(written); + +error: + LEAVE_BUFFERED(self) + PyBuffer_Release(&buf); + return res; +} + +static PyMethodDef BufferedWriter_methods[] = { + /* BufferedIOMixin methods */ + {"close", (PyCFunction)BufferedIOMixin_close, METH_NOARGS}, + {"detach", (PyCFunction)BufferedIOMixin_detach, METH_NOARGS}, + {"seekable", (PyCFunction)BufferedIOMixin_seekable, METH_NOARGS}, + {"readable", (PyCFunction)BufferedIOMixin_readable, METH_NOARGS}, + {"writable", (PyCFunction)BufferedIOMixin_writable, METH_NOARGS}, + {"fileno", (PyCFunction)BufferedIOMixin_fileno, METH_NOARGS}, + {"isatty", (PyCFunction)BufferedIOMixin_isatty, METH_NOARGS}, + + {"write", 
(PyCFunction)BufferedWriter_write, METH_VARARGS}, + {"truncate", (PyCFunction)Buffered_truncate, METH_VARARGS}, + {"flush", (PyCFunction)Buffered_flush, METH_NOARGS}, + {"seek", (PyCFunction)Buffered_seek, METH_VARARGS}, + {"tell", (PyCFunction)Buffered_tell, METH_NOARGS}, + {NULL, NULL} +}; + +static PyMemberDef BufferedWriter_members[] = { + {"raw", T_OBJECT, offsetof(BufferedObject, raw), 0}, + {NULL} +}; + +static PyGetSetDef BufferedWriter_getset[] = { + {"closed", (getter)BufferedIOMixin_closed_get, NULL, NULL}, + {"name", (getter)BufferedIOMixin_name_get, NULL, NULL}, + {"mode", (getter)BufferedIOMixin_mode_get, NULL, NULL}, + {NULL} +}; + + +PyTypeObject PyBufferedWriter_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_io.BufferedWriter", /*tp_name*/ + sizeof(BufferedObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)BufferedObject_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare */ + (reprfunc)Buffered_repr, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE + | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + BufferedWriter_doc, /* tp_doc */ + (traverseproc)Buffered_traverse, /* tp_traverse */ + (inquiry)Buffered_clear, /* tp_clear */ + 0, /* tp_richcompare */ + offsetof(BufferedObject, weakreflist), /*tp_weaklistoffset*/ + 0, /* tp_iter */ + 0, /* tp_iternext */ + BufferedWriter_methods, /* tp_methods */ + BufferedWriter_members, /* tp_members */ + BufferedWriter_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + offsetof(BufferedObject, dict), /* tp_dictoffset */ + (initproc)BufferedWriter_init, /* tp_init */ + 0, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ +}; + + + +/* + * BufferedRWPair + */ + +PyDoc_STRVAR(BufferedRWPair_doc, + "A buffered reader and writer 
object together.\n" + "\n" + "A buffered reader object and buffered writer object put together to\n" + "form a sequential IO object that can read and write. This is typically\n" + "used with a socket or two-way pipe.\n" + "\n" + "reader and writer are RawIOBase objects that are readable and\n" + "writeable respectively. If the buffer_size is omitted it defaults to\n" + "DEFAULT_BUFFER_SIZE.\n" + ); + +/* XXX The usefulness of this (compared to having two separate IO objects) is + * questionable. + */ + +typedef struct { + PyObject_HEAD + BufferedObject *reader; + BufferedObject *writer; + PyObject *dict; + PyObject *weakreflist; +} BufferedRWPairObject; + +static int +BufferedRWPair_init(BufferedRWPairObject *self, PyObject *args, + PyObject *kwds) +{ + PyObject *reader, *writer; + Py_ssize_t buffer_size = DEFAULT_BUFFER_SIZE; + Py_ssize_t max_buffer_size = -234; + + if (!PyArg_ParseTuple(args, "OO|nn:BufferedRWPair", &reader, &writer, + &buffer_size, &max_buffer_size)) { + return -1; + } + + if (max_buffer_size != -234 && !complain_about_max_buffer_size()) + return -1; + + if (_PyIOBase_checkReadable(reader, Py_True) == NULL) + return -1; + if (_PyIOBase_checkWritable(writer, Py_True) == NULL) + return -1; + + self->reader = (BufferedObject *) PyObject_CallFunction( + (PyObject *) &PyBufferedReader_Type, "On", reader, buffer_size); + if (self->reader == NULL) + return -1; + + self->writer = (BufferedObject *) PyObject_CallFunction( + (PyObject *) &PyBufferedWriter_Type, "On", writer, buffer_size); + if (self->writer == NULL) { + Py_CLEAR(self->reader); + return -1; + } + + return 0; +} + +static int +BufferedRWPair_traverse(BufferedRWPairObject *self, visitproc visit, void *arg) +{ + Py_VISIT(self->dict); + return 0; +} + +static int +BufferedRWPair_clear(BufferedRWPairObject *self) +{ + Py_CLEAR(self->reader); + Py_CLEAR(self->writer); + Py_CLEAR(self->dict); + return 0; +} + +static void +BufferedRWPair_dealloc(BufferedRWPairObject *self) +{ + 
_PyObject_GC_UNTRACK(self); + Py_CLEAR(self->reader); + Py_CLEAR(self->writer); + Py_CLEAR(self->dict); + Py_TYPE(self)->tp_free((PyObject *) self); +} + +static PyObject * +_forward_call(BufferedObject *self, const char *name, PyObject *args) +{ + PyObject *func = PyObject_GetAttrString((PyObject *)self, name); + PyObject *ret; + + if (func == NULL) { + PyErr_SetString(PyExc_AttributeError, name); + return NULL; + } + + ret = PyObject_CallObject(func, args); + Py_DECREF(func); + return ret; +} + +static PyObject * +BufferedRWPair_read(BufferedRWPairObject *self, PyObject *args) +{ + return _forward_call(self->reader, "read", args); +} + +static PyObject * +BufferedRWPair_peek(BufferedRWPairObject *self, PyObject *args) +{ + return _forward_call(self->reader, "peek", args); +} + +static PyObject * +BufferedRWPair_read1(BufferedRWPairObject *self, PyObject *args) +{ + return _forward_call(self->reader, "read1", args); +} + +static PyObject * +BufferedRWPair_readinto(BufferedRWPairObject *self, PyObject *args) +{ + return _forward_call(self->reader, "readinto", args); +} + +static PyObject * +BufferedRWPair_write(BufferedRWPairObject *self, PyObject *args) +{ + return _forward_call(self->writer, "write", args); +} + +static PyObject * +BufferedRWPair_flush(BufferedRWPairObject *self, PyObject *args) +{ + return _forward_call(self->writer, "flush", args); +} + +static PyObject * +BufferedRWPair_readable(BufferedRWPairObject *self, PyObject *args) +{ + return _forward_call(self->reader, "readable", args); +} + +static PyObject * +BufferedRWPair_writable(BufferedRWPairObject *self, PyObject *args) +{ + return _forward_call(self->writer, "writable", args); +} + +static PyObject * +BufferedRWPair_close(BufferedRWPairObject *self, PyObject *args) +{ + PyObject *ret = _forward_call(self->writer, "close", args); + if (ret == NULL) + return NULL; + Py_DECREF(ret); + + return _forward_call(self->reader, "close", args); +} + +static PyObject * 
+BufferedRWPair_isatty(BufferedRWPairObject *self, PyObject *args) +{ + PyObject *ret = _forward_call(self->writer, "isatty", args); + + if (ret != Py_False) { + /* either True or exception */ + return ret; + } + Py_DECREF(ret); + + return _forward_call(self->reader, "isatty", args); +} + +static PyObject * +BufferedRWPair_closed_get(BufferedRWPairObject *self, void *context) +{ + return PyObject_GetAttr((PyObject *) self->writer, _PyIO_str_closed); +} + +static PyMethodDef BufferedRWPair_methods[] = { + {"read", (PyCFunction)BufferedRWPair_read, METH_VARARGS}, + {"peek", (PyCFunction)BufferedRWPair_peek, METH_VARARGS}, + {"read1", (PyCFunction)BufferedRWPair_read1, METH_VARARGS}, + {"readinto", (PyCFunction)BufferedRWPair_readinto, METH_VARARGS}, + + {"write", (PyCFunction)BufferedRWPair_write, METH_VARARGS}, + {"flush", (PyCFunction)BufferedRWPair_flush, METH_NOARGS}, + + {"readable", (PyCFunction)BufferedRWPair_readable, METH_NOARGS}, + {"writable", (PyCFunction)BufferedRWPair_writable, METH_NOARGS}, + + {"close", (PyCFunction)BufferedRWPair_close, METH_NOARGS}, + {"isatty", (PyCFunction)BufferedRWPair_isatty, METH_NOARGS}, + + {NULL, NULL} +}; + +static PyGetSetDef BufferedRWPair_getset[] = { + {"closed", (getter)BufferedRWPair_closed_get, NULL, NULL}, + {NULL} +}; + +PyTypeObject PyBufferedRWPair_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_io.BufferedRWPair", /*tp_name*/ + sizeof(BufferedRWPairObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)BufferedRWPair_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare */ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE + | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + BufferedRWPair_doc, /* tp_doc */ + (traverseproc)BufferedRWPair_traverse, /* tp_traverse */ + 
(inquiry)BufferedRWPair_clear, /* tp_clear */ + 0, /* tp_richcompare */ + offsetof(BufferedRWPairObject, weakreflist), /*tp_weaklistoffset*/ + 0, /* tp_iter */ + 0, /* tp_iternext */ + BufferedRWPair_methods, /* tp_methods */ + 0, /* tp_members */ + BufferedRWPair_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + offsetof(BufferedRWPairObject, dict), /* tp_dictoffset */ + (initproc)BufferedRWPair_init, /* tp_init */ + 0, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ +}; + + + +/* + * BufferedRandom + */ + +PyDoc_STRVAR(BufferedRandom_doc, + "A buffered interface to random access streams.\n" + "\n" + "The constructor creates a reader and writer for a seekable stream,\n" + "raw, given in the first argument. If the buffer_size is omitted it\n" + "defaults to DEFAULT_BUFFER_SIZE. max_buffer_size isn't used anymore.\n" + ); + +static int +BufferedRandom_init(BufferedObject *self, PyObject *args, PyObject *kwds) +{ + char *kwlist[] = {"raw", "buffer_size", "max_buffer_size", NULL}; + Py_ssize_t buffer_size = DEFAULT_BUFFER_SIZE; + Py_ssize_t max_buffer_size = -234; + PyObject *raw; + + self->ok = 0; + self->detached = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|nn:BufferedReader", kwlist, + &raw, &buffer_size, &max_buffer_size)) { + return -1; + } + + if (max_buffer_size != -234 && !complain_about_max_buffer_size()) + return -1; + + if (_PyIOBase_checkSeekable(raw, Py_True) == NULL) + return -1; + if (_PyIOBase_checkReadable(raw, Py_True) == NULL) + return -1; + if (_PyIOBase_checkWritable(raw, Py_True) == NULL) + return -1; + + Py_CLEAR(self->raw); + Py_INCREF(raw); + self->raw = raw; + self->buffer_size = buffer_size; + self->readable = 1; + self->writable = 1; + + if (_Buffered_init(self) < 0) + return -1; + _BufferedReader_reset_buf(self); + _BufferedWriter_reset_buf(self); + self->pos = 0; + + self->fast_closed_checks = (Py_TYPE(self) == &PyBufferedRandom_Type && + Py_TYPE(raw) == 
&PyFileIO_Type); + + self->ok = 1; + return 0; +} + +static PyMethodDef BufferedRandom_methods[] = { + /* BufferedIOMixin methods */ + {"close", (PyCFunction)BufferedIOMixin_close, METH_NOARGS}, + {"detach", (PyCFunction)BufferedIOMixin_detach, METH_NOARGS}, + {"seekable", (PyCFunction)BufferedIOMixin_seekable, METH_NOARGS}, + {"readable", (PyCFunction)BufferedIOMixin_readable, METH_NOARGS}, + {"writable", (PyCFunction)BufferedIOMixin_writable, METH_NOARGS}, + {"fileno", (PyCFunction)BufferedIOMixin_fileno, METH_NOARGS}, + {"isatty", (PyCFunction)BufferedIOMixin_isatty, METH_NOARGS}, + + {"flush", (PyCFunction)Buffered_flush, METH_NOARGS}, + + {"seek", (PyCFunction)Buffered_seek, METH_VARARGS}, + {"tell", (PyCFunction)Buffered_tell, METH_NOARGS}, + {"truncate", (PyCFunction)Buffered_truncate, METH_VARARGS}, + {"read", (PyCFunction)Buffered_read, METH_VARARGS}, + {"read1", (PyCFunction)Buffered_read1, METH_VARARGS}, + {"readinto", (PyCFunction)Buffered_readinto, METH_VARARGS}, + {"readline", (PyCFunction)Buffered_readline, METH_VARARGS}, + {"peek", (PyCFunction)Buffered_peek, METH_VARARGS}, + {"write", (PyCFunction)BufferedWriter_write, METH_VARARGS}, + {NULL, NULL} +}; + +static PyMemberDef BufferedRandom_members[] = { + {"raw", T_OBJECT, offsetof(BufferedObject, raw), 0}, + {NULL} +}; + +static PyGetSetDef BufferedRandom_getset[] = { + {"closed", (getter)BufferedIOMixin_closed_get, NULL, NULL}, + {"name", (getter)BufferedIOMixin_name_get, NULL, NULL}, + {"mode", (getter)BufferedIOMixin_mode_get, NULL, NULL}, + {NULL} +}; + + +PyTypeObject PyBufferedRandom_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_io.BufferedRandom", /*tp_name*/ + sizeof(BufferedObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)BufferedObject_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare */ + (reprfunc)Buffered_repr, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ 
+ 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE + | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + BufferedRandom_doc, /* tp_doc */ + (traverseproc)Buffered_traverse, /* tp_traverse */ + (inquiry)Buffered_clear, /* tp_clear */ + 0, /* tp_richcompare */ + offsetof(BufferedObject, weakreflist), /*tp_weaklistoffset*/ + 0, /* tp_iter */ + (iternextfunc)Buffered_iternext, /* tp_iternext */ + BufferedRandom_methods, /* tp_methods */ + BufferedRandom_members, /* tp_members */ + BufferedRandom_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /*tp_dict*/ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + offsetof(BufferedObject, dict), /*tp_dictoffset*/ + (initproc)BufferedRandom_init, /* tp_init */ + 0, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ +}; + diff -r c5c04a1ff3b4 -r b3fbeeb23fd6 Modules/_io/bytesio.c --- /dev/null +++ b/Modules/_io/bytesio.c @@ -0,0 +1,764 @@ +#include "Python.h" +#include "structmember.h" /* for offsetof() */ +#include "_iomodule.h" + +typedef struct { + PyObject_HEAD + char *buf; + Py_ssize_t pos; + Py_ssize_t string_size; + size_t buf_size; + PyObject *dict; + PyObject *weakreflist; +} BytesIOObject; + +#define CHECK_CLOSED(self) \ + if ((self)->buf == NULL) { \ + PyErr_SetString(PyExc_ValueError, \ + "I/O operation on closed file."); \ + return NULL; \ + } + +/* Internal routine to get a line from the buffer of a BytesIO + object. Returns the length between the current position to the + next newline character. */ +static Py_ssize_t +get_line(BytesIOObject *self, char **output) +{ + char *n; + const char *str_end; + Py_ssize_t len; + + assert(self->buf != NULL); + + /* Move to the end of the line, up to the end of the string, s. */ + str_end = self->buf + self->string_size; + for (n = self->buf + self->pos; + n < str_end && *n != '\n'; + n++); + + /* Skip the newline character */ + if (n < str_end) + n++; + + /* Get the length from the current position to the end of the line. 
*/ + len = n - (self->buf + self->pos); + *output = self->buf + self->pos; + + assert(len >= 0); + assert(self->pos < PY_SSIZE_T_MAX - len); + self->pos += len; + + return len; +} + +/* Internal routine for changing the size of the buffer of BytesIO objects. + The caller should ensure that the 'size' argument is non-negative. Returns + 0 on success, -1 otherwise. */ +static int +resize_buffer(BytesIOObject *self, size_t size) +{ + /* Here, unsigned types are used to avoid dealing with signed integer + overflow, which is undefined in C. */ + size_t alloc = self->buf_size; + char *new_buf = NULL; + + assert(self->buf != NULL); + + /* For simplicity, stay in the range of the signed type. Anyway, Python + doesn't allow strings to be longer than this. */ + if (size > PY_SSIZE_T_MAX) + goto overflow; + + if (size < alloc / 2) { + /* Major downsize; resize down to exact size. */ + alloc = size + 1; + } + else if (size < alloc) { + /* Within allocated size; quick exit */ + return 0; + } + else if (size <= alloc * 1.125) { + /* Moderate upsize; overallocate similar to list_resize() */ + alloc = size + (size >> 3) + (size < 9 ? 3 : 6); + } + else { + /* Major upsize; resize up to exact size */ + alloc = size + 1; + } + + if (alloc > ((size_t)-1) / sizeof(char)) + goto overflow; + new_buf = (char *)PyMem_Realloc(self->buf, alloc * sizeof(char)); + if (new_buf == NULL) { + PyErr_NoMemory(); + return -1; + } + self->buf_size = alloc; + self->buf = new_buf; + + return 0; + + overflow: + PyErr_SetString(PyExc_OverflowError, + "new buffer size too large"); + return -1; +} + +/* Internal routine for writing a string of bytes to the buffer of a BytesIO + object. Returns the number of bytes wrote, or -1 on error. 
*/ +static Py_ssize_t +write_bytes(BytesIOObject *self, const char *bytes, Py_ssize_t len) +{ + assert(self->buf != NULL); + assert(self->pos >= 0); + assert(len >= 0); + + if ((size_t)self->pos + len > self->buf_size) { + if (resize_buffer(self, (size_t)self->pos + len) < 0) + return -1; + } + + if (self->pos > self->string_size) { + /* In case of overseek, pad with null bytes the buffer region between + the end of stream and the current position. + + 0 lo string_size hi + | |<---used--->|<----------available----------->| + | | <--to pad-->|<---to write---> | + 0 buf position + */ + memset(self->buf + self->string_size, '\0', + (self->pos - self->string_size) * sizeof(char)); + } + + /* Copy the data to the internal buffer, overwriting some of the existing + data if self->pos < self->string_size. */ + memcpy(self->buf + self->pos, bytes, len); + self->pos += len; + + /* Set the new length of the internal string if it has changed. */ + if (self->string_size < self->pos) { + self->string_size = self->pos; + } + + return len; +} + +static PyObject * +bytesio_get_closed(BytesIOObject *self) +{ + if (self->buf == NULL) { + Py_RETURN_TRUE; + } + else { + Py_RETURN_FALSE; + } +} + +/* Generic getter for the writable, readable and seekable properties */ +static PyObject * +return_true(BytesIOObject *self) +{ + Py_RETURN_TRUE; +} + +PyDoc_STRVAR(flush_doc, +"flush() -> None. 
Does nothing."); + +static PyObject * +bytesio_flush(BytesIOObject *self) +{ + Py_RETURN_NONE; +} + +PyDoc_STRVAR(getval_doc, +"getvalue() -> bytes.\n" +"\n" +"Retrieve the entire contents of the BytesIO object."); + +static PyObject * +bytesio_getvalue(BytesIOObject *self) +{ + CHECK_CLOSED(self); + return PyBytes_FromStringAndSize(self->buf, self->string_size); +} + +PyDoc_STRVAR(isatty_doc, +"isatty() -> False.\n" +"\n" +"Always returns False since BytesIO objects are not connected\n" +"to a tty-like device."); + +static PyObject * +bytesio_isatty(BytesIOObject *self) +{ + CHECK_CLOSED(self); + Py_RETURN_FALSE; +} + +PyDoc_STRVAR(tell_doc, +"tell() -> current file position, an integer\n"); + +static PyObject * +bytesio_tell(BytesIOObject *self) +{ + CHECK_CLOSED(self); + return PyLong_FromSsize_t(self->pos); +} + +PyDoc_STRVAR(read_doc, +"read([size]) -> read at most size bytes, returned as a string.\n" +"\n" +"If the size argument is negative, read until EOF is reached.\n" +"Return an empty string at EOF."); + +static PyObject * +bytesio_read(BytesIOObject *self, PyObject *args) +{ + Py_ssize_t size, n; + char *output; + PyObject *arg = Py_None; + + CHECK_CLOSED(self); + + if (!PyArg_ParseTuple(args, "|O:read", &arg)) + return NULL; + + if (PyNumber_Check(arg)) { + size = PyNumber_AsSsize_t(arg, PyExc_OverflowError); + if (size == -1 && PyErr_Occurred()) + return NULL; + } + else if (arg == Py_None) { + /* Read until EOF is reached, by default. 
*/ + size = -1; + } + else { + PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'", + Py_TYPE(arg)->tp_name); + return NULL; + } + + /* adjust invalid sizes */ + n = self->string_size - self->pos; + if (size < 0 || size > n) { + size = n; + if (size < 0) + size = 0; + } + + assert(self->buf != NULL); + output = self->buf + self->pos; + self->pos += size; + + return PyBytes_FromStringAndSize(output, size); +} + + +PyDoc_STRVAR(read1_doc, +"read1(size) -> read at most size bytes, returned as a string.\n" +"\n" +"If the size argument is negative or omitted, read until EOF is reached.\n" +"Return an empty string at EOF."); + +static PyObject * +bytesio_read1(BytesIOObject *self, PyObject *n) +{ + PyObject *arg, *res; + + arg = PyTuple_Pack(1, n); + if (arg == NULL) + return NULL; + res = bytesio_read(self, arg); + Py_DECREF(arg); + return res; +} + +PyDoc_STRVAR(readline_doc, +"readline([size]) -> next line from the file, as a string.\n" +"\n" +"Retain newline. A non-negative size argument limits the maximum\n" +"number of bytes to return (an incomplete line may be returned then).\n" +"Return an empty string at EOF.\n"); + +static PyObject * +bytesio_readline(BytesIOObject *self, PyObject *args) +{ + Py_ssize_t size, n; + char *output; + PyObject *arg = Py_None; + + CHECK_CLOSED(self); + + if (!PyArg_ParseTuple(args, "|O:readline", &arg)) + return NULL; + + if (PyNumber_Check(arg)) { + size = PyNumber_AsSsize_t(arg, PyExc_OverflowError); + if (size == -1 && PyErr_Occurred()) + return NULL; + } + else if (arg == Py_None) { + /* No size limit, by default. 
*/ + size = -1; + } + else { + PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'", + Py_TYPE(arg)->tp_name); + return NULL; + } + + n = get_line(self, &output); + + if (size >= 0 && size < n) { + size = n - size; + n -= size; + self->pos -= size; + } + + return PyBytes_FromStringAndSize(output, n); +} + +PyDoc_STRVAR(readlines_doc, +"readlines([size]) -> list of strings, each a line from the file.\n" +"\n" +"Call readline() repeatedly and return a list of the lines so read.\n" +"The optional size argument, if given, is an approximate bound on the\n" +"total number of bytes in the lines returned.\n"); + +static PyObject * +bytesio_readlines(BytesIOObject *self, PyObject *args) +{ + Py_ssize_t maxsize, size, n; + PyObject *result, *line; + char *output; + PyObject *arg = Py_None; + + CHECK_CLOSED(self); + + if (!PyArg_ParseTuple(args, "|O:readlines", &arg)) + return NULL; + + if (PyNumber_Check(arg)) { + maxsize = PyNumber_AsSsize_t(arg, PyExc_OverflowError); + if (maxsize == -1 && PyErr_Occurred()) + return NULL; + } + else if (arg == Py_None) { + /* No size limit, by default. */ + maxsize = -1; + } + else { + PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'", + Py_TYPE(arg)->tp_name); + return NULL; + } + + size = 0; + result = PyList_New(0); + if (!result) + return NULL; + + while ((n = get_line(self, &output)) != 0) { + line = PyBytes_FromStringAndSize(output, n); + if (!line) + goto on_error; + if (PyList_Append(result, line) == -1) { + Py_DECREF(line); + goto on_error; + } + Py_DECREF(line); + size += n; + if (maxsize > 0 && size >= maxsize) + break; + } + return result; + + on_error: + Py_DECREF(result); + return NULL; +} + +PyDoc_STRVAR(readinto_doc, +"readinto(bytearray) -> int. 
Read up to len(b) bytes into b.\n" +"\n" +"Returns number of bytes read (0 for EOF), or None if the object\n" +"is set not to block as has no data to read."); + +static PyObject * +bytesio_readinto(BytesIOObject *self, PyObject *args) +{ + Py_buffer buf; + Py_ssize_t len; + + CHECK_CLOSED(self); + + if (!PyArg_ParseTuple(args, "w*", &buf)) + return NULL; + + len = buf.len; + if (self->pos + len > self->string_size) + len = self->string_size - self->pos; + + memcpy(buf.buf, self->buf + self->pos, len); + assert(self->pos + len < PY_SSIZE_T_MAX); + assert(len >= 0); + self->pos += len; + + PyBuffer_Release(&buf); + return PyLong_FromSsize_t(len); +} + +PyDoc_STRVAR(truncate_doc, +"truncate([size]) -> int. Truncate the file to at most size bytes.\n" +"\n" +"Size defaults to the current file position, as returned by tell().\n" +"Returns the new size. Imply an absolute seek to the position size."); + +static PyObject * +bytesio_truncate(BytesIOObject *self, PyObject *args) +{ + Py_ssize_t size; + PyObject *arg = Py_None; + + CHECK_CLOSED(self); + + if (!PyArg_ParseTuple(args, "|O:truncate", &arg)) + return NULL; + + if (PyNumber_Check(arg)) { + size = PyNumber_AsSsize_t(arg, PyExc_OverflowError); + if (size == -1 && PyErr_Occurred()) + return NULL; + } + else if (arg == Py_None) { + /* Truncate to current position if no argument is passed. 
*/ + size = self->pos; + } + else { + PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'", + Py_TYPE(arg)->tp_name); + return NULL; + } + + if (size < 0) { + PyErr_Format(PyExc_ValueError, + "negative size value %zd", size); + return NULL; + } + + if (size < self->string_size) { + self->string_size = size; + if (resize_buffer(self, size) < 0) + return NULL; + } + self->pos = size; + + return PyLong_FromSsize_t(size); +} + +static PyObject * +bytesio_iternext(BytesIOObject *self) +{ + char *next; + Py_ssize_t n; + + CHECK_CLOSED(self); + + n = get_line(self, &next); + + if (!next || n == 0) + return NULL; + + return PyBytes_FromStringAndSize(next, n); +} + +PyDoc_STRVAR(seek_doc, +"seek(pos, whence=0) -> int. Change stream position.\n" +"\n" +"Seek to byte offset pos relative to position indicated by whence:\n" +" 0 Start of stream (the default). pos should be >= 0;\n" +" 1 Current position - pos may be negative;\n" +" 2 End of stream - pos usually negative.\n" +"Returns the new absolute position."); + +static PyObject * +bytesio_seek(BytesIOObject *self, PyObject *args) +{ + PyObject *posobj; + Py_ssize_t pos; + int mode = 0; + + CHECK_CLOSED(self); + + if (!PyArg_ParseTuple(args, "O|i:seek", &posobj, &mode)) + return NULL; + + pos = PyNumber_AsSsize_t(posobj, PyExc_OverflowError); + if (pos == -1 && PyErr_Occurred()) + return NULL; + + if (pos < 0 && mode == 0) { + PyErr_Format(PyExc_ValueError, + "negative seek value %zd", pos); + return NULL; + } + + /* mode 0: offset relative to beginning of the string. + mode 1: offset relative to current position. + mode 2: offset relative the end of the string. 
*/ + if (mode == 1) { + if (pos > PY_SSIZE_T_MAX - self->pos) { + PyErr_SetString(PyExc_OverflowError, + "new position too large"); + return NULL; + } + pos += self->pos; + } + else if (mode == 2) { + if (pos > PY_SSIZE_T_MAX - self->string_size) { + PyErr_SetString(PyExc_OverflowError, + "new position too large"); + return NULL; + } + pos += self->string_size; + } + else if (mode != 0) { + PyErr_Format(PyExc_ValueError, + "invalid whence (%i, should be 0, 1 or 2)", mode); + return NULL; + } + + if (pos < 0) + pos = 0; + self->pos = pos; + + return PyLong_FromSsize_t(self->pos); +} + +PyDoc_STRVAR(write_doc, +"write(bytes) -> int. Write bytes to file.\n" +"\n" +"Return the number of bytes written."); + +static PyObject * +bytesio_write(BytesIOObject *self, PyObject *obj) +{ + Py_ssize_t n = 0; + Py_buffer buf; + PyObject *result = NULL; + + CHECK_CLOSED(self); + + if (PyObject_GetBuffer(obj, &buf, PyBUF_CONTIG_RO) < 0) + return NULL; + + if (buf.len != 0) + n = write_bytes(self, buf.buf, buf.len); + if (n >= 0) + result = PyLong_FromSsize_t(n); + + PyBuffer_Release(&buf); + return result; +} + +PyDoc_STRVAR(writelines_doc, +"writelines(sequence_of_strings) -> None. Write strings to the file.\n" +"\n" +"Note that newlines are not added. The sequence can be any iterable\n" +"object producing strings. This is equivalent to calling write() for\n" +"each string."); + +static PyObject * +bytesio_writelines(BytesIOObject *self, PyObject *v) +{ + PyObject *it, *item; + PyObject *ret; + + CHECK_CLOSED(self); + + it = PyObject_GetIter(v); + if (it == NULL) + return NULL; + + while ((item = PyIter_Next(it)) != NULL) { + ret = bytesio_write(self, item); + Py_DECREF(item); + if (ret == NULL) { + Py_DECREF(it); + return NULL; + } + Py_DECREF(ret); + } + Py_DECREF(it); + + /* See if PyIter_Next failed */ + if (PyErr_Occurred()) + return NULL; + + Py_RETURN_NONE; +} + +PyDoc_STRVAR(close_doc, +"close() -> None. 
Disable all I/O operations."); + +static PyObject * +bytesio_close(BytesIOObject *self) +{ + if (self->buf != NULL) { + PyMem_Free(self->buf); + self->buf = NULL; + } + Py_RETURN_NONE; +} + +static void +bytesio_dealloc(BytesIOObject *self) +{ + if (self->buf != NULL) { + PyMem_Free(self->buf); + self->buf = NULL; + } + Py_TYPE(self)->tp_clear((PyObject *)self); + Py_TYPE(self)->tp_free(self); +} + +static PyObject * +bytesio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + BytesIOObject *self; + + assert(type != NULL && type->tp_alloc != NULL); + self = (BytesIOObject *)type->tp_alloc(type, 0); + if (self == NULL) + return NULL; + + self->string_size = 0; + self->pos = 0; + self->buf_size = 0; + self->buf = (char *)PyMem_Malloc(0); + if (self->buf == NULL) { + Py_DECREF(self); + return PyErr_NoMemory(); + } + + return (PyObject *)self; +} + +static int +bytesio_init(BytesIOObject *self, PyObject *args, PyObject *kwds) +{ + PyObject *initvalue = NULL; + + if (!PyArg_ParseTuple(args, "|O:BytesIO", &initvalue)) + return -1; + + /* In case, __init__ is called multiple times. 
*/ + self->string_size = 0; + self->pos = 0; + + if (initvalue && initvalue != Py_None) { + PyObject *res; + res = bytesio_write(self, initvalue); + if (res == NULL) + return -1; + Py_DECREF(res); + self->pos = 0; + } + + return 0; +} + +static int +bytesio_traverse(BytesIOObject *self, visitproc visit, void *arg) +{ + Py_VISIT(self->dict); + Py_VISIT(self->weakreflist); + return 0; +} + +static int +bytesio_clear(BytesIOObject *self) +{ + Py_CLEAR(self->dict); + if (self->weakreflist != NULL) + PyObject_ClearWeakRefs((PyObject *)self); + return 0; +} + + +static PyGetSetDef bytesio_getsetlist[] = { + {"closed", (getter)bytesio_get_closed, NULL, + "True if the file is closed."}, + {NULL}, /* sentinel */ +}; + +static struct PyMethodDef bytesio_methods[] = { + {"readable", (PyCFunction)return_true, METH_NOARGS, NULL}, + {"seekable", (PyCFunction)return_true, METH_NOARGS, NULL}, + {"writable", (PyCFunction)return_true, METH_NOARGS, NULL}, + {"close", (PyCFunction)bytesio_close, METH_NOARGS, close_doc}, + {"flush", (PyCFunction)bytesio_flush, METH_NOARGS, flush_doc}, + {"isatty", (PyCFunction)bytesio_isatty, METH_NOARGS, isatty_doc}, + {"tell", (PyCFunction)bytesio_tell, METH_NOARGS, tell_doc}, + {"write", (PyCFunction)bytesio_write, METH_O, write_doc}, + {"writelines", (PyCFunction)bytesio_writelines, METH_O, writelines_doc}, + {"read1", (PyCFunction)bytesio_read1, METH_O, read1_doc}, + {"readinto", (PyCFunction)bytesio_readinto, METH_VARARGS, readinto_doc}, + {"readline", (PyCFunction)bytesio_readline, METH_VARARGS, readline_doc}, + {"readlines", (PyCFunction)bytesio_readlines, METH_VARARGS, readlines_doc}, + {"read", (PyCFunction)bytesio_read, METH_VARARGS, read_doc}, + {"getvalue", (PyCFunction)bytesio_getvalue, METH_VARARGS, getval_doc}, + {"seek", (PyCFunction)bytesio_seek, METH_VARARGS, seek_doc}, + {"truncate", (PyCFunction)bytesio_truncate, METH_VARARGS, truncate_doc}, + {NULL, NULL} /* sentinel */ +}; + +PyDoc_STRVAR(bytesio_doc, +"BytesIO([buffer]) -> 
object\n" +"\n" +"Create a buffered I/O implementation using an in-memory bytes\n" +"buffer, ready for reading and writing."); + +PyTypeObject PyBytesIO_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_io.BytesIO", /*tp_name*/ + sizeof(BytesIOObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)bytesio_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_reserved*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | + Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + bytesio_doc, /*tp_doc*/ + (traverseproc)bytesio_traverse, /*tp_traverse*/ + (inquiry)bytesio_clear, /*tp_clear*/ + 0, /*tp_richcompare*/ + offsetof(BytesIOObject, weakreflist), /*tp_weaklistoffset*/ + PyObject_SelfIter, /*tp_iter*/ + (iternextfunc)bytesio_iternext, /*tp_iternext*/ + bytesio_methods, /*tp_methods*/ + 0, /*tp_members*/ + bytesio_getsetlist, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + offsetof(BytesIOObject, dict), /*tp_dictoffset*/ + (initproc)bytesio_init, /*tp_init*/ + 0, /*tp_alloc*/ + bytesio_new, /*tp_new*/ +}; diff -r c5c04a1ff3b4 -r b3fbeeb23fd6 Modules/_io/fileio.c --- /dev/null +++ b/Modules/_io/fileio.c @@ -0,0 +1,1054 @@ +/* Author: Daniel Stutzbach */ + +#define PY_SSIZE_T_CLEAN +#include "Python.h" +#include +#include +#include +#include /* For offsetof */ +#include "_iomodule.h" + +/* + * Known likely problems: + * + * - Files larger then 2**32-1 + * - Files with unicode filenames + * - Passing numbers greater than 2**32-1 when an integer is expected + * - Making it work on Windows and other oddball platforms + * + * To Do: + * + * - autoconfify header file inclusion + */ + +#ifdef MS_WINDOWS +/* can simulate truncate with Win32 API functions; see file_truncate */ +#define HAVE_FTRUNCATE +#define 
WIN32_LEAN_AND_MEAN +#include +#endif + +#if BUFSIZ < (8*1024) +#define SMALLCHUNK (8*1024) +#elif (BUFSIZ >= (2 << 25)) +#error "unreasonable BUFSIZ > 64MB defined" +#else +#define SMALLCHUNK BUFSIZ +#endif + +#if SIZEOF_INT < 4 +#define BIGCHUNK (512 * 32) +#else +#define BIGCHUNK (512 * 1024) +#endif + +typedef struct { + PyObject_HEAD + int fd; + unsigned readable : 1; + unsigned writable : 1; + int seekable : 2; /* -1 means unknown */ + int closefd : 1; + PyObject *weakreflist; + PyObject *dict; +} PyFileIOObject; + +PyTypeObject PyFileIO_Type; + +#define PyFileIO_Check(op) (PyObject_TypeCheck((op), &PyFileIO_Type)) + +int +_PyFileIO_closed(PyObject *self) +{ + return ((PyFileIOObject *)self)->fd < 0; +} + +static PyObject * +portable_lseek(int fd, PyObject *posobj, int whence); + +static PyObject *portable_lseek(int fd, PyObject *posobj, int whence); + +/* Returns 0 on success, -1 with exception set on failure. */ +static int +internal_close(PyFileIOObject *self) +{ + int err = 0; + int save_errno = 0; + if (self->fd >= 0) { + int fd = self->fd; + self->fd = -1; + /* fd is accessible and someone else may have closed it */ + if (_PyVerify_fd(fd)) { + Py_BEGIN_ALLOW_THREADS + err = close(fd); + if (err < 0) + save_errno = errno; + Py_END_ALLOW_THREADS + } else { + save_errno = errno; + err = -1; + } + } + if (err < 0) { + errno = save_errno; + PyErr_SetFromErrno(PyExc_IOError); + return -1; + } + return 0; +} + +static PyObject * +fileio_close(PyFileIOObject *self) +{ + if (!self->closefd) { + self->fd = -1; + Py_RETURN_NONE; + } + errno = internal_close(self); + if (errno < 0) + return NULL; + + return PyObject_CallMethod((PyObject*)&PyRawIOBase_Type, + "close", "O", self); +} + +static PyObject * +fileio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyFileIOObject *self; + + assert(type != NULL && type->tp_alloc != NULL); + + self = (PyFileIOObject *) type->tp_alloc(type, 0); + if (self != NULL) { + self->fd = -1; + self->readable = 0; + 
self->writable = 0; + self->seekable = -1; + self->closefd = 1; + self->weakreflist = NULL; + } + + return (PyObject *) self; +} + +/* On Unix, open will succeed for directories. + In Python, there should be no file objects referring to + directories, so we need a check. */ + +static int +dircheck(PyFileIOObject* self, const char *name) +{ +#if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR) + struct stat buf; + if (self->fd < 0) + return 0; + if (fstat(self->fd, &buf) == 0 && S_ISDIR(buf.st_mode)) { + char *msg = strerror(EISDIR); + PyObject *exc; + if (internal_close(self)) + return -1; + + exc = PyObject_CallFunction(PyExc_IOError, "(iss)", + EISDIR, msg, name); + PyErr_SetObject(PyExc_IOError, exc); + Py_XDECREF(exc); + return -1; + } +#endif + return 0; +} + +static int +check_fd(int fd) +{ +#if defined(HAVE_FSTAT) + struct stat buf; + if (!_PyVerify_fd(fd) || (fstat(fd, &buf) < 0 && errno == EBADF)) { + PyObject *exc; + char *msg = strerror(EBADF); + exc = PyObject_CallFunction(PyExc_OSError, "(is)", + EBADF, msg); + PyErr_SetObject(PyExc_OSError, exc); + Py_XDECREF(exc); + return -1; + } +#endif + return 0; +} + + +static int +fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) +{ + PyFileIOObject *self = (PyFileIOObject *) oself; + static char *kwlist[] = {"file", "mode", "closefd", NULL}; + const char *name = NULL; + PyObject *nameobj, *stringobj = NULL; + char *mode = "r"; + char *s; +#ifdef MS_WINDOWS + Py_UNICODE *widename = NULL; +#endif + int ret = 0; + int rwa = 0, plus = 0, append = 0; + int flags = 0; + int fd = -1; + int closefd = 1; + + assert(PyFileIO_Check(oself)); + if (self->fd >= 0) { + /* Have to close the existing file first. 
*/ + if (internal_close(self) < 0) + return -1; + } + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|si:fileio", + kwlist, &nameobj, &mode, &closefd)) + return -1; + + if (PyFloat_Check(nameobj)) { + PyErr_SetString(PyExc_TypeError, + "integer argument expected, got float"); + return -1; + } + + fd = PyLong_AsLong(nameobj); + if (fd < 0) { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "Negative filedescriptor"); + return -1; + } + PyErr_Clear(); + } + +#ifdef MS_WINDOWS + if (GetVersion() < 0x80000000) { + /* On NT, so wide API available */ + if (PyUnicode_Check(nameobj)) + widename = PyUnicode_AS_UNICODE(nameobj); + } + if (widename == NULL) +#endif + if (fd < 0) + { + if (PyBytes_Check(nameobj) || PyByteArray_Check(nameobj)) { + Py_ssize_t namelen; + if (PyObject_AsCharBuffer(nameobj, &name, &namelen) < 0) + return -1; + } + else { + PyObject *u = PyUnicode_FromObject(nameobj); + + if (u == NULL) + return -1; + + stringobj = PyUnicode_AsEncodedString( + u, Py_FileSystemDefaultEncoding, "surrogateescape"); + Py_DECREF(u); + if (stringobj == NULL) + return -1; + if (!PyBytes_Check(stringobj)) { + PyErr_SetString(PyExc_TypeError, + "encoder failed to return bytes"); + goto error; + } + name = PyBytes_AS_STRING(stringobj); + } + } + + s = mode; + while (*s) { + switch (*s++) { + case 'r': + if (rwa) { + bad_mode: + PyErr_SetString(PyExc_ValueError, + "Must have exactly one of read/write/append mode"); + goto error; + } + rwa = 1; + self->readable = 1; + break; + case 'w': + if (rwa) + goto bad_mode; + rwa = 1; + self->writable = 1; + flags |= O_CREAT | O_TRUNC; + break; + case 'a': + if (rwa) + goto bad_mode; + rwa = 1; + self->writable = 1; + flags |= O_CREAT; + append = 1; + break; + case 'b': + break; + case '+': + if (plus) + goto bad_mode; + self->readable = self->writable = 1; + plus = 1; + break; + default: + PyErr_Format(PyExc_ValueError, + "invalid mode: %.200s", mode); + goto error; + } + } + + if (!rwa) + goto bad_mode; + + if 
(self->readable && self->writable) + flags |= O_RDWR; + else if (self->readable) + flags |= O_RDONLY; + else + flags |= O_WRONLY; + +#ifdef O_BINARY + flags |= O_BINARY; +#endif + +#ifdef O_APPEND + if (append) + flags |= O_APPEND; +#endif + + if (fd >= 0) { + if (check_fd(fd)) + goto error; + self->fd = fd; + self->closefd = closefd; + } + else { + self->closefd = 1; + if (!closefd) { + PyErr_SetString(PyExc_ValueError, + "Cannot use closefd=False with file name"); + goto error; + } + + Py_BEGIN_ALLOW_THREADS + errno = 0; +#ifdef MS_WINDOWS + if (widename != NULL) + self->fd = _wopen(widename, flags, 0666); + else +#endif + self->fd = open(name, flags, 0666); + Py_END_ALLOW_THREADS + if (self->fd < 0) { +#ifdef MS_WINDOWS + if (widename != NULL) + PyErr_SetFromErrnoWithUnicodeFilename(PyExc_IOError, widename); + else +#endif + PyErr_SetFromErrnoWithFilename(PyExc_IOError, name); + goto error; + } + if(dircheck(self, name) < 0) + goto error; + } + + if (PyObject_SetAttrString((PyObject *)self, "name", nameobj) < 0) + goto error; + + if (append) { + /* For consistent behaviour, we explicitly seek to the + end of file (otherwise, it might be done only on the + first write()). 
*/ + PyObject *pos = portable_lseek(self->fd, NULL, 2); + if (pos == NULL) + goto error; + Py_DECREF(pos); + } + + goto done; + + error: + ret = -1; + + done: + Py_CLEAR(stringobj); + return ret; +} + +static int +fileio_traverse(PyFileIOObject *self, visitproc visit, void *arg) +{ + Py_VISIT(self->dict); + return 0; +} + +static int +fileio_clear(PyFileIOObject *self) +{ + Py_CLEAR(self->dict); + return 0; +} + +static void +fileio_dealloc(PyFileIOObject *self) +{ + if (_PyIOBase_finalize((PyObject *) self) < 0) + return; + _PyObject_GC_UNTRACK(self); + if (self->weakreflist != NULL) + PyObject_ClearWeakRefs((PyObject *) self); + Py_CLEAR(self->dict); + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static PyObject * +err_closed(void) +{ + PyErr_SetString(PyExc_ValueError, "I/O operation on closed file"); + return NULL; +} + +static PyObject * +err_mode(char *action) +{ + PyErr_Format(PyExc_ValueError, "File not open for %s", action); + return NULL; +} + +static PyObject * +fileio_fileno(PyFileIOObject *self) +{ + if (self->fd < 0) + return err_closed(); + return PyInt_FromLong((long) self->fd); +} + +static PyObject * +fileio_readable(PyFileIOObject *self) +{ + if (self->fd < 0) + return err_closed(); + return PyBool_FromLong((long) self->readable); +} + +static PyObject * +fileio_writable(PyFileIOObject *self) +{ + if (self->fd < 0) + return err_closed(); + return PyBool_FromLong((long) self->writable); +} + +static PyObject * +fileio_seekable(PyFileIOObject *self) +{ + if (self->fd < 0) + return err_closed(); + if (self->seekable < 0) { + PyObject *pos = portable_lseek(self->fd, NULL, SEEK_CUR); + if (pos == NULL) { + PyErr_Clear(); + self->seekable = 0; + } else { + Py_DECREF(pos); + self->seekable = 1; + } + } + return PyBool_FromLong((long) self->seekable); +} + +static PyObject * +fileio_readinto(PyFileIOObject *self, PyObject *args) +{ + Py_buffer pbuf; + Py_ssize_t n; + + if (self->fd < 0) + return err_closed(); + if (!self->readable) + return 
err_mode("reading"); + + if (!PyArg_ParseTuple(args, "w*", &pbuf)) + return NULL; + + if (_PyVerify_fd(self->fd)) { + Py_BEGIN_ALLOW_THREADS + errno = 0; + n = read(self->fd, pbuf.buf, pbuf.len); + Py_END_ALLOW_THREADS + } else + n = -1; + PyBuffer_Release(&pbuf); + if (n < 0) { + if (errno == EAGAIN) + Py_RETURN_NONE; + PyErr_SetFromErrno(PyExc_IOError); + return NULL; + } + + return PyLong_FromSsize_t(n); +} + +static size_t +new_buffersize(PyFileIOObject *self, size_t currentsize) +{ +#ifdef HAVE_FSTAT + off_t pos, end; + struct stat st; + if (fstat(self->fd, &st) == 0) { + end = st.st_size; + pos = lseek(self->fd, 0L, SEEK_CUR); + /* Files claiming a size smaller than SMALLCHUNK may + actually be streaming pseudo-files. In this case, we + apply the more aggressive algorithm below. + */ + if (end >= SMALLCHUNK && end >= pos && pos >= 0) { + /* Add 1 so if the file were to grow we'd notice. */ + return currentsize + end - pos + 1; + } + } +#endif + if (currentsize > SMALLCHUNK) { + /* Keep doubling until we reach BIGCHUNK; + then keep adding BIGCHUNK. 
*/ + if (currentsize <= BIGCHUNK) + return currentsize + currentsize; + else + return currentsize + BIGCHUNK; + } + return currentsize + SMALLCHUNK; +} + +static PyObject * +fileio_readall(PyFileIOObject *self) +{ + PyObject *result; + Py_ssize_t total = 0; + int n; + + if (!_PyVerify_fd(self->fd)) + return PyErr_SetFromErrno(PyExc_IOError); + + result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK); + if (result == NULL) + return NULL; + + while (1) { + size_t newsize = new_buffersize(self, total); + if (newsize > PY_SSIZE_T_MAX || newsize <= 0) { + PyErr_SetString(PyExc_OverflowError, + "unbounded read returned more bytes " + "than a Python string can hold "); + Py_DECREF(result); + return NULL; + } + + if (PyBytes_GET_SIZE(result) < (Py_ssize_t)newsize) { + if (_PyBytes_Resize(&result, newsize) < 0) { + if (total == 0) { + Py_DECREF(result); + return NULL; + } + PyErr_Clear(); + break; + } + } + Py_BEGIN_ALLOW_THREADS + errno = 0; + n = read(self->fd, + PyBytes_AS_STRING(result) + total, + newsize - total); + Py_END_ALLOW_THREADS + if (n == 0) + break; + if (n < 0) { + if (total > 0) + break; + if (errno == EAGAIN) { + Py_DECREF(result); + Py_RETURN_NONE; + } + Py_DECREF(result); + PyErr_SetFromErrno(PyExc_IOError); + return NULL; + } + total += n; + } + + if (PyBytes_GET_SIZE(result) > total) { + if (_PyBytes_Resize(&result, total) < 0) { + /* This should never happen, but just in case */ + Py_DECREF(result); + return NULL; + } + } + return result; +} + +static PyObject * +fileio_read(PyFileIOObject *self, PyObject *args) +{ + char *ptr; + Py_ssize_t n; + Py_ssize_t size = -1; + PyObject *bytes; + + if (self->fd < 0) + return err_closed(); + if (!self->readable) + return err_mode("reading"); + + if (!PyArg_ParseTuple(args, "|n", &size)) + return NULL; + + if (size < 0) { + return fileio_readall(self); + } + + bytes = PyBytes_FromStringAndSize(NULL, size); + if (bytes == NULL) + return NULL; + ptr = PyBytes_AS_STRING(bytes); + + if (_PyVerify_fd(self->fd)) { + 
Py_BEGIN_ALLOW_THREADS + errno = 0; + n = read(self->fd, ptr, size); + Py_END_ALLOW_THREADS + } else + n = -1; + + if (n < 0) { + Py_DECREF(bytes); + if (errno == EAGAIN) + Py_RETURN_NONE; + PyErr_SetFromErrno(PyExc_IOError); + return NULL; + } + + if (n != size) { + if (_PyBytes_Resize(&bytes, n) < 0) { + Py_DECREF(bytes); + return NULL; + } + } + + return (PyObject *) bytes; +} + +static PyObject * +fileio_write(PyFileIOObject *self, PyObject *args) +{ + Py_buffer pbuf; + Py_ssize_t n; + + if (self->fd < 0) + return err_closed(); + if (!self->writable) + return err_mode("writing"); + + if (!PyArg_ParseTuple(args, "s*", &pbuf)) + return NULL; + + if (_PyVerify_fd(self->fd)) { + Py_BEGIN_ALLOW_THREADS + errno = 0; + n = write(self->fd, pbuf.buf, pbuf.len); + Py_END_ALLOW_THREADS + } else + n = -1; + + PyBuffer_Release(&pbuf); + + if (n < 0) { + if (errno == EAGAIN) + Py_RETURN_NONE; + PyErr_SetFromErrno(PyExc_IOError); + return NULL; + } + + return PyLong_FromSsize_t(n); +} + +/* XXX Windows support below is likely incomplete */ + +/* Cribbed from posix_lseek() */ +static PyObject * +portable_lseek(int fd, PyObject *posobj, int whence) +{ + Py_off_t pos, res; + +#ifdef SEEK_SET + /* Turn 0, 1, 2 into SEEK_{SET,CUR,END} */ + switch (whence) { +#if SEEK_SET != 0 + case 0: whence = SEEK_SET; break; +#endif +#if SEEK_CUR != 1 + case 1: whence = SEEK_CUR; break; +#endif +#if SEEK_END != 2 + case 2: whence = SEEK_END; break; +#endif + } +#endif /* SEEK_SET */ + + if (posobj == NULL) + pos = 0; + else { + if(PyFloat_Check(posobj)) { + PyErr_SetString(PyExc_TypeError, "an integer is required"); + return NULL; + } +#if defined(HAVE_LARGEFILE_SUPPORT) + pos = PyLong_AsLongLong(posobj); +#else + pos = PyLong_AsLong(posobj); +#endif + if (PyErr_Occurred()) + return NULL; + } + + if (_PyVerify_fd(fd)) { + Py_BEGIN_ALLOW_THREADS +#if defined(MS_WIN64) || defined(MS_WINDOWS) + res = _lseeki64(fd, pos, whence); +#else + res = lseek(fd, pos, whence); +#endif + Py_END_ALLOW_THREADS 
+ } else + res = -1; + if (res < 0) + return PyErr_SetFromErrno(PyExc_IOError); + +#if defined(HAVE_LARGEFILE_SUPPORT) + return PyLong_FromLongLong(res); +#else + return PyLong_FromLong(res); +#endif +} + +static PyObject * +fileio_seek(PyFileIOObject *self, PyObject *args) +{ + PyObject *posobj; + int whence = 0; + + if (self->fd < 0) + return err_closed(); + + if (!PyArg_ParseTuple(args, "O|i", &posobj, &whence)) + return NULL; + + return portable_lseek(self->fd, posobj, whence); +} + +static PyObject * +fileio_tell(PyFileIOObject *self, PyObject *args) +{ + if (self->fd < 0) + return err_closed(); + + return portable_lseek(self->fd, NULL, 1); +} + +#ifdef HAVE_FTRUNCATE +static PyObject * +fileio_truncate(PyFileIOObject *self, PyObject *args) +{ + PyObject *posobj = NULL; + Py_off_t pos; + int ret; + int fd; + + fd = self->fd; + if (fd < 0) + return err_closed(); + if (!self->writable) + return err_mode("writing"); + + if (!PyArg_ParseTuple(args, "|O", &posobj)) + return NULL; + + if (posobj == Py_None || posobj == NULL) { + /* Get the current position. */ + posobj = portable_lseek(fd, NULL, 1); + if (posobj == NULL) + return NULL; + } + else { + /* Move to the position to be truncated. */ + posobj = portable_lseek(fd, posobj, 0); + } + if (posobj == NULL) + return NULL; + +#if defined(HAVE_LARGEFILE_SUPPORT) + pos = PyLong_AsLongLong(posobj); +#else + pos = PyLong_AsLong(posobj); +#endif + if (pos == -1 && PyErr_Occurred()) + return NULL; + +#ifdef MS_WINDOWS + /* MS _chsize doesn't work if newsize doesn't fit in 32 bits, + so don't even try using it. */ + { + HANDLE hFile; + + /* Truncate. Note that this may grow the file! 
*/ + Py_BEGIN_ALLOW_THREADS + errno = 0; + hFile = (HANDLE)_get_osfhandle(fd); + ret = hFile == (HANDLE)-1; + if (ret == 0) { + ret = SetEndOfFile(hFile) == 0; + if (ret) + errno = EACCES; + } + Py_END_ALLOW_THREADS + } +#else + Py_BEGIN_ALLOW_THREADS + errno = 0; + ret = ftruncate(fd, pos); + Py_END_ALLOW_THREADS +#endif /* !MS_WINDOWS */ + + if (ret != 0) { + PyErr_SetFromErrno(PyExc_IOError); + return NULL; + } + + return posobj; +} +#endif + +static char * +mode_string(PyFileIOObject *self) +{ + if (self->readable) { + if (self->writable) + return "rb+"; + else + return "rb"; + } + else + return "wb"; +} + +static PyObject * +fileio_repr(PyFileIOObject *self) +{ + PyObject *nameobj, *res; + + if (self->fd < 0) + return PyString_FromFormat("<_io.FileIO [closed]>"); + + nameobj = PyObject_GetAttrString((PyObject *) self, "name"); + if (nameobj == NULL) { + if (PyErr_ExceptionMatches(PyExc_AttributeError)) + PyErr_Clear(); + else + return NULL; + res = PyString_FromFormat("<_io.FileIO fd=%d mode='%s'>", + self->fd, mode_string(self)); + } + else { + PyObject *repr = PyObject_Repr(nameobj); + Py_DECREF(nameobj); + if (repr == NULL) + return NULL; + res = PyString_FromFormat("<_io.FileIO name=%s mode='%s'>", + PyString_AS_STRING(repr), + mode_string(self)); + Py_DECREF(repr); + } + return res; +} + +static PyObject * +fileio_isatty(PyFileIOObject *self) +{ + long res; + + if (self->fd < 0) + return err_closed(); + Py_BEGIN_ALLOW_THREADS + res = isatty(self->fd); + Py_END_ALLOW_THREADS + return PyBool_FromLong(res); +} + + +PyDoc_STRVAR(fileio_doc, +"file(name: str[, mode: str]) -> file IO object\n" +"\n" +"Open a file. The mode can be 'r', 'w' or 'a' for reading (default),\n" +"writing or appending. The file will be created if it doesn't exist\n" +"when opened for writing or appending; it will be truncated when\n" +"opened for writing. Add a '+' to the mode to allow simultaneous\n" +"reading and writing."); + +PyDoc_STRVAR(read_doc, +"read(size: int) -> bytes. 
read at most size bytes, returned as bytes.\n"
+"\n"
+"Only makes one system call, so less data may be returned than requested.\n"
+"In non-blocking mode, returns None if no data is available.\n"
+"On end-of-file, returns ''.");
+
+PyDoc_STRVAR(readall_doc,
+"readall() -> bytes. read all data from the file, returned as bytes.\n"
+"\n"
+"In non-blocking mode, returns as much as is immediately available,\n"
+"or None if no data is available. On end-of-file, returns ''.");
+
+PyDoc_STRVAR(write_doc,
+"write(b: bytes) -> int. Write bytes b to file, return number written.\n"
+"\n"
+"Only makes one system call, so not all of the data may be written.\n"
+"The number of bytes actually written is returned.");
+
+PyDoc_STRVAR(fileno_doc,
+"fileno() -> int. \"file descriptor\".\n"
+"\n"
+"This is needed for lower-level file interfaces, such as the fcntl module.");
+
+PyDoc_STRVAR(seek_doc,
+"seek(offset: int[, whence: int]) -> None. Move to new file position.\n"
+"\n"
+"Argument offset is a byte count. Optional argument whence defaults to\n"
+"0 (offset from start of file, offset should be >= 0); other values are 1\n"
+"(move relative to current position, positive or negative), and 2 (move\n"
+"relative to end of file, usually negative, although many platforms allow\n"
+"seeking beyond the end of a file)."
+"\n"
+"Note that not all file objects are seekable.");
+
+#ifdef HAVE_FTRUNCATE
+PyDoc_STRVAR(truncate_doc,
+"truncate([size: int]) -> None. Truncate the file to at most size bytes.\n"
+"\n"
+"Size defaults to the current file position, as returned by tell().\n"
+"The current file position is changed to the value of size.");
+#endif
+
+PyDoc_STRVAR(tell_doc,
+"tell() -> int. Current file position.");
+
+PyDoc_STRVAR(readinto_doc,
+"readinto() -> Same as RawIOBase.readinto().");
+
+PyDoc_STRVAR(close_doc,
+"close() -> None. Close the file.\n"
+"\n"
+"A closed file cannot be used for further I/O operations. close() may be\n"
+"called more than once without error. 
Changes the fileno to -1."); + +PyDoc_STRVAR(isatty_doc, +"isatty() -> bool. True if the file is connected to a tty device."); + +PyDoc_STRVAR(seekable_doc, +"seekable() -> bool. True if file supports random-access."); + +PyDoc_STRVAR(readable_doc, +"readable() -> bool. True if file was opened in a read mode."); + +PyDoc_STRVAR(writable_doc, +"writable() -> bool. True if file was opened in a write mode."); + +static PyMethodDef fileio_methods[] = { + {"read", (PyCFunction)fileio_read, METH_VARARGS, read_doc}, + {"readall", (PyCFunction)fileio_readall, METH_NOARGS, readall_doc}, + {"readinto", (PyCFunction)fileio_readinto, METH_VARARGS, readinto_doc}, + {"write", (PyCFunction)fileio_write, METH_VARARGS, write_doc}, + {"seek", (PyCFunction)fileio_seek, METH_VARARGS, seek_doc}, + {"tell", (PyCFunction)fileio_tell, METH_VARARGS, tell_doc}, +#ifdef HAVE_FTRUNCATE + {"truncate", (PyCFunction)fileio_truncate, METH_VARARGS, truncate_doc}, +#endif + {"close", (PyCFunction)fileio_close, METH_NOARGS, close_doc}, + {"seekable", (PyCFunction)fileio_seekable, METH_NOARGS, seekable_doc}, + {"readable", (PyCFunction)fileio_readable, METH_NOARGS, readable_doc}, + {"writable", (PyCFunction)fileio_writable, METH_NOARGS, writable_doc}, + {"fileno", (PyCFunction)fileio_fileno, METH_NOARGS, fileno_doc}, + {"isatty", (PyCFunction)fileio_isatty, METH_NOARGS, isatty_doc}, + {NULL, NULL} /* sentinel */ +}; + +/* 'closed' and 'mode' are attributes for backwards compatibility reasons. 
*/ + +static PyObject * +get_closed(PyFileIOObject *self, void *closure) +{ + return PyBool_FromLong((long)(self->fd < 0)); +} + +static PyObject * +get_closefd(PyFileIOObject *self, void *closure) +{ + return PyBool_FromLong((long)(self->closefd)); +} + +static PyObject * +get_mode(PyFileIOObject *self, void *closure) +{ + return PyUnicode_FromString(mode_string(self)); +} + +static PyGetSetDef fileio_getsetlist[] = { + {"closed", (getter)get_closed, NULL, "True if the file is closed"}, + {"closefd", (getter)get_closefd, NULL, + "True if the file descriptor will be closed"}, + {"mode", (getter)get_mode, NULL, "String giving the file mode"}, + {NULL}, +}; + +PyTypeObject PyFileIO_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_io.FileIO", + sizeof(PyFileIOObject), + 0, + (destructor)fileio_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + (reprfunc)fileio_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE + | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + fileio_doc, /* tp_doc */ + (traverseproc)fileio_traverse, /* tp_traverse */ + (inquiry)fileio_clear, /* tp_clear */ + 0, /* tp_richcompare */ + offsetof(PyFileIOObject, weakreflist), /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + fileio_methods, /* tp_methods */ + 0, /* tp_members */ + fileio_getsetlist, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + offsetof(PyFileIOObject, dict), /* tp_dictoffset */ + fileio_init, /* tp_init */ + PyType_GenericAlloc, /* tp_alloc */ + fileio_new, /* tp_new */ + PyObject_GC_Del, /* tp_free */ +}; diff -r c5c04a1ff3b4 -r b3fbeeb23fd6 Modules/_io/iobase.c --- /dev/null +++ b/Modules/_io/iobase.c @@ -0,0 +1,894 @@ +/* + An 
implementation of the I/O abstract base classes hierarchy + as defined by PEP 3116 - "New I/O" + + Classes defined here: IOBase, RawIOBase. + + Written by Amaury Forgeot d'Arc and Antoine Pitrou +*/ + + +#define PY_SSIZE_T_CLEAN +#include "Python.h" +#include "structmember.h" +#include "_iomodule.h" + +/* + * IOBase class, an abstract class + */ + +typedef struct { + PyObject_HEAD + + PyObject *dict; + PyObject *weakreflist; +} IOBaseObject; + +PyDoc_STRVAR(IOBase_doc, + "The abstract base class for all I/O classes, acting on streams of\n" + "bytes. There is no public constructor.\n" + "\n" + "This class provides dummy implementations for many methods that\n" + "derived classes can override selectively; the default implementations\n" + "represent a file that cannot be read, written or seeked.\n" + "\n" + "Even though IOBase does not declare read, readinto, or write because\n" + "their signatures will vary, implementations and clients should\n" + "consider those methods part of the interface. Also, implementations\n" + "may raise a IOError when operations they do not support are called.\n" + "\n" + "The basic type used for binary data read from or written to a file is\n" + "bytes. bytearrays are accepted too, and in some cases (such as\n" + "readinto) needed. Text I/O classes work with str data.\n" + "\n" + "Note that calling any method (even inquiries) on a closed stream is\n" + "undefined. Implementations may raise IOError in this case.\n" + "\n" + "IOBase (and its subclasses) support the iterator protocol, meaning\n" + "that an IOBase object can be iterated over yielding the lines in a\n" + "stream.\n" + "\n" + "IOBase also supports the :keyword:`with` statement. 
In this example,\n"
+    "fp is closed after the suite of the with statement is complete:\n"
+    "\n"
+    "with open('spam.txt', 'w') as fp:\n"
+    "    fp.write('Spam and eggs!')\n");
+
+/* Use this macro whenever you want to check the internal `closed` status
+   of the IOBase object rather than the virtual `closed` attribute as returned
+   by whatever subclass. */
+
+#define IS_CLOSED(self) \
+    PyObject_HasAttrString(self, "__IOBase_closed")
+
+/* Internal methods */
+static PyObject *
+IOBase_unsupported(const char *message)
+{
+    PyErr_SetString(_PyIO_unsupported_operation, message);
+    return NULL;
+}
+
+/* Positioning */
+
+PyDoc_STRVAR(IOBase_seek_doc,
+    "Change stream position.\n"
+    "\n"
+    "Change the stream position to byte offset offset. offset is\n"
+    "interpreted relative to the position indicated by whence. Values\n"
+    "for whence are:\n"
+    "\n"
+    "* 0 -- start of stream (the default); offset should be zero or positive\n"
+    "* 1 -- current stream position; offset may be negative\n"
+    "* 2 -- end of stream; offset is usually negative\n"
+    "\n"
+    "Return the new absolute position.");
+
+static PyObject *
+IOBase_seek(PyObject *self, PyObject *args)
+{
+    return IOBase_unsupported("seek");
+}
+
+PyDoc_STRVAR(IOBase_tell_doc,
+    "Return current stream position.");
+
+static PyObject *
+IOBase_tell(PyObject *self, PyObject *args)
+{
+    return PyObject_CallMethod(self, "seek", "ii", 0, 1);
+}
+
+PyDoc_STRVAR(IOBase_truncate_doc,
+    "Truncate file to size bytes.\n"
+    "\n"
+    "Size defaults to the current IO position as reported by tell(). 
Return\n" + "the new size."); + +static PyObject * +IOBase_truncate(PyObject *self, PyObject *args) +{ + return IOBase_unsupported("truncate"); +} + +/* Flush and close methods */ + +PyDoc_STRVAR(IOBase_flush_doc, + "Flush write buffers, if applicable.\n" + "\n" + "This is not implemented for read-only and non-blocking streams.\n"); + +static PyObject * +IOBase_flush(PyObject *self, PyObject *args) +{ + /* XXX Should this return the number of bytes written??? */ + if (IS_CLOSED(self)) { + PyErr_SetString(PyExc_ValueError, "I/O operation on closed file."); + return NULL; + } + Py_RETURN_NONE; +} + +PyDoc_STRVAR(IOBase_close_doc, + "Flush and close the IO object.\n" + "\n" + "This method has no effect if the file is already closed.\n"); + +static int +IOBase_closed(PyObject *self) +{ + PyObject *res; + int closed; + /* This gets the derived attribute, which is *not* __IOBase_closed + in most cases! */ + res = PyObject_GetAttr(self, _PyIO_str_closed); + if (res == NULL) + return 0; + closed = PyObject_IsTrue(res); + Py_DECREF(res); + return closed; +} + +static PyObject * +IOBase_closed_get(PyObject *self, void *context) +{ + return PyBool_FromLong(IS_CLOSED(self)); +} + +PyObject * +_PyIOBase_checkClosed(PyObject *self, PyObject *args) +{ + if (IOBase_closed(self)) { + PyErr_SetString(PyExc_ValueError, "I/O operation on closed file."); + return NULL; + } + if (args == Py_True) + return Py_None; + else + Py_RETURN_NONE; +} + +/* XXX: IOBase thinks it has to maintain its own internal state in + `__IOBase_closed` and call flush() by itself, but it is redundant with + whatever behaviour a non-trivial derived class will implement. 
*/ + +static PyObject * +IOBase_close(PyObject *self, PyObject *args) +{ + PyObject *res; + + if (IS_CLOSED(self)) + Py_RETURN_NONE; + + res = PyObject_CallMethodObjArgs(self, _PyIO_str_flush, NULL); + PyObject_SetAttrString(self, "__IOBase_closed", Py_True); + if (res == NULL) { + /* If flush() fails, just give up */ + if (PyErr_ExceptionMatches(PyExc_IOError)) + PyErr_Clear(); + else + return NULL; + } + Py_XDECREF(res); + Py_RETURN_NONE; +} + +/* Finalization and garbage collection support */ + +int +_PyIOBase_finalize(PyObject *self) +{ + PyObject *res; + PyObject *tp, *v, *tb; + int closed = 1; + int is_zombie; + + /* If _PyIOBase_finalize() is called from a destructor, we need to + resurrect the object as calling close() can invoke arbitrary code. */ + is_zombie = (Py_REFCNT(self) == 0); + if (is_zombie) { + ++Py_REFCNT(self); + } + PyErr_Fetch(&tp, &v, &tb); + /* If `closed` doesn't exist or can't be evaluated as bool, then the + object is probably in an unusable state, so ignore. */ + res = PyObject_GetAttr(self, _PyIO_str_closed); + if (res == NULL) + PyErr_Clear(); + else { + closed = PyObject_IsTrue(res); + Py_DECREF(res); + if (closed == -1) + PyErr_Clear(); + } + if (closed == 0) { + res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_close, + NULL); + /* Silencing I/O errors is bad, but printing spurious tracebacks is + equally as bad, and potentially more frequent (because of + shutdown issues). */ + if (res == NULL) + PyErr_Clear(); + else + Py_DECREF(res); + } + PyErr_Restore(tp, v, tb); + if (is_zombie) { + if (--Py_REFCNT(self) != 0) { + /* The object lives again. The following code is taken from + slot_tp_del in typeobject.c. */ + Py_ssize_t refcnt = Py_REFCNT(self); + _Py_NewReference(self); + Py_REFCNT(self) = refcnt; + /* If Py_REF_DEBUG, _Py_NewReference bumped _Py_RefTotal, so + * we need to undo that. */ + _Py_DEC_REFTOTAL; + /* If Py_TRACE_REFS, _Py_NewReference re-added self to the object + * chain, so no more to do there. 
+ * If COUNT_ALLOCS, the original decref bumped tp_frees, and + * _Py_NewReference bumped tp_allocs: both of those need to be + * undone. + */ +#ifdef COUNT_ALLOCS + --Py_TYPE(self)->tp_frees; + --Py_TYPE(self)->tp_allocs; +#endif + return -1; + } + } + return 0; +} + +static int +IOBase_traverse(IOBaseObject *self, visitproc visit, void *arg) +{ + Py_VISIT(self->dict); + return 0; +} + +static int +IOBase_clear(IOBaseObject *self) +{ + if (_PyIOBase_finalize((PyObject *) self) < 0) + return -1; + Py_CLEAR(self->dict); + return 0; +} + +/* Destructor */ + +static void +IOBase_dealloc(IOBaseObject *self) +{ + /* NOTE: since IOBaseObject has its own dict, Python-defined attributes + are still available here for close() to use. + However, if the derived class declares a __slots__, those slots are + already gone. + */ + if (_PyIOBase_finalize((PyObject *) self) < 0) { + /* When called from a heap type's dealloc, the type will be + decref'ed on return (see e.g. subtype_dealloc in typeobject.c). 
*/
+        if (PyType_HasFeature(Py_TYPE(self), Py_TPFLAGS_HEAPTYPE))
+            Py_INCREF(Py_TYPE(self));
+        return;
+    }
+    _PyObject_GC_UNTRACK(self);
+    if (self->weakreflist != NULL)
+        PyObject_ClearWeakRefs((PyObject *) self);
+    Py_CLEAR(self->dict);
+    Py_TYPE(self)->tp_free((PyObject *) self);
+}
+
+/* Inquiry methods */
+
+PyDoc_STRVAR(IOBase_seekable_doc,
+    "Return whether object supports random access.\n"
+    "\n"
+    "If False, seek(), tell() and truncate() will raise IOError.\n"
+    "This method may need to do a test seek().");
+
+static PyObject *
+IOBase_seekable(PyObject *self, PyObject *args)
+{
+    Py_RETURN_FALSE;
+}
+
+PyObject *
+_PyIOBase_checkSeekable(PyObject *self, PyObject *args)
+{
+    PyObject *res = PyObject_CallMethodObjArgs(self, _PyIO_str_seekable, NULL);
+    if (res == NULL)
+        return NULL;
+    if (res != Py_True) {
+        Py_CLEAR(res);
+        PyErr_SetString(PyExc_IOError, "File or stream is not seekable.");
+        return NULL;
+    }
+    if (args == Py_True) {
+        Py_DECREF(res);
+    }
+    return res;
+}
+
+PyDoc_STRVAR(IOBase_readable_doc,
+    "Return whether object was opened for reading.\n"
+    "\n"
+    "If False, read() will raise IOError.");
+
+static PyObject *
+IOBase_readable(PyObject *self, PyObject *args)
+{
+    Py_RETURN_FALSE;
+}
+
+/* May be called with any object */
+PyObject *
+_PyIOBase_checkReadable(PyObject *self, PyObject *args)
+{
+    PyObject *res = PyObject_CallMethodObjArgs(self, _PyIO_str_readable, NULL);
+    if (res == NULL)
+        return NULL;
+    if (res != Py_True) {
+        Py_CLEAR(res);
+        PyErr_SetString(PyExc_IOError, "File or stream is not readable.");
+        return NULL;
+    }
+    if (args == Py_True) {
+        Py_DECREF(res);
+    }
+    return res;
+}
+
+PyDoc_STRVAR(IOBase_writable_doc,
+    "Return whether object was opened for writing.\n"
+    "\n"
+    "If False, write() will raise IOError.");
+
+static PyObject *
+IOBase_writable(PyObject *self, PyObject *args)
+{
+    Py_RETURN_FALSE;
+}
+
+/* May be called with any object */
+PyObject *
+_PyIOBase_checkWritable(PyObject *self, PyObject *args)
+{
+    
PyObject *res = PyObject_CallMethodObjArgs(self, _PyIO_str_writable, NULL); + if (res == NULL) + return NULL; + if (res != Py_True) { + Py_CLEAR(res); + PyErr_SetString(PyExc_IOError, "File or stream is not writable."); + return NULL; + } + if (args == Py_True) { + Py_DECREF(res); + } + return res; +} + +/* Context manager */ + +static PyObject * +IOBase_enter(PyObject *self, PyObject *args) +{ + if (_PyIOBase_checkClosed(self, Py_True) == NULL) + return NULL; + + Py_INCREF(self); + return self; +} + +static PyObject * +IOBase_exit(PyObject *self, PyObject *args) +{ + return PyObject_CallMethodObjArgs(self, _PyIO_str_close, NULL); +} + +/* Lower-level APIs */ + +/* XXX Should these be present even if unimplemented? */ + +PyDoc_STRVAR(IOBase_fileno_doc, + "Returns underlying file descriptor if one exists.\n" + "\n" + "An IOError is raised if the IO object does not use a file descriptor.\n"); + +static PyObject * +IOBase_fileno(PyObject *self, PyObject *args) +{ + return IOBase_unsupported("fileno"); +} + +PyDoc_STRVAR(IOBase_isatty_doc, + "Return whether this is an 'interactive' stream.\n" + "\n" + "Return False if it can't be determined.\n"); + +static PyObject * +IOBase_isatty(PyObject *self, PyObject *args) +{ + if (_PyIOBase_checkClosed(self, Py_True) == NULL) + return NULL; + Py_RETURN_FALSE; +} + +/* Readline(s) and writelines */ + +PyDoc_STRVAR(IOBase_readline_doc, + "Read and return a line from the stream.\n" + "\n" + "If limit is specified, at most limit bytes will be read.\n" + "\n" + "The line terminator is always b'\n' for binary files; for text\n" + "files, the newlines argument to open can be used to select the line\n" + "terminator(s) recognized.\n"); + +static PyObject * +IOBase_readline(PyObject *self, PyObject *args) +{ + /* For backwards compatibility, a (slowish) readline(). 
*/ + + Py_ssize_t limit = -1; + int has_peek = 0; + PyObject *buffer, *result; + Py_ssize_t old_size = -1; + + if (!PyArg_ParseTuple(args, "|n:readline", &limit)) { + return NULL; + } + + if (PyObject_HasAttrString(self, "peek")) + has_peek = 1; + + buffer = PyByteArray_FromStringAndSize(NULL, 0); + if (buffer == NULL) + return NULL; + + while (limit < 0 || Py_SIZE(buffer) < limit) { + Py_ssize_t nreadahead = 1; + PyObject *b; + + if (has_peek) { + PyObject *readahead = PyObject_CallMethod(self, "peek", "i", 1); + if (readahead == NULL) + goto fail; + if (!PyBytes_Check(readahead)) { + PyErr_Format(PyExc_IOError, + "peek() should have returned a bytes object, " + "not '%.200s'", Py_TYPE(readahead)->tp_name); + Py_DECREF(readahead); + goto fail; + } + if (PyBytes_GET_SIZE(readahead) > 0) { + Py_ssize_t n = 0; + const char *buf = PyBytes_AS_STRING(readahead); + if (limit >= 0) { + do { + if (n >= PyBytes_GET_SIZE(readahead) || n >= limit) + break; + if (buf[n++] == '\n') + break; + } while (1); + } + else { + do { + if (n >= PyBytes_GET_SIZE(readahead)) + break; + if (buf[n++] == '\n') + break; + } while (1); + } + nreadahead = n; + } + Py_DECREF(readahead); + } + + b = PyObject_CallMethod(self, "read", "n", nreadahead); + if (b == NULL) + goto fail; + if (!PyBytes_Check(b)) { + PyErr_Format(PyExc_IOError, + "read() should have returned a bytes object, " + "not '%.200s'", Py_TYPE(b)->tp_name); + Py_DECREF(b); + goto fail; + } + if (PyBytes_GET_SIZE(b) == 0) { + Py_DECREF(b); + break; + } + + old_size = PyByteArray_GET_SIZE(buffer); + PyByteArray_Resize(buffer, old_size + PyBytes_GET_SIZE(b)); + memcpy(PyByteArray_AS_STRING(buffer) + old_size, + PyBytes_AS_STRING(b), PyBytes_GET_SIZE(b)); + + Py_DECREF(b); + + if (PyByteArray_AS_STRING(buffer)[PyByteArray_GET_SIZE(buffer) - 1] == '\n') + break; + } + + result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(buffer), + PyByteArray_GET_SIZE(buffer)); + Py_DECREF(buffer); + return result; + fail: + Py_DECREF(buffer); + 
return NULL; +} + +static PyObject * +IOBase_iter(PyObject *self) +{ + if (_PyIOBase_checkClosed(self, Py_True) == NULL) + return NULL; + + Py_INCREF(self); + return self; +} + +static PyObject * +IOBase_iternext(PyObject *self) +{ + PyObject *line = PyObject_CallMethodObjArgs(self, _PyIO_str_readline, NULL); + + if (line == NULL) + return NULL; + + if (PyObject_Size(line) == 0) { + Py_DECREF(line); + return NULL; + } + + return line; +} + +PyDoc_STRVAR(IOBase_readlines_doc, + "Return a list of lines from the stream.\n" + "\n" + "hint can be specified to control the number of lines read: no more\n" + "lines will be read if the total size (in bytes/characters) of all\n" + "lines so far exceeds hint."); + +static PyObject * +IOBase_readlines(PyObject *self, PyObject *args) +{ + Py_ssize_t hint = -1, length = 0; + PyObject *hintobj = Py_None, *result; + + if (!PyArg_ParseTuple(args, "|O:readlines", &hintobj)) { + return NULL; + } + if (hintobj != Py_None) { + hint = PyNumber_AsSsize_t(hintobj, PyExc_ValueError); + if (hint == -1 && PyErr_Occurred()) + return NULL; + } + + result = PyList_New(0); + if (result == NULL) + return NULL; + + if (hint <= 0) { + /* XXX special-casing this made sense in the Python version in order + to remove the bytecode interpretation overhead, but it could + probably be removed here. 
*/ + PyObject *ret = PyObject_CallMethod(result, "extend", "O", self); + if (ret == NULL) { + Py_DECREF(result); + return NULL; + } + Py_DECREF(ret); + return result; + } + + while (1) { + PyObject *line = PyIter_Next(self); + if (line == NULL) { + if (PyErr_Occurred()) { + Py_DECREF(result); + return NULL; + } + else + break; /* StopIteration raised */ + } + + if (PyList_Append(result, line) < 0) { + Py_DECREF(line); + Py_DECREF(result); + return NULL; + } + length += PyObject_Size(line); + Py_DECREF(line); + + if (length > hint) + break; + } + return result; +} + +static PyObject * +IOBase_writelines(PyObject *self, PyObject *args) +{ + PyObject *lines, *iter, *res; + + if (!PyArg_ParseTuple(args, "O:writelines", &lines)) { + return NULL; + } + + if (_PyIOBase_checkClosed(self, Py_True) == NULL) + return NULL; + + iter = PyObject_GetIter(lines); + if (iter == NULL) + return NULL; + + while (1) { + PyObject *line = PyIter_Next(iter); + if (line == NULL) { + if (PyErr_Occurred()) { + Py_DECREF(iter); + return NULL; + } + else + break; /* Stop Iteration */ + } + + res = PyObject_CallMethodObjArgs(self, _PyIO_str_write, line, NULL); + Py_DECREF(line); + if (res == NULL) { + Py_DECREF(iter); + return NULL; + } + Py_DECREF(res); + } + Py_DECREF(iter); + Py_RETURN_NONE; +} + +static PyMethodDef IOBase_methods[] = { + {"seek", IOBase_seek, METH_VARARGS, IOBase_seek_doc}, + {"tell", IOBase_tell, METH_NOARGS, IOBase_tell_doc}, + {"truncate", IOBase_truncate, METH_VARARGS, IOBase_truncate_doc}, + {"flush", IOBase_flush, METH_NOARGS, IOBase_flush_doc}, + {"close", IOBase_close, METH_NOARGS, IOBase_close_doc}, + + {"seekable", IOBase_seekable, METH_NOARGS, IOBase_seekable_doc}, + {"readable", IOBase_readable, METH_NOARGS, IOBase_readable_doc}, + {"writable", IOBase_writable, METH_NOARGS, IOBase_writable_doc}, + + {"_checkClosed", _PyIOBase_checkClosed, METH_NOARGS}, + {"_checkSeekable", _PyIOBase_checkSeekable, METH_NOARGS}, + {"_checkReadable", _PyIOBase_checkReadable, 
METH_NOARGS}, + {"_checkWritable", _PyIOBase_checkWritable, METH_NOARGS}, + + {"fileno", IOBase_fileno, METH_NOARGS, IOBase_fileno_doc}, + {"isatty", IOBase_isatty, METH_NOARGS, IOBase_isatty_doc}, + + {"__enter__", IOBase_enter, METH_NOARGS}, + {"__exit__", IOBase_exit, METH_VARARGS}, + + {"readline", IOBase_readline, METH_VARARGS, IOBase_readline_doc}, + {"readlines", IOBase_readlines, METH_VARARGS, IOBase_readlines_doc}, + {"writelines", IOBase_writelines, METH_VARARGS}, + + {NULL, NULL} +}; + +static PyGetSetDef IOBase_getset[] = { + {"closed", (getter)IOBase_closed_get, NULL, NULL}, + {NULL} +}; + + +PyTypeObject PyIOBase_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_io._IOBase", /*tp_name*/ + sizeof(IOBaseObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)IOBase_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare */ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE + | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + IOBase_doc, /* tp_doc */ + (traverseproc)IOBase_traverse, /* tp_traverse */ + (inquiry)IOBase_clear, /* tp_clear */ + 0, /* tp_richcompare */ + offsetof(IOBaseObject, weakreflist), /* tp_weaklistoffset */ + IOBase_iter, /* tp_iter */ + IOBase_iternext, /* tp_iternext */ + IOBase_methods, /* tp_methods */ + 0, /* tp_members */ + IOBase_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + offsetof(IOBaseObject, dict), /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ +}; + + +/* + * RawIOBase class, Inherits from IOBase. 
+ */ +PyDoc_STRVAR(RawIOBase_doc, + "Base class for raw binary I/O."); + +/* + * The read() method is implemented by calling readinto(); derived classes + * that want to support read() only need to implement readinto() as a + * primitive operation. In general, readinto() can be more efficient than + * read(). + * + * (It would be tempting to also provide an implementation of readinto() in + * terms of read(), in case the latter is a more suitable primitive operation, + * but that would lead to nasty recursion in case a subclass doesn't implement + * either.) +*/ + +static PyObject * +RawIOBase_read(PyObject *self, PyObject *args) +{ + Py_ssize_t n = -1; + PyObject *b, *res; + + if (!PyArg_ParseTuple(args, "|n:read", &n)) { + return NULL; + } + + if (n < 0) + return PyObject_CallMethod(self, "readall", NULL); + + /* TODO: allocate a bytes object directly instead and manually construct + a writable memoryview pointing to it. */ + b = PyByteArray_FromStringAndSize(NULL, n); + if (b == NULL) + return NULL; + + res = PyObject_CallMethodObjArgs(self, _PyIO_str_readinto, b, NULL); + if (res == NULL) { + Py_DECREF(b); + return NULL; + } + + n = PyNumber_AsSsize_t(res, PyExc_ValueError); + Py_DECREF(res); + if (n == -1 && PyErr_Occurred()) { + Py_DECREF(b); + return NULL; + } + + res = PyBytes_FromStringAndSize(PyByteArray_AsString(b), n); + Py_DECREF(b); + return res; +} + + +PyDoc_STRVAR(RawIOBase_readall_doc, + "Read until EOF, using multiple read() call."); + +static PyObject * +RawIOBase_readall(PyObject *self, PyObject *args) +{ + int r; + PyObject *chunks = PyList_New(0); + PyObject *result; + + if (chunks == NULL) + return NULL; + + while (1) { + PyObject *data = PyObject_CallMethod(self, "read", + "i", DEFAULT_BUFFER_SIZE); + if (!data) { + Py_DECREF(chunks); + return NULL; + } + if (!PyBytes_Check(data)) { + Py_DECREF(chunks); + Py_DECREF(data); + PyErr_SetString(PyExc_TypeError, "read() should return bytes"); + return NULL; + } + if (PyBytes_GET_SIZE(data) == 0) 
{ + /* EOF */ + Py_DECREF(data); + break; + } + r = PyList_Append(chunks, data); + Py_DECREF(data); + if (r < 0) { + Py_DECREF(chunks); + return NULL; + } + } + result = _PyBytes_Join(_PyIO_empty_bytes, chunks); + Py_DECREF(chunks); + return result; +} + +static PyMethodDef RawIOBase_methods[] = { + {"read", RawIOBase_read, METH_VARARGS}, + {"readall", RawIOBase_readall, METH_NOARGS, RawIOBase_readall_doc}, + {NULL, NULL} +}; + +PyTypeObject PyRawIOBase_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_io._RawIOBase", /*tp_name*/ + 0, /*tp_basicsize*/ + 0, /*tp_itemsize*/ + 0, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare */ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + RawIOBase_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + RawIOBase_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + &PyIOBase_Type, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ +}; diff -r c5c04a1ff3b4 -r b3fbeeb23fd6 Modules/_io/stringio.c --- /dev/null +++ b/Modules/_io/stringio.c @@ -0,0 +1,756 @@ +#define PY_SSIZE_T_CLEAN +#include "Python.h" +#include "structmember.h" +#include "_iomodule.h" + +/* Implementation note: the buffer is always at least one character longer + than the enclosed string, for proper functioning of _PyIO_find_line_ending. +*/ + +typedef struct { + PyObject_HEAD + Py_UNICODE *buf; + Py_ssize_t pos; + Py_ssize_t string_size; + size_t buf_size; + + char ok; /* initialized? 
*/ + char closed; + char readuniversal; + char readtranslate; + PyObject *decoder; + PyObject *readnl; + PyObject *writenl; + + PyObject *dict; + PyObject *weakreflist; +} StringIOObject; + +#define CHECK_INITIALIZED(self) \ + if (self->ok <= 0) { \ + PyErr_SetString(PyExc_ValueError, \ + "I/O operation on uninitialized object"); \ + return NULL; \ + } + +#define CHECK_CLOSED(self) \ + if (self->closed) { \ + PyErr_SetString(PyExc_ValueError, \ + "I/O operation on closed file"); \ + return NULL; \ + } + +PyDoc_STRVAR(stringio_doc, + "Text I/O implementation using an in-memory buffer.\n" + "\n" + "The initial_value argument sets the value of object. The newline\n" + "argument is like the one of TextIOWrapper's constructor."); + + +/* Internal routine for changing the size, in terms of characters, of the + buffer of StringIO objects. The caller should ensure that the 'size' + argument is non-negative. Returns 0 on success, -1 otherwise. */ +static int +resize_buffer(StringIOObject *self, size_t size) +{ + /* Here, unsigned types are used to avoid dealing with signed integer + overflow, which is undefined in C. */ + size_t alloc = self->buf_size; + Py_UNICODE *new_buf = NULL; + + assert(self->buf != NULL); + + /* Reserve one more char for line ending detection. */ + size = size + 1; + /* For simplicity, stay in the range of the signed type. Anyway, Python + doesn't allow strings to be longer than this. */ + if (size > PY_SSIZE_T_MAX) + goto overflow; + + if (size < alloc / 2) { + /* Major downsize; resize down to exact size. */ + alloc = size + 1; + } + else if (size < alloc) { + /* Within allocated size; quick exit */ + return 0; + } + else if (size <= alloc * 1.125) { + /* Moderate upsize; overallocate similar to list_resize() */ + alloc = size + (size >> 3) + (size < 9 ? 
3 : 6); + } + else { + /* Major upsize; resize up to exact size */ + alloc = size + 1; + } + + if (alloc > ((size_t)-1) / sizeof(Py_UNICODE)) + goto overflow; + new_buf = (Py_UNICODE *)PyMem_Realloc(self->buf, + alloc * sizeof(Py_UNICODE)); + if (new_buf == NULL) { + PyErr_NoMemory(); + return -1; + } + self->buf_size = alloc; + self->buf = new_buf; + + return 0; + + overflow: + PyErr_SetString(PyExc_OverflowError, + "new buffer size too large"); + return -1; +} + +/* Internal routine for writing a whole PyUnicode object to the buffer of a + StringIO object. Returns 0 on success, or -1 on error. */ +static Py_ssize_t +write_str(StringIOObject *self, PyObject *obj) +{ + Py_UNICODE *str; + Py_ssize_t len; + PyObject *decoded = NULL; + assert(self->buf != NULL); + assert(self->pos >= 0); + + if (self->decoder != NULL) { + decoded = _PyIncrementalNewlineDecoder_decode( + self->decoder, obj, 1 /* always final */); + } + else { + decoded = obj; + Py_INCREF(decoded); + } + if (self->writenl) { + PyObject *translated = PyUnicode_Replace( + decoded, _PyIO_str_nl, self->writenl, -1); + Py_DECREF(decoded); + decoded = translated; + } + if (decoded == NULL) + return -1; + + assert(PyUnicode_Check(decoded)); + str = PyUnicode_AS_UNICODE(decoded); + len = PyUnicode_GET_SIZE(decoded); + + assert(len >= 0); + + /* This overflow check is not strictly necessary. However, it avoids us to + deal with funky things like comparing an unsigned and a signed + integer. */ + if (self->pos > PY_SSIZE_T_MAX - len) { + PyErr_SetString(PyExc_OverflowError, + "new position too large"); + goto fail; + } + if (self->pos + len > self->string_size) { + if (resize_buffer(self, self->pos + len) < 0) + goto fail; + } + + if (self->pos > self->string_size) { + /* In case of overseek, pad with null bytes the buffer region between + the end of stream and the current position. 
+ + 0 lo string_size hi + | |<---used--->|<----------available----------->| + | | <--to pad-->|<---to write---> | + 0 buf positon + + */ + memset(self->buf + self->string_size, '\0', + (self->pos - self->string_size) * sizeof(Py_UNICODE)); + } + + /* Copy the data to the internal buffer, overwriting some of the + existing data if self->pos < self->string_size. */ + memcpy(self->buf + self->pos, str, len * sizeof(Py_UNICODE)); + self->pos += len; + + /* Set the new length of the internal string if it has changed. */ + if (self->string_size < self->pos) { + self->string_size = self->pos; + } + + Py_DECREF(decoded); + return 0; + +fail: + Py_XDECREF(decoded); + return -1; +} + +PyDoc_STRVAR(stringio_getvalue_doc, + "Retrieve the entire contents of the object."); + +static PyObject * +stringio_getvalue(StringIOObject *self) +{ + CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + return PyUnicode_FromUnicode(self->buf, self->string_size); +} + +PyDoc_STRVAR(stringio_tell_doc, + "Tell the current file position."); + +static PyObject * +stringio_tell(StringIOObject *self) +{ + CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + return PyLong_FromSsize_t(self->pos); +} + +PyDoc_STRVAR(stringio_read_doc, + "Read at most n characters, returned as a string.\n" + "\n" + "If the argument is negative or omitted, read until EOF\n" + "is reached. Return an empty string at EOF.\n"); + +static PyObject * +stringio_read(StringIOObject *self, PyObject *args) +{ + Py_ssize_t size, n; + Py_UNICODE *output; + PyObject *arg = Py_None; + + CHECK_INITIALIZED(self); + if (!PyArg_ParseTuple(args, "|O:read", &arg)) + return NULL; + CHECK_CLOSED(self); + + if (PyNumber_Check(arg)) { + size = PyNumber_AsSsize_t(arg, PyExc_OverflowError); + if (size == -1 && PyErr_Occurred()) + return NULL; + } + else if (arg == Py_None) { + /* Read until EOF is reached, by default. 
*/ + size = -1; + } + else { + PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'", + Py_TYPE(arg)->tp_name); + return NULL; + } + + /* adjust invalid sizes */ + n = self->string_size - self->pos; + if (size < 0 || size > n) { + size = n; + if (size < 0) + size = 0; + } + + output = self->buf + self->pos; + self->pos += size; + return PyUnicode_FromUnicode(output, size); +} + +/* Internal helper, used by stringio_readline and stringio_iternext */ +static PyObject * +_stringio_readline(StringIOObject *self, Py_ssize_t limit) +{ + Py_UNICODE *start, *end, old_char; + Py_ssize_t len, consumed; + + /* In case of overseek, return the empty string */ + if (self->pos >= self->string_size) + return PyUnicode_FromString(""); + + start = self->buf + self->pos; + if (limit < 0 || limit > self->string_size - self->pos) + limit = self->string_size - self->pos; + + end = start + limit; + old_char = *end; + *end = '\0'; + len = _PyIO_find_line_ending( + self->readtranslate, self->readuniversal, self->readnl, + start, end, &consumed); + *end = old_char; + /* If we haven't found any line ending, we just return everything + (`consumed` is ignored). 
*/ + if (len < 0) + len = limit; + self->pos += len; + return PyUnicode_FromUnicode(start, len); +} + +PyDoc_STRVAR(stringio_readline_doc, + "Read until newline or EOF.\n" + "\n" + "Returns an empty string if EOF is hit immediately.\n"); + +static PyObject * +stringio_readline(StringIOObject *self, PyObject *args) +{ + PyObject *arg = Py_None; + Py_ssize_t limit = -1; + + CHECK_INITIALIZED(self); + if (!PyArg_ParseTuple(args, "|O:readline", &arg)) + return NULL; + CHECK_CLOSED(self); + + if (PyNumber_Check(arg)) { + limit = PyNumber_AsSsize_t(arg, PyExc_OverflowError); + if (limit == -1 && PyErr_Occurred()) + return NULL; + } + else if (arg != Py_None) { + PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'", + Py_TYPE(arg)->tp_name); + return NULL; + } + return _stringio_readline(self, limit); +} + +static PyObject * +stringio_iternext(StringIOObject *self) +{ + PyObject *line; + + CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + + if (Py_TYPE(self) == &PyStringIO_Type) { + /* Skip method call overhead for speed */ + line = _stringio_readline(self, -1); + } + else { + /* XXX is subclassing StringIO really supported? */ + line = PyObject_CallMethodObjArgs((PyObject *)self, + _PyIO_str_readline, NULL); + if (line && !PyUnicode_Check(line)) { + PyErr_Format(PyExc_IOError, + "readline() should have returned an str object, " + "not '%.200s'", Py_TYPE(line)->tp_name); + Py_DECREF(line); + return NULL; + } + } + + if (line == NULL) + return NULL; + + if (PyUnicode_GET_SIZE(line) == 0) { + /* Reached EOF */ + Py_DECREF(line); + return NULL; + } + + return line; +} + +PyDoc_STRVAR(stringio_truncate_doc, + "Truncate size to pos.\n" + "\n" + "The pos argument defaults to the current file position, as\n" + "returned by tell(). 
Imply an absolute seek to pos.\n" + "Returns the new absolute position.\n"); + +static PyObject * +stringio_truncate(StringIOObject *self, PyObject *args) +{ + Py_ssize_t size; + PyObject *arg = Py_None; + + CHECK_INITIALIZED(self); + if (!PyArg_ParseTuple(args, "|O:truncate", &arg)) + return NULL; + CHECK_CLOSED(self); + + if (PyNumber_Check(arg)) { + size = PyNumber_AsSsize_t(arg, PyExc_OverflowError); + if (size == -1 && PyErr_Occurred()) + return NULL; + } + else if (arg == Py_None) { + /* Truncate to current position if no argument is passed. */ + size = self->pos; + } + else { + PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'", + Py_TYPE(arg)->tp_name); + return NULL; + } + + if (size < 0) { + PyErr_Format(PyExc_ValueError, + "Negative size value %zd", size); + return NULL; + } + + if (size < self->string_size) { + if (resize_buffer(self, size) < 0) + return NULL; + self->string_size = size; + } + self->pos = size; + + return PyLong_FromSsize_t(size); +} + +PyDoc_STRVAR(stringio_seek_doc, + "Change stream position.\n" + "\n" + "Seek to character offset pos relative to position indicated by whence:\n" + " 0 Start of stream (the default). 
pos should be >= 0;\n" + " 1 Current position - pos must be 0;\n" + " 2 End of stream - pos must be 0.\n" + "Returns the new absolute position.\n"); + +static PyObject * +stringio_seek(StringIOObject *self, PyObject *args) +{ + PyObject *posobj; + Py_ssize_t pos; + int mode = 0; + + CHECK_INITIALIZED(self); + if (!PyArg_ParseTuple(args, "O|i:seek", &posobj, &mode)) + return NULL; + + pos = PyNumber_AsSsize_t(posobj, PyExc_OverflowError); + if (pos == -1 && PyErr_Occurred()) + return NULL; + + CHECK_CLOSED(self); + + if (mode != 0 && mode != 1 && mode != 2) { + PyErr_Format(PyExc_ValueError, + "Invalid whence (%i, should be 0, 1 or 2)", mode); + return NULL; + } + else if (pos < 0 && mode == 0) { + PyErr_Format(PyExc_ValueError, + "Negative seek position %zd", pos); + return NULL; + } + else if (mode != 0 && pos != 0) { + PyErr_SetString(PyExc_IOError, + "Can't do nonzero cur-relative seeks"); + return NULL; + } + + /* mode 0: offset relative to beginning of the string. + mode 1: no change to current position. + mode 2: change position to end of file. */ + if (mode == 1) { + pos = self->pos; + } + else if (mode == 2) { + pos = self->string_size; + } + + self->pos = pos; + + return PyLong_FromSsize_t(self->pos); +} + +PyDoc_STRVAR(stringio_write_doc, + "Write string to file.\n" + "\n" + "Returns the number of characters written, which is always equal to\n" + "the length of the string.\n"); + +static PyObject * +stringio_write(StringIOObject *self, PyObject *obj) +{ + Py_ssize_t size; + + CHECK_INITIALIZED(self); + if (!PyUnicode_Check(obj)) { + PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'", + Py_TYPE(obj)->tp_name); + return NULL; + } + CHECK_CLOSED(self); + size = PyUnicode_GET_SIZE(obj); + + if (size > 0 && write_str(self, obj) < 0) + return NULL; + + return PyLong_FromSsize_t(size); +} + +PyDoc_STRVAR(stringio_close_doc, + "Close the IO object. 
Attempting any further operation after the\n" + "object is closed will raise a ValueError.\n" + "\n" + "This method has no effect if the file is already closed.\n"); + +static PyObject * +stringio_close(StringIOObject *self) +{ + self->closed = 1; + /* Free up some memory */ + if (resize_buffer(self, 0) < 0) + return NULL; + Py_CLEAR(self->readnl); + Py_CLEAR(self->writenl); + Py_CLEAR(self->decoder); + Py_RETURN_NONE; +} + +static int +stringio_traverse(StringIOObject *self, visitproc visit, void *arg) +{ + Py_VISIT(self->dict); + return 0; +} + +static int +stringio_clear(StringIOObject *self) +{ + Py_CLEAR(self->dict); + return 0; +} + +static void +stringio_dealloc(StringIOObject *self) +{ + _PyObject_GC_UNTRACK(self); + Py_CLEAR(self->readnl); + Py_CLEAR(self->writenl); + Py_CLEAR(self->decoder); + if (self->buf) + PyMem_Free(self->buf); + if (self->weakreflist != NULL) + PyObject_ClearWeakRefs((PyObject *) self); + Py_TYPE(self)->tp_free(self); +} + +static PyObject * +stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + StringIOObject *self; + + assert(type != NULL && type->tp_alloc != NULL); + self = (StringIOObject *)type->tp_alloc(type, 0); + if (self == NULL) + return NULL; + + self->string_size = 0; + self->pos = 0; + self->buf_size = 0; + self->buf = (Py_UNICODE *)PyMem_Malloc(0); + if (self->buf == NULL) { + Py_DECREF(self); + return PyErr_NoMemory(); + } + + return (PyObject *)self; +} + +static int +stringio_init(StringIOObject *self, PyObject *args, PyObject *kwds) +{ + char *kwlist[] = {"initial_value", "newline", NULL}; + PyObject *value = NULL; + char *newline = "\n"; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oz:__init__", kwlist, + &value, &newline)) + return -1; + + if (newline && newline[0] != '\0' + && !(newline[0] == '\n' && newline[1] == '\0') + && !(newline[0] == '\r' && newline[1] == '\0') + && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) { + PyErr_Format(PyExc_ValueError, + "illegal 
newline value: %s", newline); + return -1; + } + if (value && value != Py_None && !PyUnicode_Check(value)) { + PyErr_Format(PyExc_ValueError, + "initial_value must be str or None, not %.200s", + Py_TYPE(value)->tp_name); + return -1; + } + + self->ok = 0; + + Py_CLEAR(self->readnl); + Py_CLEAR(self->writenl); + Py_CLEAR(self->decoder); + + if (newline) { + self->readnl = PyString_FromString(newline); + if (self->readnl == NULL) + return -1; + } + self->readuniversal = (newline == NULL || newline[0] == '\0'); + self->readtranslate = (newline == NULL); + /* If newline == "", we don't translate anything. + If newline == "\n" or newline == None, we translate to "\n", which is + a no-op. + (for newline == None, TextIOWrapper translates to os.linesep, but it + is pointless for StringIO) + */ + if (newline != NULL && newline[0] == '\r') { + self->writenl = PyUnicode_FromString(newline); + } + + if (self->readuniversal) { + self->decoder = PyObject_CallFunction( + (PyObject *)&PyIncrementalNewlineDecoder_Type, + "Oi", Py_None, (int) self->readtranslate); + if (self->decoder == NULL) + return -1; + } + + /* Now everything is set up, resize buffer to size of initial value, + and copy it */ + self->string_size = 0; + if (value && value != Py_None) { + Py_ssize_t len = PyUnicode_GetSize(value); + /* This is a heuristic, for newline translation might change + the string length. 
*/ + if (resize_buffer(self, len) < 0) + return -1; + self->pos = 0; + if (write_str(self, value) < 0) + return -1; + } + else { + if (resize_buffer(self, 0) < 0) + return -1; + } + self->pos = 0; + + self->closed = 0; + self->ok = 1; + return 0; +} + +/* Properties and pseudo-properties */ +static PyObject * +stringio_seekable(StringIOObject *self, PyObject *args) +{ + CHECK_INITIALIZED(self); + Py_RETURN_TRUE; +} + +static PyObject * +stringio_readable(StringIOObject *self, PyObject *args) +{ + CHECK_INITIALIZED(self); + Py_RETURN_TRUE; +} + +static PyObject * +stringio_writable(StringIOObject *self, PyObject *args) +{ + CHECK_INITIALIZED(self); + Py_RETURN_TRUE; +} + +static PyObject * +stringio_buffer(StringIOObject *self, void *context) +{ + PyErr_SetString(_PyIO_unsupported_operation, + "buffer attribute is unsupported on type StringIO"); + return NULL; +} + +static PyObject * +stringio_closed(StringIOObject *self, void *context) +{ + CHECK_INITIALIZED(self); + return PyBool_FromLong(self->closed); +} + +static PyObject * +stringio_line_buffering(StringIOObject *self, void *context) +{ + CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + Py_RETURN_FALSE; +} + +static PyObject * +stringio_newlines(StringIOObject *self, void *context) +{ + CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + if (self->decoder == NULL) + Py_RETURN_NONE; + return PyObject_GetAttr(self->decoder, _PyIO_str_newlines); +} + +static struct PyMethodDef stringio_methods[] = { + {"close", (PyCFunction)stringio_close, METH_NOARGS, stringio_close_doc}, + {"getvalue", (PyCFunction)stringio_getvalue, METH_VARARGS, stringio_getvalue_doc}, + {"read", (PyCFunction)stringio_read, METH_VARARGS, stringio_read_doc}, + {"readline", (PyCFunction)stringio_readline, METH_VARARGS, stringio_readline_doc}, + {"tell", (PyCFunction)stringio_tell, METH_NOARGS, stringio_tell_doc}, + {"truncate", (PyCFunction)stringio_truncate, METH_VARARGS, stringio_truncate_doc}, + {"seek", (PyCFunction)stringio_seek, 
METH_VARARGS, stringio_seek_doc}, + {"write", (PyCFunction)stringio_write, METH_O, stringio_write_doc}, + + {"seekable", (PyCFunction)stringio_seekable, METH_NOARGS}, + {"readable", (PyCFunction)stringio_readable, METH_NOARGS}, + {"writable", (PyCFunction)stringio_writable, METH_NOARGS}, + {NULL, NULL} /* sentinel */ +}; + +static PyGetSetDef stringio_getset[] = { + {"closed", (getter)stringio_closed, NULL, NULL}, + {"newlines", (getter)stringio_newlines, NULL, NULL}, + /* (following comments straight off of the original Python wrapper:) + XXX Cruft to support the TextIOWrapper API. This would only + be meaningful if StringIO supported the buffer attribute. + Hopefully, a better solution, than adding these pseudo-attributes, + will be found. + */ + {"buffer", (getter)stringio_buffer, NULL, NULL}, + {"line_buffering", (getter)stringio_line_buffering, NULL, NULL}, + {NULL} +}; + +PyTypeObject PyStringIO_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_io.StringIO", /*tp_name*/ + sizeof(StringIOObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)stringio_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_reserved*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE + | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + stringio_doc, /*tp_doc*/ + (traverseproc)stringio_traverse, /*tp_traverse*/ + (inquiry)stringio_clear, /*tp_clear*/ + 0, /*tp_richcompare*/ + offsetof(StringIOObject, weakreflist), /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + (iternextfunc)stringio_iternext, /*tp_iternext*/ + stringio_methods, /*tp_methods*/ + 0, /*tp_members*/ + stringio_getset, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + offsetof(StringIOObject, dict), /*tp_dictoffset*/ + (initproc)stringio_init, /*tp_init*/ + 0, 
/*tp_alloc*/ + stringio_new, /*tp_new*/ +}; diff -r c5c04a1ff3b4 -r b3fbeeb23fd6 Modules/_io/textio.c --- /dev/null +++ b/Modules/_io/textio.c @@ -0,0 +1,2609 @@ +/* + An implementation of Text I/O as defined by PEP 3116 - "New I/O" + + Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper. + + Written by Amaury Forgeot d'Arc and Antoine Pitrou +*/ + +#define PY_SSIZE_T_CLEAN +#include "Python.h" +#include "structmember.h" +#include "_iomodule.h" + +/* TextIOBase */ + +PyDoc_STRVAR(TextIOBase_doc, + "Base class for text I/O.\n" + "\n" + "This class provides a character and line based interface to stream\n" + "I/O. There is no readinto method because Python's character strings\n" + "are immutable. There is no public constructor.\n" + ); + +static PyObject * +_unsupported(const char *message) +{ + PyErr_SetString(_PyIO_unsupported_operation, message); + return NULL; +} + +PyDoc_STRVAR(TextIOBase_detach_doc, + "Separate the underlying buffer from the TextIOBase and return it.\n" + "\n" + "After the underlying buffer has been detached, the TextIO is in an\n" + "unusable state.\n" + ); + +static PyObject * +TextIOBase_detach(PyObject *self) +{ + return _unsupported("detach"); +} + +PyDoc_STRVAR(TextIOBase_read_doc, + "Read at most n characters from stream.\n" + "\n" + "Read from underlying buffer until we have n characters or we hit EOF.\n" + "If n is negative or omitted, read until EOF.\n" + ); + +static PyObject * +TextIOBase_read(PyObject *self, PyObject *args) +{ + return _unsupported("read"); +} + +PyDoc_STRVAR(TextIOBase_readline_doc, + "Read until newline or EOF.\n" + "\n" + "Returns an empty string if EOF is hit immediately.\n" + ); + +static PyObject * +TextIOBase_readline(PyObject *self, PyObject *args) +{ + return _unsupported("readline"); +} + +PyDoc_STRVAR(TextIOBase_write_doc, + "Write string to stream.\n" + "Returns the number of characters written (which is always equal to\n" + "the length of the string).\n" + ); + +static PyObject * 
+TextIOBase_write(PyObject *self, PyObject *args) +{ + return _unsupported("write"); +} + +PyDoc_STRVAR(TextIOBase_encoding_doc, + "Encoding of the text stream.\n" + "\n" + "Subclasses should override.\n" + ); + +static PyObject * +TextIOBase_encoding_get(PyObject *self, void *context) +{ + Py_RETURN_NONE; +} + +PyDoc_STRVAR(TextIOBase_newlines_doc, + "Line endings translated so far.\n" + "\n" + "Only line endings translated during reading are considered.\n" + "\n" + "Subclasses should override.\n" + ); + +static PyObject * +TextIOBase_newlines_get(PyObject *self, void *context) +{ + Py_RETURN_NONE; +} + +PyDoc_STRVAR(TextIOBase_errors_doc, + "The error setting of the decoder or encoder.\n" + "\n" + "Subclasses should override.\n" + ); + +static PyObject * +TextIOBase_errors_get(PyObject *self, void *context) +{ + Py_RETURN_NONE; +} + + +static PyMethodDef TextIOBase_methods[] = { + {"detach", (PyCFunction)TextIOBase_detach, METH_NOARGS, TextIOBase_detach_doc}, + {"read", TextIOBase_read, METH_VARARGS, TextIOBase_read_doc}, + {"readline", TextIOBase_readline, METH_VARARGS, TextIOBase_readline_doc}, + {"write", TextIOBase_write, METH_VARARGS, TextIOBase_write_doc}, + {NULL, NULL} +}; + +static PyGetSetDef TextIOBase_getset[] = { + {"encoding", (getter)TextIOBase_encoding_get, NULL, TextIOBase_encoding_doc}, + {"newlines", (getter)TextIOBase_newlines_get, NULL, TextIOBase_newlines_doc}, + {"errors", (getter)TextIOBase_errors_get, NULL, TextIOBase_errors_doc}, + {NULL} +}; + +PyTypeObject PyTextIOBase_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_io._TextIOBase", /*tp_name*/ + 0, /*tp_basicsize*/ + 0, /*tp_itemsize*/ + 0, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare */ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ 
+ TextIOBase_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + TextIOBase_methods, /* tp_methods */ + 0, /* tp_members */ + TextIOBase_getset, /* tp_getset */ + &PyIOBase_Type, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ +}; + + +/* IncrementalNewlineDecoder */ + +PyDoc_STRVAR(IncrementalNewlineDecoder_doc, + "Codec used when reading a file in universal newlines mode. It wraps\n" + "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n" + "records the types of newlines encountered. When used with\n" + "translate=False, it ensures that the newline sequence is returned in\n" + "one piece. When used with decoder=None, it expects unicode strings as\n" + "decode input and translates newlines without first invoking an external\n" + "decoder.\n" + ); + +typedef struct { + PyObject_HEAD + PyObject *decoder; + PyObject *errors; + int pendingcr:1; + int translate:1; + unsigned int seennl:3; +} PyNewLineDecoderObject; + +static int +IncrementalNewlineDecoder_init(PyNewLineDecoderObject *self, + PyObject *args, PyObject *kwds) +{ + PyObject *decoder; + int translate; + PyObject *errors = NULL; + char *kwlist[] = {"decoder", "translate", "errors", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder", + kwlist, &decoder, &translate, &errors)) + return -1; + + self->decoder = decoder; + Py_INCREF(decoder); + + if (errors == NULL) { + self->errors = PyUnicode_FromString("strict"); + if (self->errors == NULL) + return -1; + } + else { + Py_INCREF(errors); + self->errors = errors; + } + + self->translate = translate; + self->seennl = 0; + self->pendingcr = 0; + + return 0; +} + +static void +IncrementalNewlineDecoder_dealloc(PyNewLineDecoderObject *self) +{ + Py_CLEAR(self->decoder); + 
Py_CLEAR(self->errors); + Py_TYPE(self)->tp_free((PyObject *)self); +} + +#define SEEN_CR 1 +#define SEEN_LF 2 +#define SEEN_CRLF 4 +#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF) + +PyObject * +_PyIncrementalNewlineDecoder_decode(PyObject *_self, + PyObject *input, int final) +{ + PyObject *output; + Py_ssize_t output_len; + PyNewLineDecoderObject *self = (PyNewLineDecoderObject *) _self; + + if (self->decoder == NULL) { + PyErr_SetString(PyExc_ValueError, + "IncrementalNewlineDecoder.__init__ not called"); + return NULL; + } + + /* decode input (with the eventual \r from a previous pass) */ + if (self->decoder != Py_None) { + output = PyObject_CallMethodObjArgs(self->decoder, + _PyIO_str_decode, input, final ? Py_True : Py_False, NULL); + } + else { + output = input; + Py_INCREF(output); + } + + if (output == NULL) + return NULL; + + if (!PyUnicode_Check(output)) { + PyErr_SetString(PyExc_TypeError, + "decoder should return a string result"); + goto error; + } + + output_len = PyUnicode_GET_SIZE(output); + if (self->pendingcr && (final || output_len > 0)) { + Py_UNICODE *out; + PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1); + if (modified == NULL) + goto error; + out = PyUnicode_AS_UNICODE(modified); + out[0] = '\r'; + memcpy(out + 1, PyUnicode_AS_UNICODE(output), + output_len * sizeof(Py_UNICODE)); + Py_DECREF(output); + output = modified; + self->pendingcr = 0; + output_len++; + } + + /* retain last \r even when not translating data: + * then readline() is sure to get \r\n in one pass + */ + if (!final) { + if (output_len > 0 + && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') { + + if (Py_REFCNT(output) == 1) { + if (PyUnicode_Resize(&output, output_len - 1) < 0) + goto error; + } + else { + PyObject *modified = PyUnicode_FromUnicode( + PyUnicode_AS_UNICODE(output), + output_len - 1); + if (modified == NULL) + goto error; + Py_DECREF(output); + output = modified; + } + self->pendingcr = 1; + } + } + + /* Record which newlines are 
read and do newline translation if desired, + all in one pass. */ + { + Py_UNICODE *in_str; + Py_ssize_t len; + int seennl = self->seennl; + int only_lf = 0; + + in_str = PyUnicode_AS_UNICODE(output); + len = PyUnicode_GET_SIZE(output); + + if (len == 0) + return output; + + /* If, up to now, newlines are consistently \n, do a quick check + for the \r *byte* with the libc's optimized memchr. + */ + if (seennl == SEEN_LF || seennl == 0) { + only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL); + } + + if (only_lf) { + /* If not already seen, quick scan for a possible "\n" character. + (there's nothing else to be done, even when in translation mode) + */ + if (seennl == 0 && + memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) { + Py_UNICODE *s, *end; + s = in_str; + end = in_str + len; + for (;;) { + Py_UNICODE c; + /* Fast loop for non-control characters */ + while (*s > '\n') + s++; + c = *s++; + if (c == '\n') { + seennl |= SEEN_LF; + break; + } + if (s > end) + break; + } + } + /* Finished: we have scanned for newlines, and none of them + need translating */ + } + else if (!self->translate) { + Py_UNICODE *s, *end; + /* We have already seen all newline types, no need to scan again */ + if (seennl == SEEN_ALL) + goto endscan; + s = in_str; + end = in_str + len; + for (;;) { + Py_UNICODE c; + /* Fast loop for non-control characters */ + while (*s > '\r') + s++; + c = *s++; + if (c == '\n') + seennl |= SEEN_LF; + else if (c == '\r') { + if (*s == '\n') { + seennl |= SEEN_CRLF; + s++; + } + else + seennl |= SEEN_CR; + } + if (s > end) + break; + if (seennl == SEEN_ALL) + break; + } + endscan: + ; + } + else { + PyObject *translated = NULL; + Py_UNICODE *out_str; + Py_UNICODE *in, *out, *end; + if (Py_REFCNT(output) != 1) { + /* We could try to optimize this so that we only do a copy + when there is something to translate. On the other hand, + most decoders should only output non-shared strings, i.e. + translation is done in place. 
*/ + translated = PyUnicode_FromUnicode(NULL, len); + if (translated == NULL) + goto error; + assert(Py_REFCNT(translated) == 1); + memcpy(PyUnicode_AS_UNICODE(translated), + PyUnicode_AS_UNICODE(output), + len * sizeof(Py_UNICODE)); + } + else { + translated = output; + } + out_str = PyUnicode_AS_UNICODE(translated); + in = in_str; + out = out_str; + end = in_str + len; + for (;;) { + Py_UNICODE c; + /* Fast loop for non-control characters */ + while ((c = *in++) > '\r') + *out++ = c; + if (c == '\n') { + *out++ = c; + seennl |= SEEN_LF; + continue; + } + if (c == '\r') { + if (*in == '\n') { + in++; + seennl |= SEEN_CRLF; + } + else + seennl |= SEEN_CR; + *out++ = '\n'; + continue; + } + if (in > end) + break; + *out++ = c; + } + if (translated != output) { + Py_DECREF(output); + output = translated; + } + if (out - out_str != len) { + if (PyUnicode_Resize(&output, out - out_str) < 0) + goto error; + } + } + self->seennl |= seennl; + } + + return output; + + error: + Py_DECREF(output); + return NULL; +} + +static PyObject * +IncrementalNewlineDecoder_decode(PyNewLineDecoderObject *self, + PyObject *args, PyObject *kwds) +{ + char *kwlist[] = {"input", "final", NULL}; + PyObject *input; + int final = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder", + kwlist, &input, &final)) + return NULL; + return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final); +} + +static PyObject * +IncrementalNewlineDecoder_getstate(PyNewLineDecoderObject *self, PyObject *args) +{ + PyObject *buffer; + unsigned PY_LONG_LONG flag; + + if (self->decoder != Py_None) { + PyObject *state = PyObject_CallMethodObjArgs(self->decoder, + _PyIO_str_getstate, NULL); + if (state == NULL) + return NULL; + if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) { + Py_DECREF(state); + return NULL; + } + Py_INCREF(buffer); + Py_DECREF(state); + } + else { + buffer = PyBytes_FromString(""); + flag = 0; + } + flag <<= 1; + if (self->pendingcr) + flag |= 1; + 
return Py_BuildValue("NK", buffer, flag); +} + +static PyObject * +IncrementalNewlineDecoder_setstate(PyNewLineDecoderObject *self, PyObject *state) +{ + PyObject *buffer; + unsigned PY_LONG_LONG flag; + + if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) + return NULL; + + self->pendingcr = (int) flag & 1; + flag >>= 1; + + if (self->decoder != Py_None) + return PyObject_CallMethod(self->decoder, + "setstate", "((OK))", buffer, flag); + else + Py_RETURN_NONE; +} + +static PyObject * +IncrementalNewlineDecoder_reset(PyNewLineDecoderObject *self, PyObject *args) +{ + self->seennl = 0; + self->pendingcr = 0; + if (self->decoder != Py_None) + return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL); + else + Py_RETURN_NONE; +} + +static PyObject * +IncrementalNewlineDecoder_newlines_get(PyNewLineDecoderObject *self, void *context) +{ + switch (self->seennl) { + case SEEN_CR: + return PyUnicode_FromString("\r"); + case SEEN_LF: + return PyUnicode_FromString("\n"); + case SEEN_CRLF: + return PyUnicode_FromString("\r\n"); + case SEEN_CR | SEEN_LF: + return Py_BuildValue("ss", "\r", "\n"); + case SEEN_CR | SEEN_CRLF: + return Py_BuildValue("ss", "\r", "\r\n"); + case SEEN_LF | SEEN_CRLF: + return Py_BuildValue("ss", "\n", "\r\n"); + case SEEN_CR | SEEN_LF | SEEN_CRLF: + return Py_BuildValue("sss", "\r", "\n", "\r\n"); + default: + Py_RETURN_NONE; + } + +} + + +static PyMethodDef IncrementalNewlineDecoder_methods[] = { + {"decode", (PyCFunction)IncrementalNewlineDecoder_decode, METH_VARARGS|METH_KEYWORDS}, + {"getstate", (PyCFunction)IncrementalNewlineDecoder_getstate, METH_NOARGS}, + {"setstate", (PyCFunction)IncrementalNewlineDecoder_setstate, METH_O}, + {"reset", (PyCFunction)IncrementalNewlineDecoder_reset, METH_NOARGS}, + {NULL} +}; + +static PyGetSetDef IncrementalNewlineDecoder_getset[] = { + {"newlines", (getter)IncrementalNewlineDecoder_newlines_get, NULL, NULL}, + {NULL} +}; + +PyTypeObject PyIncrementalNewlineDecoder_Type = { + 
PyVarObject_HEAD_INIT(NULL, 0) + "_io.IncrementalNewlineDecoder", /*tp_name*/ + sizeof(PyNewLineDecoderObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)IncrementalNewlineDecoder_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare */ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + IncrementalNewlineDecoder_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /*tp_weaklistoffset*/ + 0, /* tp_iter */ + 0, /* tp_iternext */ + IncrementalNewlineDecoder_methods, /* tp_methods */ + 0, /* tp_members */ + IncrementalNewlineDecoder_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)IncrementalNewlineDecoder_init, /* tp_init */ + 0, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ +}; + + +/* TextIOWrapper */ + +PyDoc_STRVAR(TextIOWrapper_doc, + "Character and line based layer over a BufferedIOBase object, buffer.\n" + "\n" + "encoding gives the name of the encoding that the stream will be\n" + "decoded or encoded with. It defaults to locale.getpreferredencoding.\n" + "\n" + "errors determines the strictness of encoding and decoding (see the\n" + "codecs.register) and defaults to \"strict\".\n" + "\n" + "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n" + "handling of line endings. If it is None, universal newlines is\n" + "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n" + "or '\\r\\n' are translated to '\\n' before being returned to the\n" + "caller. Conversely, on output, '\\n' is translated to the system\n" + "default line seperator, os.linesep. 
If newline is any other of its\n" + "legal values, that newline becomes the newline when the file is read\n" + "and it is returned untranslated. On output, '\\n' is converted to the\n" + "newline.\n" + "\n" + "If line_buffering is True, a call to flush is implied when a call to\n" + "write contains a newline character." + ); + +typedef PyObject * + (*encodefunc_t)(PyObject *, PyObject *); + +typedef struct +{ + PyObject_HEAD + int ok; /* initialized? */ + int detached; + Py_ssize_t chunk_size; + PyObject *buffer; + PyObject *encoding; + PyObject *encoder; + PyObject *decoder; + PyObject *readnl; + PyObject *errors; + const char *writenl; /* utf-8 encoded, NULL stands for \n */ + char line_buffering; + char readuniversal; + char readtranslate; + char writetranslate; + char seekable; + char telling; + /* Specialized encoding func (see below) */ + encodefunc_t encodefunc; + /* Whether or not it's the start of the stream */ + char encoding_start_of_stream; + + /* Reads and writes are internally buffered in order to speed things up. + However, any read will first flush the write buffer if it isn't empty. + + Please also note that text to be written is first encoded before being + buffered. This is necessary so that encoding errors are immediately + reported to the caller, but it unfortunately means that the + IncrementalEncoder (whose encode() method is always written in Python) + becomes a bottleneck for small writes. + */ + PyObject *decoded_chars; /* buffer for text returned from decoder */ + Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */ + PyObject *pending_bytes; /* list of bytes objects waiting to be + written, or NULL */ + Py_ssize_t pending_bytes_count; + PyObject *snapshot; + /* snapshot is either None, or a tuple (dec_flags, next_input) where + * dec_flags is the second (integer) item of the decoder state and + * next_input is the chunk of input bytes that comes next after the + * snapshot point. 
We use this to reconstruct decoder states in tell(). + */ + + /* Cache raw object if it's a FileIO object */ + PyObject *raw; + + PyObject *weakreflist; + PyObject *dict; +} PyTextIOWrapperObject; + + +/* A couple of specialized cases in order to bypass the slow incremental + encoding methods for the most popular encodings. */ + +static PyObject * +ascii_encode(PyTextIOWrapperObject *self, PyObject *text) +{ + return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text), + PyUnicode_GET_SIZE(text), + PyBytes_AS_STRING(self->errors)); +} + +static PyObject * +utf16be_encode(PyTextIOWrapperObject *self, PyObject *text) +{ + return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text), + PyUnicode_GET_SIZE(text), + PyBytes_AS_STRING(self->errors), 1); +} + +static PyObject * +utf16le_encode(PyTextIOWrapperObject *self, PyObject *text) +{ + return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text), + PyUnicode_GET_SIZE(text), + PyBytes_AS_STRING(self->errors), -1); +} + +static PyObject * +utf16_encode(PyTextIOWrapperObject *self, PyObject *text) +{ + if (!self->encoding_start_of_stream) { + /* Skip the BOM and use native byte ordering */ +#if defined(WORDS_BIGENDIAN) + return utf16be_encode(self, text); +#else + return utf16le_encode(self, text); +#endif + } + return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text), + PyUnicode_GET_SIZE(text), + PyBytes_AS_STRING(self->errors), 0); +} + +static PyObject * +utf32be_encode(PyTextIOWrapperObject *self, PyObject *text) +{ + return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text), + PyUnicode_GET_SIZE(text), + PyBytes_AS_STRING(self->errors), 1); +} + +static PyObject * +utf32le_encode(PyTextIOWrapperObject *self, PyObject *text) +{ + return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text), + PyUnicode_GET_SIZE(text), + PyBytes_AS_STRING(self->errors), -1); +} + +static PyObject * +utf32_encode(PyTextIOWrapperObject *self, PyObject *text) +{ + if (!self->encoding_start_of_stream) { + /* Skip the BOM and use native byte ordering */ 
+#if defined(WORDS_BIGENDIAN) + return utf32be_encode(self, text); +#else + return utf32le_encode(self, text); +#endif + } + return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text), + PyUnicode_GET_SIZE(text), + PyBytes_AS_STRING(self->errors), 0); +} + +static PyObject * +utf8_encode(PyTextIOWrapperObject *self, PyObject *text) +{ + return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text), + PyUnicode_GET_SIZE(text), + PyBytes_AS_STRING(self->errors)); +} + +static PyObject * +latin1_encode(PyTextIOWrapperObject *self, PyObject *text) +{ + return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text), + PyUnicode_GET_SIZE(text), + PyBytes_AS_STRING(self->errors)); +} + +/* Map normalized encoding names onto the specialized encoding funcs */ + +typedef struct { + const char *name; + encodefunc_t encodefunc; +} encodefuncentry; + +static encodefuncentry encodefuncs[] = { + {"ascii", (encodefunc_t) ascii_encode}, + {"iso8859-1", (encodefunc_t) latin1_encode}, + {"utf-8", (encodefunc_t) utf8_encode}, + {"utf-16-be", (encodefunc_t) utf16be_encode}, + {"utf-16-le", (encodefunc_t) utf16le_encode}, + {"utf-16", (encodefunc_t) utf16_encode}, + {"utf-32-be", (encodefunc_t) utf32be_encode}, + {"utf-32-le", (encodefunc_t) utf32le_encode}, + {"utf-32", (encodefunc_t) utf32_encode}, + {NULL, NULL} +}; + + +static int +TextIOWrapper_init(PyTextIOWrapperObject *self, PyObject *args, PyObject *kwds) +{ + char *kwlist[] = {"buffer", "encoding", "errors", + "newline", "line_buffering", + NULL}; + PyObject *buffer, *raw; + char *encoding = NULL; + char *errors = NULL; + char *newline = NULL; + int line_buffering = 0; + + PyObject *res; + int r; + + self->ok = 0; + self->detached = 0; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio", + kwlist, &buffer, &encoding, &errors, + &newline, &line_buffering)) + return -1; + + if (newline && newline[0] != '\0' + && !(newline[0] == '\n' && newline[1] == '\0') + && !(newline[0] == '\r' && newline[1] == '\0') + && !(newline[0] == '\r' && 
newline[1] == '\n' && newline[2] == '\0')) { + PyErr_Format(PyExc_ValueError, + "illegal newline value: %s", newline); + return -1; + } + + Py_CLEAR(self->buffer); + Py_CLEAR(self->encoding); + Py_CLEAR(self->encoder); + Py_CLEAR(self->decoder); + Py_CLEAR(self->readnl); + Py_CLEAR(self->decoded_chars); + Py_CLEAR(self->pending_bytes); + Py_CLEAR(self->snapshot); + Py_CLEAR(self->errors); + Py_CLEAR(self->raw); + self->decoded_chars_used = 0; + self->pending_bytes_count = 0; + self->encodefunc = NULL; + self->writenl = NULL; + + if (encoding == NULL && self->encoding == NULL) { + if (_PyIO_locale_module == NULL) { + _PyIO_locale_module = PyImport_ImportModule("locale"); + if (_PyIO_locale_module == NULL) + goto catch_ImportError; + else + goto use_locale; + } + else { + use_locale: + self->encoding = PyObject_CallMethod( + _PyIO_locale_module, "getpreferredencoding", NULL); + if (self->encoding == NULL) { + catch_ImportError: + /* + Importing locale can raise a ImportError because of + _functools, and locale.getpreferredencoding can raise a + ImportError if _locale is not available. These will happen + during module building. 
+ */ + if (PyErr_ExceptionMatches(PyExc_ImportError)) { + PyErr_Clear(); + self->encoding = PyString_FromString("ascii"); + } + else + goto error; + } + else if (!PyString_Check(self->encoding)) + Py_CLEAR(self->encoding); + } + } + if (self->encoding != NULL) + encoding = PyString_AsString(self->encoding); + else if (encoding != NULL) { + self->encoding = PyString_FromString(encoding); + if (self->encoding == NULL) + goto error; + } + else { + PyErr_SetString(PyExc_IOError, + "could not determine default encoding"); + } + + if (errors == NULL) + errors = "strict"; + self->errors = PyBytes_FromString(errors); + if (self->errors == NULL) + goto error; + + self->chunk_size = 8192; + self->readuniversal = (newline == NULL || newline[0] == '\0'); + self->line_buffering = line_buffering; + self->readtranslate = (newline == NULL); + if (newline) { + self->readnl = PyString_FromString(newline); + if (self->readnl == NULL) + return -1; + } + self->writetranslate = (newline == NULL || newline[0] != '\0'); + if (!self->readuniversal && self->writetranslate) { + self->writenl = PyString_AsString(self->readnl); + if (!strcmp(self->writenl, "\n")) + self->writenl = NULL; + } +#ifdef MS_WINDOWS + else + self->writenl = "\r\n"; +#endif + + /* Build the decoder object */ + res = PyObject_CallMethod(buffer, "readable", NULL); + if (res == NULL) + goto error; + r = PyObject_IsTrue(res); + Py_DECREF(res); + if (r == -1) + goto error; + if (r == 1) { + self->decoder = PyCodec_IncrementalDecoder( + encoding, errors); + if (self->decoder == NULL) + goto error; + + if (self->readuniversal) { + PyObject *incrementalDecoder = PyObject_CallFunction( + (PyObject *)&PyIncrementalNewlineDecoder_Type, + "Oi", self->decoder, (int)self->readtranslate); + if (incrementalDecoder == NULL) + goto error; + Py_CLEAR(self->decoder); + self->decoder = incrementalDecoder; + } + } + + /* Build the encoder object */ + res = PyObject_CallMethod(buffer, "writable", NULL); + if (res == NULL) + goto error; + r 
= PyObject_IsTrue(res); + Py_DECREF(res); + if (r == -1) + goto error; + if (r == 1) { + PyObject *ci; + self->encoder = PyCodec_IncrementalEncoder( + encoding, errors); + if (self->encoder == NULL) + goto error; + /* Get the normalized named of the codec */ + ci = _PyCodec_Lookup(encoding); + if (ci == NULL) + goto error; + res = PyObject_GetAttrString(ci, "name"); + Py_DECREF(ci); + if (res == NULL) { + if (PyErr_ExceptionMatches(PyExc_AttributeError)) + PyErr_Clear(); + else + goto error; + } + else if (PyString_Check(res)) { + encodefuncentry *e = encodefuncs; + while (e->name != NULL) { + if (!strcmp(PyString_AS_STRING(res), e->name)) { + self->encodefunc = e->encodefunc; + break; + } + e++; + } + } + Py_XDECREF(res); + } + + self->buffer = buffer; + Py_INCREF(buffer); + + if (Py_TYPE(buffer) == &PyBufferedReader_Type || + Py_TYPE(buffer) == &PyBufferedWriter_Type || + Py_TYPE(buffer) == &PyBufferedRandom_Type) { + raw = PyObject_GetAttrString(buffer, "raw"); + /* Cache the raw FileIO object to speed up 'closed' checks */ + if (raw == NULL) { + if (PyErr_ExceptionMatches(PyExc_AttributeError)) + PyErr_Clear(); + else + goto error; + } + else if (Py_TYPE(raw) == &PyFileIO_Type) + self->raw = raw; + else + Py_DECREF(raw); + } + + res = PyObject_CallMethod(buffer, "seekable", NULL); + if (res == NULL) + goto error; + self->seekable = self->telling = PyObject_IsTrue(res); + Py_DECREF(res); + + self->encoding_start_of_stream = 0; + if (self->seekable && self->encoder) { + PyObject *cookieObj; + int cmp; + + self->encoding_start_of_stream = 1; + + cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL); + if (cookieObj == NULL) + goto error; + + cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ); + Py_DECREF(cookieObj); + if (cmp < 0) { + goto error; + } + + if (cmp == 0) { + self->encoding_start_of_stream = 0; + res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate, + _PyIO_zero, NULL); + if (res == NULL) + goto error; + 
Py_DECREF(res); + } + } + + self->ok = 1; + return 0; + + error: + return -1; +} + +static int +_TextIOWrapper_clear(PyTextIOWrapperObject *self) +{ + if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0) + return -1; + self->ok = 0; + Py_CLEAR(self->buffer); + Py_CLEAR(self->encoding); + Py_CLEAR(self->encoder); + Py_CLEAR(self->decoder); + Py_CLEAR(self->readnl); + Py_CLEAR(self->decoded_chars); + Py_CLEAR(self->pending_bytes); + Py_CLEAR(self->snapshot); + Py_CLEAR(self->errors); + Py_CLEAR(self->raw); + return 0; +} + +static void +TextIOWrapper_dealloc(PyTextIOWrapperObject *self) +{ + if (_TextIOWrapper_clear(self) < 0) + return; + _PyObject_GC_UNTRACK(self); + if (self->weakreflist != NULL) + PyObject_ClearWeakRefs((PyObject *)self); + Py_CLEAR(self->dict); + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static int +TextIOWrapper_traverse(PyTextIOWrapperObject *self, visitproc visit, void *arg) +{ + Py_VISIT(self->buffer); + Py_VISIT(self->encoding); + Py_VISIT(self->encoder); + Py_VISIT(self->decoder); + Py_VISIT(self->readnl); + Py_VISIT(self->decoded_chars); + Py_VISIT(self->pending_bytes); + Py_VISIT(self->snapshot); + Py_VISIT(self->errors); + Py_VISIT(self->raw); + + Py_VISIT(self->dict); + return 0; +} + +static int +TextIOWrapper_clear(PyTextIOWrapperObject *self) +{ + if (_TextIOWrapper_clear(self) < 0) + return -1; + Py_CLEAR(self->dict); + return 0; +} + +static PyObject * +TextIOWrapper_closed_get(PyTextIOWrapperObject *self, void *context); + +/* This macro takes some shortcuts to make the common case faster. 
*/ +#define CHECK_CLOSED(self) \ + do { \ + int r; \ + PyObject *_res; \ + if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \ + if (self->raw != NULL) \ + r = _PyFileIO_closed(self->raw); \ + else { \ + _res = TextIOWrapper_closed_get(self, NULL); \ + if (_res == NULL) \ + return NULL; \ + r = PyObject_IsTrue(_res); \ + Py_DECREF(_res); \ + if (r < 0) \ + return NULL; \ + } \ + if (r > 0) { \ + PyErr_SetString(PyExc_ValueError, \ + "I/O operation on closed file."); \ + return NULL; \ + } \ + } \ + else if (_PyIOBase_checkClosed((PyObject *)self, Py_True) == NULL) \ + return NULL; \ + } while (0) + +#define CHECK_INITIALIZED(self) \ + if (self->ok <= 0) { \ + if (self->detached) { \ + PyErr_SetString(PyExc_ValueError, \ + "underlying buffer has been detached"); \ + } else { \ + PyErr_SetString(PyExc_ValueError, \ + "I/O operation on uninitialized object"); \ + } \ + return NULL; \ + } + +#define CHECK_INITIALIZED_INT(self) \ + if (self->ok <= 0) { \ + if (self->detached) { \ + PyErr_SetString(PyExc_ValueError, \ + "underlying buffer has been detached"); \ + } else { \ + PyErr_SetString(PyExc_ValueError, \ + "I/O operation on uninitialized object"); \ + } \ + return -1; \ + } + + +static PyObject * +TextIOWrapper_detach(PyTextIOWrapperObject *self) +{ + PyObject *buffer, *res; + CHECK_INITIALIZED(self); + res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL); + if (res == NULL) + return NULL; + Py_DECREF(res); + buffer = self->buffer; + self->buffer = NULL; + self->detached = 1; + self->ok = 0; + return buffer; +} + +Py_LOCAL_INLINE(const Py_UNICODE *) +findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch) +{ + /* like wcschr, but doesn't stop at NULL characters */ + while (size-- > 0) { + if (*s == ch) + return s; + s++; + } + return NULL; +} + +/* Flush the internal write buffer. This doesn't explicitly flush the + underlying buffered object, though. 
*/ +static int +_TextIOWrapper_writeflush(PyTextIOWrapperObject *self) +{ + PyObject *b, *ret; + + if (self->pending_bytes == NULL) + return 0; + b = _PyBytes_Join(_PyIO_empty_bytes, self->pending_bytes); + if (b == NULL) + return -1; + ret = PyObject_CallMethodObjArgs(self->buffer, + _PyIO_str_write, b, NULL); + Py_DECREF(b); + if (ret == NULL) + return -1; + Py_DECREF(ret); + Py_CLEAR(self->pending_bytes); + self->pending_bytes_count = 0; + return 0; +} + +static PyObject * +TextIOWrapper_write(PyTextIOWrapperObject *self, PyObject *args) +{ + PyObject *ret; + PyObject *text; /* owned reference */ + PyObject *b; + Py_ssize_t textlen; + int haslf = 0; + int needflush = 0; + + CHECK_INITIALIZED(self); + + if (!PyArg_ParseTuple(args, "U:write", &text)) { + return NULL; + } + + CHECK_CLOSED(self); + + if (self->encoder == NULL) { + PyErr_SetString(PyExc_IOError, "not writable"); + return NULL; + } + + Py_INCREF(text); + + textlen = PyUnicode_GetSize(text); + + if ((self->writetranslate && self->writenl != NULL) || self->line_buffering) + if (findchar(PyUnicode_AS_UNICODE(text), + PyUnicode_GET_SIZE(text), '\n')) + haslf = 1; + + if (haslf && self->writetranslate && self->writenl != NULL) { + PyObject *newtext = PyObject_CallMethod( + text, "replace", "ss", "\n", self->writenl); + Py_DECREF(text); + if (newtext == NULL) + return NULL; + text = newtext; + } + + if (self->line_buffering && + (haslf || + findchar(PyUnicode_AS_UNICODE(text), + PyUnicode_GET_SIZE(text), '\r'))) + needflush = 1; + + /* XXX What if we were just reading? 
*/ + if (self->encodefunc != NULL) { + b = (*self->encodefunc)((PyObject *) self, text); + self->encoding_start_of_stream = 0; + } + else + b = PyObject_CallMethodObjArgs(self->encoder, + _PyIO_str_encode, text, NULL); + Py_DECREF(text); + if (b == NULL) + return NULL; + + if (self->pending_bytes == NULL) { + self->pending_bytes = PyList_New(0); + if (self->pending_bytes == NULL) { + Py_DECREF(b); + return NULL; + } + self->pending_bytes_count = 0; + } + if (PyList_Append(self->pending_bytes, b) < 0) { + Py_DECREF(b); + return NULL; + } + self->pending_bytes_count += PyBytes_GET_SIZE(b); + Py_DECREF(b); + if (self->pending_bytes_count > self->chunk_size || needflush) { + if (_TextIOWrapper_writeflush(self) < 0) + return NULL; + } + + if (needflush) { + ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL); + if (ret == NULL) + return NULL; + Py_DECREF(ret); + } + + Py_CLEAR(self->snapshot); + + if (self->decoder) { + ret = PyObject_CallMethod(self->decoder, "reset", NULL); + if (ret == NULL) + return NULL; + Py_DECREF(ret); + } + + return PyLong_FromSsize_t(textlen); +} + +/* Steal a reference to chars and store it in the decoded_char buffer; + */ +static void +TextIOWrapper_set_decoded_chars(PyTextIOWrapperObject *self, PyObject *chars) +{ + Py_CLEAR(self->decoded_chars); + self->decoded_chars = chars; + self->decoded_chars_used = 0; +} + +static PyObject * +TextIOWrapper_get_decoded_chars(PyTextIOWrapperObject *self, Py_ssize_t n) +{ + PyObject *chars; + Py_ssize_t avail; + + if (self->decoded_chars == NULL) + return PyUnicode_FromStringAndSize(NULL, 0); + + avail = (PyUnicode_GET_SIZE(self->decoded_chars) + - self->decoded_chars_used); + + assert(avail >= 0); + + if (n < 0 || n > avail) + n = avail; + + if (self->decoded_chars_used > 0 || n < avail) { + chars = PyUnicode_FromUnicode( + PyUnicode_AS_UNICODE(self->decoded_chars) + + self->decoded_chars_used, n); + if (chars == NULL) + return NULL; + } + else { + chars = self->decoded_chars; + 
Py_INCREF(chars); + } + + self->decoded_chars_used += n; + return chars; +} + +/* Read and decode the next chunk of data from the BufferedReader. + */ +static int +TextIOWrapper_read_chunk(PyTextIOWrapperObject *self) +{ + PyObject *dec_buffer = NULL; + PyObject *dec_flags = NULL; + PyObject *input_chunk = NULL; + PyObject *decoded_chars, *chunk_size; + int eof; + + /* The return value is True unless EOF was reached. The decoded string is + * placed in self._decoded_chars (replacing its previous value). The + * entire input chunk is sent to the decoder, though some of it may remain + * buffered in the decoder, yet to be converted. + */ + + if (self->decoder == NULL) { + PyErr_SetString(PyExc_IOError, "not readable"); + return -1; + } + + if (self->telling) { + /* To prepare for tell(), we need to snapshot a point in the file + * where the decoder's input buffer is empty. + */ + + PyObject *state = PyObject_CallMethodObjArgs(self->decoder, + _PyIO_str_getstate, NULL); + if (state == NULL) + return -1; + /* Given this, we know there was a valid snapshot point + * len(dec_buffer) bytes ago with decoder state (b'', dec_flags). + */ + if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) { + Py_DECREF(state); + return -1; + } + Py_INCREF(dec_buffer); + Py_INCREF(dec_flags); + Py_DECREF(state); + } + + /* Read a chunk, decode it, and put the result in self._decoded_chars. 
*/ + chunk_size = PyLong_FromSsize_t(self->chunk_size); + if (chunk_size == NULL) + goto fail; + input_chunk = PyObject_CallMethodObjArgs(self->buffer, + _PyIO_str_read1, chunk_size, NULL); + Py_DECREF(chunk_size); + if (input_chunk == NULL) + goto fail; + assert(PyBytes_Check(input_chunk)); + + eof = (PyBytes_Size(input_chunk) == 0); + + if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) { + decoded_chars = _PyIncrementalNewlineDecoder_decode( + self->decoder, input_chunk, eof); + } + else { + decoded_chars = PyObject_CallMethodObjArgs(self->decoder, + _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL); + } + + /* TODO sanity check: isinstance(decoded_chars, unicode) */ + if (decoded_chars == NULL) + goto fail; + TextIOWrapper_set_decoded_chars(self, decoded_chars); + if (PyUnicode_GET_SIZE(decoded_chars) > 0) + eof = 0; + + if (self->telling) { + /* At the snapshot point, len(dec_buffer) bytes before the read, the + * next input to be decoded is dec_buffer + input_chunk. 
+ */ + PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk); + if (next_input == NULL) + goto fail; + assert (PyBytes_Check(next_input)); + Py_DECREF(dec_buffer); + Py_CLEAR(self->snapshot); + self->snapshot = Py_BuildValue("NN", dec_flags, next_input); + } + Py_DECREF(input_chunk); + + return (eof == 0); + + fail: + Py_XDECREF(dec_buffer); + Py_XDECREF(dec_flags); + Py_XDECREF(input_chunk); + return -1; +} + +static PyObject * +TextIOWrapper_read(PyTextIOWrapperObject *self, PyObject *args) +{ + Py_ssize_t n = -1; + PyObject *result = NULL, *chunks = NULL; + + CHECK_INITIALIZED(self); + + if (!PyArg_ParseTuple(args, "|n:read", &n)) + return NULL; + + CHECK_CLOSED(self); + + if (self->decoder == NULL) { + PyErr_SetString(PyExc_IOError, "not readable"); + return NULL; + } + + if (_TextIOWrapper_writeflush(self) < 0) + return NULL; + + if (n < 0) { + /* Read everything */ + PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL); + PyObject *decoded, *final; + if (bytes == NULL) + goto fail; + decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode, + bytes, Py_True, NULL); + Py_DECREF(bytes); + if (decoded == NULL) + goto fail; + + result = TextIOWrapper_get_decoded_chars(self, -1); + + if (result == NULL) { + Py_DECREF(decoded); + return NULL; + } + + final = PyUnicode_Concat(result, decoded); + Py_DECREF(result); + Py_DECREF(decoded); + if (final == NULL) + goto fail; + + Py_CLEAR(self->snapshot); + return final; + } + else { + int res = 1; + Py_ssize_t remaining = n; + + result = TextIOWrapper_get_decoded_chars(self, n); + if (result == NULL) + goto fail; + remaining -= PyUnicode_GET_SIZE(result); + + /* Keep reading chunks until we have n characters to return */ + while (remaining > 0) { + res = TextIOWrapper_read_chunk(self); + if (res < 0) + goto fail; + if (res == 0) /* EOF */ + break; + if (chunks == NULL) { + chunks = PyList_New(0); + if (chunks == NULL) + goto fail; + } + if (PyList_Append(chunks, result) < 0) + goto fail; + 
Py_DECREF(result); + result = TextIOWrapper_get_decoded_chars(self, remaining); + if (result == NULL) + goto fail; + remaining -= PyUnicode_GET_SIZE(result); + } + if (chunks != NULL) { + if (result != NULL && PyList_Append(chunks, result) < 0) + goto fail; + Py_CLEAR(result); + result = PyUnicode_Join(_PyIO_empty_str, chunks); + if (result == NULL) + goto fail; + Py_CLEAR(chunks); + } + return result; + } + fail: + Py_XDECREF(result); + Py_XDECREF(chunks); + return NULL; +} + + +/* NOTE: `end` must point to the real end of the Py_UNICODE storage, + that is to the NUL character. Otherwise the function will produce + incorrect results. */ +static Py_UNICODE * +find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch) +{ + Py_UNICODE *s = start; + for (;;) { + while (*s > ch) + s++; + if (*s == ch) + return s; + if (s == end) + return NULL; + s++; + } +} + +Py_ssize_t +_PyIO_find_line_ending( + int translated, int universal, PyObject *readnl, + Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed) +{ + Py_ssize_t len = end - start; + + if (translated) { + /* Newlines are already translated, only search for \n */ + Py_UNICODE *pos = find_control_char(start, end, '\n'); + if (pos != NULL) + return pos - start + 1; + else { + *consumed = len; + return -1; + } + } + else if (universal) { + /* Universal newline search. Find any of \r, \r\n, \n + * The decoder ensures that \r\n are not split in two pieces + */ + Py_UNICODE *s = start; + for (;;) { + Py_UNICODE ch; + /* Fast path for non-control chars. The loop always ends + since the Py_UNICODE storage is NUL-terminated. */ + while (*s > '\r') + s++; + if (s >= end) { + *consumed = len; + return -1; + } + ch = *s++; + if (ch == '\n') + return s - start; + if (ch == '\r') { + if (*s == '\n') + return s - start + 1; + else + return s - start; + } + } + } + else { + /* Non-universal mode. 
*/ + Py_ssize_t readnl_len = PyString_GET_SIZE(readnl); + unsigned char *nl = (unsigned char *) PyString_AS_STRING(readnl); + if (readnl_len == 1) { + Py_UNICODE *pos = find_control_char(start, end, nl[0]); + if (pos != NULL) + return pos - start + 1; + *consumed = len; + return -1; + } + else { + Py_UNICODE *s = start; + Py_UNICODE *e = end - readnl_len + 1; + Py_UNICODE *pos; + if (e < s) + e = s; + while (s < e) { + Py_ssize_t i; + Py_UNICODE *pos = find_control_char(s, end, nl[0]); + if (pos == NULL || pos >= e) + break; + for (i = 1; i < readnl_len; i++) { + if (pos[i] != nl[i]) + break; + } + if (i == readnl_len) + return pos - start + readnl_len; + s = pos + 1; + } + pos = find_control_char(e, end, nl[0]); + if (pos == NULL) + *consumed = len; + else + *consumed = pos - start; + return -1; + } + } +} + +static PyObject * +_TextIOWrapper_readline(PyTextIOWrapperObject *self, Py_ssize_t limit) +{ + PyObject *line = NULL, *chunks = NULL, *remaining = NULL; + Py_ssize_t start, endpos, chunked, offset_to_buffer; + int res; + + CHECK_CLOSED(self); + + if (_TextIOWrapper_writeflush(self) < 0) + return NULL; + + chunked = 0; + + while (1) { + Py_UNICODE *ptr; + Py_ssize_t line_len; + Py_ssize_t consumed = 0; + + /* First, get some data if necessary */ + res = 1; + while (!self->decoded_chars || + !PyUnicode_GET_SIZE(self->decoded_chars)) { + res = TextIOWrapper_read_chunk(self); + if (res < 0) + goto error; + if (res == 0) + break; + } + if (res == 0) { + /* end of file */ + TextIOWrapper_set_decoded_chars(self, NULL); + Py_CLEAR(self->snapshot); + start = endpos = offset_to_buffer = 0; + break; + } + + if (remaining == NULL) { + line = self->decoded_chars; + start = self->decoded_chars_used; + offset_to_buffer = 0; + Py_INCREF(line); + } + else { + assert(self->decoded_chars_used == 0); + line = PyUnicode_Concat(remaining, self->decoded_chars); + start = 0; + offset_to_buffer = PyUnicode_GET_SIZE(remaining); + Py_CLEAR(remaining); + if (line == NULL) + goto error; 
+ } + + ptr = PyUnicode_AS_UNICODE(line); + line_len = PyUnicode_GET_SIZE(line); + + endpos = _PyIO_find_line_ending( + self->readtranslate, self->readuniversal, self->readnl, + ptr + start, ptr + line_len, &consumed); + if (endpos >= 0) { + endpos += start; + if (limit >= 0 && (endpos - start) + chunked >= limit) + endpos = start + limit - chunked; + break; + } + + /* We can put aside up to `endpos` */ + endpos = consumed + start; + if (limit >= 0 && (endpos - start) + chunked >= limit) { + /* Didn't find line ending, but reached length limit */ + endpos = start + limit - chunked; + break; + } + + if (endpos > start) { + /* No line ending seen yet - put aside current data */ + PyObject *s; + if (chunks == NULL) { + chunks = PyList_New(0); + if (chunks == NULL) + goto error; + } + s = PyUnicode_FromUnicode(ptr + start, endpos - start); + if (s == NULL) + goto error; + if (PyList_Append(chunks, s) < 0) { + Py_DECREF(s); + goto error; + } + chunked += PyUnicode_GET_SIZE(s); + Py_DECREF(s); + } + /* There may be some remaining bytes we'll have to prepend to the + next chunk of data */ + if (endpos < line_len) { + remaining = PyUnicode_FromUnicode( + ptr + endpos, line_len - endpos); + if (remaining == NULL) + goto error; + } + Py_CLEAR(line); + /* We have consumed the buffer */ + TextIOWrapper_set_decoded_chars(self, NULL); + } + + if (line != NULL) { + /* Our line ends in the current buffer */ + self->decoded_chars_used = endpos - offset_to_buffer; + if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) { + if (start == 0 && Py_REFCNT(line) == 1) { + if (PyUnicode_Resize(&line, endpos) < 0) + goto error; + } + else { + PyObject *s = PyUnicode_FromUnicode( + PyUnicode_AS_UNICODE(line) + start, endpos - start); + Py_CLEAR(line); + if (s == NULL) + goto error; + line = s; + } + } + } + if (remaining != NULL) { + if (chunks == NULL) { + chunks = PyList_New(0); + if (chunks == NULL) + goto error; + } + if (PyList_Append(chunks, remaining) < 0) + goto error; + 
Py_CLEAR(remaining); + } + if (chunks != NULL) { + if (line != NULL && PyList_Append(chunks, line) < 0) + goto error; + Py_CLEAR(line); + line = PyUnicode_Join(_PyIO_empty_str, chunks); + if (line == NULL) + goto error; + Py_DECREF(chunks); + } + if (line == NULL) + line = PyUnicode_FromStringAndSize(NULL, 0); + + return line; + + error: + Py_XDECREF(chunks); + Py_XDECREF(remaining); + Py_XDECREF(line); + return NULL; +} + +static PyObject * +TextIOWrapper_readline(PyTextIOWrapperObject *self, PyObject *args) +{ + PyObject *limitobj = NULL; + Py_ssize_t limit = -1; + + CHECK_INITIALIZED(self); + if (!PyArg_ParseTuple(args, "|O:readline", &limitobj)) { + return NULL; + } + if (limitobj) { + if (!PyNumber_Check(limitobj)) { + PyErr_Format(PyExc_TypeError, + "integer argument expected, got '%.200s'", + Py_TYPE(limitobj)->tp_name); + return NULL; + } + limit = PyNumber_AsSsize_t(limitobj, PyExc_OverflowError); + if (limit == -1 && PyErr_Occurred()) + return NULL; + } + return _TextIOWrapper_readline(self, limit); +} + +/* Seek and Tell */ + +typedef struct { + Py_off_t start_pos; + int dec_flags; + int bytes_to_feed; + int chars_to_skip; + char need_eof; +} CookieStruct; + +/* + To speed up cookie packing/unpacking, we store the fields in a temporary + string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.). + The following macros define at which offsets in the intermediary byte + string the various CookieStruct fields will be stored. + */ + +#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char)) + +#if defined(WORDS_BIGENDIAN) + +# define IS_LITTLE_ENDIAN 0 + +/* We want the least significant byte of start_pos to also be the least + significant byte of the cookie, which means that in big-endian mode we + must copy the fields in reverse order. 
*/ + +# define OFF_START_POS (sizeof(char) + 3 * sizeof(int)) +# define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int)) +# define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int)) +# define OFF_CHARS_TO_SKIP (sizeof(char)) +# define OFF_NEED_EOF 0 + +#else + +# define IS_LITTLE_ENDIAN 1 + +/* Little-endian mode: the least significant byte of start_pos will + naturally end up the least significant byte of the cookie. */ + +# define OFF_START_POS 0 +# define OFF_DEC_FLAGS (sizeof(Py_off_t)) +# define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int)) +# define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int)) +# define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int)) + +#endif + +static int +TextIOWrapper_parseCookie(CookieStruct *cookie, PyObject *cookieObj) +{ + unsigned char buffer[COOKIE_BUF_LEN]; + PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj); + if (cookieLong == NULL) + return -1; + + if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer), + IS_LITTLE_ENDIAN, 0) < 0) { + Py_DECREF(cookieLong); + return -1; + } + Py_DECREF(cookieLong); + + memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos)); + memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags)); + memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed)); + memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip)); + memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof)); + + return 0; +} + +static PyObject * +TextIOWrapper_buildCookie(CookieStruct *cookie) +{ + unsigned char buffer[COOKIE_BUF_LEN]; + + memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos)); + memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags)); + memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed)); + memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip)); + 
memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof)); + + return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0); +} +#undef IS_LITTLE_ENDIAN + +static int +_TextIOWrapper_decoder_setstate(PyTextIOWrapperObject *self, + CookieStruct *cookie) +{ + PyObject *res; + /* When seeking to the start of the stream, we call decoder.reset() + rather than decoder.getstate(). + This is for a few decoders such as utf-16 for which the state value + at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of + utf-16, that we are expecting a BOM). + */ + if (cookie->start_pos == 0 && cookie->dec_flags == 0) + res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL); + else + res = PyObject_CallMethod(self->decoder, "setstate", + "((si))", "", cookie->dec_flags); + if (res == NULL) + return -1; + Py_DECREF(res); + return 0; +} + +static int +_TextIOWrapper_encoder_setstate(PyTextIOWrapperObject *self, + CookieStruct *cookie) +{ + PyObject *res; + /* Same as _TextIOWrapper_decoder_setstate() above. 
*/ + if (cookie->start_pos == 0 && cookie->dec_flags == 0) { + res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL); + self->encoding_start_of_stream = 1; + } + else { + res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate, + _PyIO_zero, NULL); + self->encoding_start_of_stream = 0; + } + if (res == NULL) + return -1; + Py_DECREF(res); + return 0; +} + +static PyObject * +TextIOWrapper_seek(PyTextIOWrapperObject *self, PyObject *args) +{ + PyObject *cookieObj, *posobj; + CookieStruct cookie; + int whence = 0; + PyObject *res; + int cmp; + + CHECK_INITIALIZED(self); + + if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence)) + return NULL; + CHECK_CLOSED(self); + + Py_INCREF(cookieObj); + + if (!self->seekable) { + PyErr_SetString(PyExc_IOError, + "underlying stream is not seekable"); + goto fail; + } + + if (whence == 1) { + /* seek relative to current position */ + cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ); + if (cmp < 0) + goto fail; + + if (cmp == 0) { + PyErr_SetString(PyExc_IOError, + "can't do nonzero cur-relative seeks"); + goto fail; + } + + /* Seeking to the current position should attempt to + * sync the underlying buffer with the current position. 
+ */ + Py_DECREF(cookieObj); + cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL); + if (cookieObj == NULL) + goto fail; + } + else if (whence == 2) { + /* seek relative to end of file */ + + cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ); + if (cmp < 0) + goto fail; + + if (cmp == 0) { + PyErr_SetString(PyExc_IOError, + "can't do nonzero end-relative seeks"); + goto fail; + } + + res = PyObject_CallMethod((PyObject *)self, "flush", NULL); + if (res == NULL) + goto fail; + Py_DECREF(res); + + TextIOWrapper_set_decoded_chars(self, NULL); + Py_CLEAR(self->snapshot); + if (self->decoder) { + res = PyObject_CallMethod(self->decoder, "reset", NULL); + if (res == NULL) + goto fail; + Py_DECREF(res); + } + + res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2); + Py_XDECREF(cookieObj); + return res; + } + else if (whence != 0) { + PyErr_Format(PyExc_ValueError, + "invalid whence (%d, should be 0, 1 or 2)", whence); + goto fail; + } + + cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT); + if (cmp < 0) + goto fail; + + if (cmp == 1) { + PyObject *repr = PyObject_Repr(cookieObj); + if (repr != NULL) { + PyErr_Format(PyExc_ValueError, + "negative seek position %s", + PyString_AS_STRING(repr)); + Py_DECREF(repr); + } + goto fail; + } + + res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL); + if (res == NULL) + goto fail; + Py_DECREF(res); + + /* The strategy of seek() is to go back to the safe start point + * and replay the effect of read(chars_to_skip) from there. + */ + if (TextIOWrapper_parseCookie(&cookie, cookieObj) < 0) + goto fail; + + /* Seek back to the safe start point. 
*/ + posobj = PyLong_FromOff_t(cookie.start_pos); + if (posobj == NULL) + goto fail; + res = PyObject_CallMethodObjArgs(self->buffer, + _PyIO_str_seek, posobj, NULL); + Py_DECREF(posobj); + if (res == NULL) + goto fail; + Py_DECREF(res); + + TextIOWrapper_set_decoded_chars(self, NULL); + Py_CLEAR(self->snapshot); + + /* Restore the decoder to its state from the safe start point. */ + if (self->decoder) { + if (_TextIOWrapper_decoder_setstate(self, &cookie) < 0) + goto fail; + } + + if (cookie.chars_to_skip) { + /* Just like _read_chunk, feed the decoder and save a snapshot. */ + PyObject *input_chunk = PyObject_CallMethod( + self->buffer, "read", "i", cookie.bytes_to_feed); + PyObject *decoded; + + if (input_chunk == NULL) + goto fail; + + assert (PyBytes_Check(input_chunk)); + + self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk); + if (self->snapshot == NULL) { + Py_DECREF(input_chunk); + goto fail; + } + + decoded = PyObject_CallMethod(self->decoder, "decode", + "Oi", input_chunk, (int)cookie.need_eof); + + if (decoded == NULL) + goto fail; + + TextIOWrapper_set_decoded_chars(self, decoded); + + /* Skip chars_to_skip of the decoded characters. 
*/ + if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) { + PyErr_SetString(PyExc_IOError, "can't restore logical file position"); + goto fail; + } + self->decoded_chars_used = cookie.chars_to_skip; + } + else { + self->snapshot = Py_BuildValue("is", cookie.dec_flags, ""); + if (self->snapshot == NULL) + goto fail; + } + + /* Finally, reset the encoder (merely useful for proper BOM handling) */ + if (self->encoder) { + if (_TextIOWrapper_encoder_setstate(self, &cookie) < 0) + goto fail; + } + return cookieObj; + fail: + Py_XDECREF(cookieObj); + return NULL; + +} + +static PyObject * +TextIOWrapper_tell(PyTextIOWrapperObject *self, PyObject *args) +{ + PyObject *res; + PyObject *posobj = NULL; + CookieStruct cookie = {0,0,0,0,0}; + PyObject *next_input; + Py_ssize_t chars_to_skip, chars_decoded; + PyObject *saved_state = NULL; + char *input, *input_end; + + CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + + if (!self->seekable) { + PyErr_SetString(PyExc_IOError, + "underlying stream is not seekable"); + goto fail; + } + if (!self->telling) { + PyErr_SetString(PyExc_IOError, + "telling position disabled by next() call"); + goto fail; + } + + if (_TextIOWrapper_writeflush(self) < 0) + return NULL; + res = PyObject_CallMethod((PyObject *)self, "flush", NULL); + if (res == NULL) + goto fail; + Py_DECREF(res); + + posobj = PyObject_CallMethod(self->buffer, "tell", NULL); + if (posobj == NULL) + goto fail; + + if (self->decoder == NULL || self->snapshot == NULL) { + assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0); + return posobj; + } + +#if defined(HAVE_LARGEFILE_SUPPORT) + cookie.start_pos = PyLong_AsLongLong(posobj); +#else + cookie.start_pos = PyLong_AsLong(posobj); +#endif + if (PyErr_Occurred()) + goto fail; + + /* Skip backward to the snapshot point (see _read_chunk). 
*/ + if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input)) + goto fail; + + assert (PyBytes_Check(next_input)); + + cookie.start_pos -= PyBytes_GET_SIZE(next_input); + + /* How many decoded characters have been used up since the snapshot? */ + if (self->decoded_chars_used == 0) { + /* We haven't moved from the snapshot point. */ + Py_DECREF(posobj); + return TextIOWrapper_buildCookie(&cookie); + } + + chars_to_skip = self->decoded_chars_used; + + /* Starting from the snapshot position, we will walk the decoder + * forward until it gives us enough decoded characters. + */ + saved_state = PyObject_CallMethodObjArgs(self->decoder, + _PyIO_str_getstate, NULL); + if (saved_state == NULL) + goto fail; + + /* Note our initial start point. */ + if (_TextIOWrapper_decoder_setstate(self, &cookie) < 0) + goto fail; + + /* Feed the decoder one byte at a time. As we go, note the + * nearest "safe start point" before the current location + * (a point where the decoder has nothing buffered, so seek() + * can safely start from there and advance to this location). + */ + chars_decoded = 0; + input = PyBytes_AS_STRING(next_input); + input_end = input + PyBytes_GET_SIZE(next_input); + while (input < input_end) { + PyObject *state; + char *dec_buffer; + Py_ssize_t dec_buffer_len; + int dec_flags; + + PyObject *decoded = PyObject_CallMethod( + self->decoder, "decode", "s#", input, 1); + if (decoded == NULL) + goto fail; + assert (PyUnicode_Check(decoded)); + chars_decoded += PyUnicode_GET_SIZE(decoded); + Py_DECREF(decoded); + + cookie.bytes_to_feed += 1; + + state = PyObject_CallMethodObjArgs(self->decoder, + _PyIO_str_getstate, NULL); + if (state == NULL) + goto fail; + if (!PyArg_Parse(state, "(s#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { + Py_DECREF(state); + goto fail; + } + Py_DECREF(state); + + if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) { + /* Decoder buffer is empty, so this is a safe start point. 
*/ + cookie.start_pos += cookie.bytes_to_feed; + chars_to_skip -= chars_decoded; + cookie.dec_flags = dec_flags; + cookie.bytes_to_feed = 0; + chars_decoded = 0; + } + if (chars_decoded >= chars_to_skip) + break; + input++; + } + if (input == input_end) { + /* We didn't get enough decoded data; signal EOF to get more. */ + PyObject *decoded = PyObject_CallMethod( + self->decoder, "decode", "si", "", /* final = */ 1); + if (decoded == NULL) + goto fail; + assert (PyUnicode_Check(decoded)); + chars_decoded += PyUnicode_GET_SIZE(decoded); + Py_DECREF(decoded); + cookie.need_eof = 1; + + if (chars_decoded < chars_to_skip) { + PyErr_SetString(PyExc_IOError, + "can't reconstruct logical file position"); + goto fail; + } + } + + /* finally */ + Py_XDECREF(posobj); + res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state); + Py_DECREF(saved_state); + if (res == NULL) + return NULL; + Py_DECREF(res); + + /* The returned cookie corresponds to the last safe start point. */ + cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int); + return TextIOWrapper_buildCookie(&cookie); + + fail: + Py_XDECREF(posobj); + if (saved_state) { + PyObject *type, *value, *traceback; + PyErr_Fetch(&type, &value, &traceback); + + res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state); + Py_DECREF(saved_state); + if (res == NULL) + return NULL; + Py_DECREF(res); + + PyErr_Restore(type, value, traceback); + } + return NULL; +} + +static PyObject * +TextIOWrapper_truncate(PyTextIOWrapperObject *self, PyObject *args) +{ + PyObject *pos = Py_None; + PyObject *res; + + CHECK_INITIALIZED(self) + if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) { + return NULL; + } + + res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL); + if (res == NULL) + return NULL; + Py_DECREF(res); + + if (pos != Py_None) { + res = PyObject_CallMethodObjArgs((PyObject *) self, + _PyIO_str_seek, pos, NULL); + if (res == NULL) + return NULL; + Py_DECREF(res); 
+ } + + return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, NULL); +} + +static PyObject * +TextIOWrapper_repr(PyTextIOWrapperObject *self) +{ + PyObject *nameobj, *res; + PyObject *namerepr = NULL, *encrepr = NULL; + + CHECK_INITIALIZED(self); + + nameobj = PyObject_GetAttrString((PyObject *) self, "name"); + if (nameobj == NULL) { + if (PyErr_ExceptionMatches(PyExc_AttributeError)) + PyErr_Clear(); + else + goto error; + encrepr = PyObject_Repr(self->encoding); + res = PyString_FromFormat("<_io.TextIOWrapper encoding=%s>", + PyString_AS_STRING(encrepr)); + } + else { + encrepr = PyObject_Repr(self->encoding); + namerepr = PyObject_Repr(nameobj); + res = PyString_FromFormat("<_io.TextIOWrapper name=%s encoding=%s>", + PyString_AS_STRING(namerepr), + PyString_AS_STRING(encrepr)); + Py_DECREF(nameobj); + } + Py_XDECREF(namerepr); + Py_XDECREF(encrepr); + return res; + +error: + Py_XDECREF(namerepr); + Py_XDECREF(encrepr); + return NULL; +} + + +/* Inquiries */ + +static PyObject * +TextIOWrapper_fileno(PyTextIOWrapperObject *self, PyObject *args) +{ + CHECK_INITIALIZED(self); + return PyObject_CallMethod(self->buffer, "fileno", NULL); +} + +static PyObject * +TextIOWrapper_seekable(PyTextIOWrapperObject *self, PyObject *args) +{ + CHECK_INITIALIZED(self); + return PyObject_CallMethod(self->buffer, "seekable", NULL); +} + +static PyObject * +TextIOWrapper_readable(PyTextIOWrapperObject *self, PyObject *args) +{ + CHECK_INITIALIZED(self); + return PyObject_CallMethod(self->buffer, "readable", NULL); +} + +static PyObject * +TextIOWrapper_writable(PyTextIOWrapperObject *self, PyObject *args) +{ + CHECK_INITIALIZED(self); + return PyObject_CallMethod(self->buffer, "writable", NULL); +} + +static PyObject * +TextIOWrapper_isatty(PyTextIOWrapperObject *self, PyObject *args) +{ + CHECK_INITIALIZED(self); + return PyObject_CallMethod(self->buffer, "isatty", NULL); +} + +static PyObject * +TextIOWrapper_flush(PyTextIOWrapperObject *self, PyObject *args) +{ + 
CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + self->telling = self->seekable; + if (_TextIOWrapper_writeflush(self) < 0) + return NULL; + return PyObject_CallMethod(self->buffer, "flush", NULL); +} + +static PyObject * +TextIOWrapper_close(PyTextIOWrapperObject *self, PyObject *args) +{ + PyObject *res; + CHECK_INITIALIZED(self); + res = PyObject_CallMethod((PyObject *)self, "flush", NULL); + if (res == NULL) { + /* If flush() fails, just give up */ + PyErr_Clear(); + } + else + Py_DECREF(res); + + return PyObject_CallMethod(self->buffer, "close", NULL); +} + +static PyObject * +TextIOWrapper_iternext(PyTextIOWrapperObject *self) +{ + PyObject *line; + + CHECK_INITIALIZED(self); + + self->telling = 0; + if (Py_TYPE(self) == &PyTextIOWrapper_Type) { + /* Skip method call overhead for speed */ + line = _TextIOWrapper_readline(self, -1); + } + else { + line = PyObject_CallMethodObjArgs((PyObject *)self, + _PyIO_str_readline, NULL); + if (line && !PyUnicode_Check(line)) { + PyErr_Format(PyExc_IOError, + "readline() should have returned an str object, " + "not '%.200s'", Py_TYPE(line)->tp_name); + Py_DECREF(line); + return NULL; + } + } + + if (line == NULL) + return NULL; + + if (PyUnicode_GET_SIZE(line) == 0) { + /* Reached EOF or would have blocked */ + Py_DECREF(line); + Py_CLEAR(self->snapshot); + self->telling = self->seekable; + return NULL; + } + + return line; +} + +static PyObject * +TextIOWrapper_name_get(PyTextIOWrapperObject *self, void *context) +{ + CHECK_INITIALIZED(self); + return PyObject_GetAttrString(self->buffer, "name"); +} + +static PyObject * +TextIOWrapper_closed_get(PyTextIOWrapperObject *self, void *context) +{ + CHECK_INITIALIZED(self); + return PyObject_GetAttr(self->buffer, _PyIO_str_closed); +} + +static PyObject * +TextIOWrapper_newlines_get(PyTextIOWrapperObject *self, void *context) +{ + PyObject *res; + CHECK_INITIALIZED(self); + if (self->decoder == NULL) + Py_RETURN_NONE; + res = PyObject_GetAttr(self->decoder, 
_PyIO_str_newlines); + if (res == NULL) { + if (PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + Py_RETURN_NONE; + } + else { + return NULL; + } + } + return res; +} + +static PyObject * +TextIOWrapper_errors_get(PyTextIOWrapperObject *self, void *context) +{ + CHECK_INITIALIZED(self); + Py_INCREF(self->errors); + return self->errors; +} + +static PyObject * +TextIOWrapper_chunk_size_get(PyTextIOWrapperObject *self, void *context) +{ + CHECK_INITIALIZED(self); + return PyLong_FromSsize_t(self->chunk_size); +} + +static int +TextIOWrapper_chunk_size_set(PyTextIOWrapperObject *self, + PyObject *arg, void *context) +{ + Py_ssize_t n; + CHECK_INITIALIZED_INT(self); + n = PyNumber_AsSsize_t(arg, PyExc_TypeError); + if (n == -1 && PyErr_Occurred()) + return -1; + if (n <= 0) { + PyErr_SetString(PyExc_ValueError, + "a strictly positive integer is required"); + return -1; + } + self->chunk_size = n; + return 0; +} + +static PyMethodDef TextIOWrapper_methods[] = { + {"detach", (PyCFunction)TextIOWrapper_detach, METH_NOARGS}, + {"write", (PyCFunction)TextIOWrapper_write, METH_VARARGS}, + {"read", (PyCFunction)TextIOWrapper_read, METH_VARARGS}, + {"readline", (PyCFunction)TextIOWrapper_readline, METH_VARARGS}, + {"flush", (PyCFunction)TextIOWrapper_flush, METH_NOARGS}, + {"close", (PyCFunction)TextIOWrapper_close, METH_NOARGS}, + + {"fileno", (PyCFunction)TextIOWrapper_fileno, METH_NOARGS}, + {"seekable", (PyCFunction)TextIOWrapper_seekable, METH_NOARGS}, + {"readable", (PyCFunction)TextIOWrapper_readable, METH_NOARGS}, + {"writable", (PyCFunction)TextIOWrapper_writable, METH_NOARGS}, + {"isatty", (PyCFunction)TextIOWrapper_isatty, METH_NOARGS}, + + {"seek", (PyCFunction)TextIOWrapper_seek, METH_VARARGS}, + {"tell", (PyCFunction)TextIOWrapper_tell, METH_NOARGS}, + {"truncate", (PyCFunction)TextIOWrapper_truncate, METH_VARARGS}, + {NULL, NULL} +}; + +static PyMemberDef TextIOWrapper_members[] = { + {"encoding", T_OBJECT, offsetof(PyTextIOWrapperObject, 
encoding), READONLY}, + {"buffer", T_OBJECT, offsetof(PyTextIOWrapperObject, buffer), READONLY}, + {"line_buffering", T_BOOL, offsetof(PyTextIOWrapperObject, line_buffering), READONLY}, + {NULL} +}; + +static PyGetSetDef TextIOWrapper_getset[] = { + {"name", (getter)TextIOWrapper_name_get, NULL, NULL}, + {"closed", (getter)TextIOWrapper_closed_get, NULL, NULL}, +/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL}, +*/ + {"newlines", (getter)TextIOWrapper_newlines_get, NULL, NULL}, + {"errors", (getter)TextIOWrapper_errors_get, NULL, NULL}, + {"_CHUNK_SIZE", (getter)TextIOWrapper_chunk_size_get, + (setter)TextIOWrapper_chunk_size_set, NULL}, + {NULL} +}; + +PyTypeObject PyTextIOWrapper_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_io.TextIOWrapper", /*tp_name*/ + sizeof(PyTextIOWrapperObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)TextIOWrapper_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare */ + (reprfunc)TextIOWrapper_repr,/*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE + | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + TextIOWrapper_doc, /* tp_doc */ + (traverseproc)TextIOWrapper_traverse, /* tp_traverse */ + (inquiry)TextIOWrapper_clear, /* tp_clear */ + 0, /* tp_richcompare */ + offsetof(PyTextIOWrapperObject, weakreflist), /*tp_weaklistoffset*/ + 0, /* tp_iter */ + (iternextfunc)TextIOWrapper_iternext, /* tp_iternext */ + TextIOWrapper_methods, /* tp_methods */ + TextIOWrapper_members, /* tp_members */ + TextIOWrapper_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + offsetof(PyTextIOWrapperObject, dict), /*tp_dictoffset*/ + (initproc)TextIOWrapper_init, /* tp_init */ + 0, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ +}; diff -r c5c04a1ff3b4 -r 
b3fbeeb23fd6 setup.py --- a/setup.py +++ b/setup.py @@ -438,10 +438,11 @@ class PyBuildExt(build_ext): exts.append( Extension("_heapq", ["_heapqmodule.c"]) ) # operator.add() and similar goodies exts.append( Extension('operator', ['operator.c']) ) - # Python 3.0 _fileio module - exts.append( Extension("_fileio", ["_fileio.c"]) ) - # Python 3.0 _bytesio module - exts.append( Extension("_bytesio", ["_bytesio.c"]) ) + # Python 3.1 _io library + exts.append( Extension("_io", + ["_io/bufferedio.c", "_io/bytesio.c", "_io/fileio.c", + "_io/iobase.c", "_io/_iomodule.c", "_io/stringio.c", "_io/textio.c"], + depends=["_io/_iomodule.h"], include_dirs=["Modules/_io"])) # _functools exts.append( Extension("_functools", ["_functoolsmodule.c"]) ) # _json speedups