Index: Doc/library/asynchat.rst =================================================================== --- Doc/library/asynchat.rst (revision 59392) +++ Doc/library/asynchat.rst (working copy) @@ -76,9 +76,9 @@ .. method:: async_chat.collect_incoming_data(data) - Called with *data* holding an arbitrary amount of received data. The - default method, which must be overridden, raises a - :exc:`NotImplementedError` exception. + Called with *data* holding an arbitrary amount of received data in + the form of a bytes object. The default method, which must be + overridden, raises a :exc:`NotImplementedError` exception. .. method:: async_chat.discard_buffers() @@ -89,10 +89,12 @@ .. method:: async_chat.found_terminator() - Called when the incoming data stream matches the termination condition set - by :meth:`set_terminator`. The default method, which must be overridden, - raises a :exc:`NotImplementedError` exception. The buffered input data - should be available via an instance attribute. + Called when the incoming data stream matches the termination + condition set by :meth:`set_terminator` or + :meth:`set_terminator_str`. The default method, which must be + overridden, raises a :exc:`NotImplementedError` + exception. :meth:`collect_incoming_data` should have stored the + data in some accesible location. .. method:: async_chat.get_terminator() @@ -127,13 +129,39 @@ .. method:: async_chat.push(data) - Creates a :class:`simple_producer` object (*see below*) containing the data - and pushes it on to the channel's ``producer_fifo`` to ensure its - transmission. This is all you need to do to have the channel write the - data out to the network, although it is possible to use your own producers - in more complex schemes to implement encryption and chunking, for example. + Creates a :class:`simple_producer` object (*see below*) containing + the data and pushes it on to the channel's ``producer_fifo`` to + ensure its transmission, where *data* can be interpreted as a bytes + object. This is all you need to do to have the channel write the + data out to the network, although it is possible to use your own + producers in more complex schemes to implement encryption and + chunking, for example. +.. method:: async_chat.push_str(data, encoding) + + Creates a :class:`simple_producer` object (*see below*) containing + the data and pushes it on to the channel's ``producer_fifo`` to + ensure its transmission, where *data* can be encoded into a bytes + object via the specified encoding. This is all you need to do to + have the channel write the data out to the network, although it is + possible to use your own producers in more complex schemes to + implement encryption and chunking, for example. + + It is wise to be aware the of endian behavior of your chosen + encoding, especially when communicating between computers of + different architectures. + + +.. method:: async_chat.push_iterable(iterable) + + Creates a :class:`iterator_producer` object (*see below*) wrapping + iter(*iterable*) and pushes it on to the channel's + ``producer_fifo`` to ensure its transmission, where *iterable* + results in a series of bytes objects. This is all you need to do + to have the channel write the data out to the network. + + .. method:: async_chat.push_with_producer(producer) Takes a producer object and adds it to the producer fifo associated with @@ -165,11 +193,11 @@ +-----------+---------------------------------------------+ | term | Description | +===========+=============================================+ - | *string* | Will call :meth:`found_terminator` when the | - | | string is found in the input stream | + | *bytes* | Will call :meth:`found_terminator` when the | + | | byte sequence is found in the input stream | +-----------+---------------------------------------------+ | *integer* | Will call :meth:`found_terminator` when the | - | | indicated number of characters have been | + | | indicated number of bytes have been | | | received | +-----------+---------------------------------------------+ | ``None`` | The channel continues to collect data | @@ -180,6 +208,18 @@ by the channel after :meth:`found_terminator` is called. +.. method:: async_chat.set_terminator_str(term, encoding) + + Sets the terminating condition to be recognized on the channel. + ``term`` is encoded into a bytes object using the ``encoding``, + after which it is treated just as a bytes object passed to + :meth:`set_terminator`. + + It is wise to be aware the of endian behavior of your chosen + encoding, especially when communicating between computers of + different architectures. + + .. method:: async_chat.writable() Should return ``True`` as long as items remain on the producer fifo, or the @@ -203,6 +243,19 @@ empty string. +.. class:: iterator_producer(iterator) + + A :class:`iterator_producer` takes an iterator and adapts it to the + asynchat producer interface. The objects returned by the iterator + must be valid to pass into the ``bytes`` constructor. + + +.. method:: iterator_producer.more() + + Produces the next chunk of information resulting from the iterator, + or an b'' if the iterator raises :exc:`StopIteration` + + .. class:: fifo([list=None]) Each channel maintains a :class:`fifo` holding data which has been pushed @@ -271,8 +324,8 @@ self.addr = addr self.sessions = sessions self.ibuffer = [] - self.obuffer = "" - self.set_terminator("\r\n\r\n") + self.obuffer = b"" + self.set_terminator(b"\r\n\r\n") self.reading_headers = True self.handling = False self.cgi_data = None @@ -285,10 +338,10 @@ def found_terminator(self): if self.reading_headers: self.reading_headers = False - self.parse_headers("".join(self.ibuffer)) + self.parse_headers(b"".join(self.ibuffer)) self.ibuffer = [] - if self.op.upper() == "POST": - clen = self.headers.getheader("content-length") + if self.op.upper() == b"POST": + clen = self.headers.getheader(b"content-length") self.set_terminator(int(clen)) else: self.handling = True @@ -296,7 +349,7 @@ self.handle_request() elif not self.handling: self.set_terminator(None) # browsers sometimes over-send - self.cgi_data = parse(self.headers, "".join(self.ibuffer)) + self.cgi_data = parse(self.headers, b"".join(self.ibuffer)) self.handling = True self.ibuffer = [] self.handle_request() Index: Doc/library/asyncore.rst =================================================================== --- Doc/library/asyncore.rst (revision 59392) +++ Doc/library/asyncore.rst (working copy) @@ -119,8 +119,10 @@ .. method:: dispatcher.handle_expt() - Called when there is out of band (OOB) data for a socket connection. This - will almost never happen, as OOB is tenuously supported and rarely used. + Called when there is out of band (OOB) data or other exceptional + conditions for a socket connection. This will almost never happen, + as OOB is tenuously supported and rarely used. The default behavior + is to report the exceptional condition as an error. .. method:: dispatcher.handle_connect() Index: Lib/asynchat.py =================================================================== --- Lib/asynchat.py (revision 59392) +++ Lib/asynchat.py (working copy) @@ -32,18 +32,17 @@ the common internet protocols - smtp, nntp, ftp, etc..). The handle_read() method looks at the input stream for the current -'terminator' (usually '\r\n' for single-line responses, '\r\n.\r\n' -for multi-line output), calling self.found_terminator() on its -receipt. +'terminator', calling self.found_terminator() on its receipt. for example: -Say you build an async nntp client using this class. At the start -of the connection, you'll have self.terminator set to '\r\n', in -order to process the single-line greeting. Just before issuing a -'LIST' command you'll set it to '\r\n.\r\n'. The output of the LIST -command will be accumulated (using your own 'collect_incoming_data' -method) up to the terminator, and then control will be returned to -you - by calling your self.found_terminator() method. +Say you build an async nntp client using this class. At the start of +the connection, you'll have self.terminator set to b'\r\n', in order +to process the single-line greeting. Just before issuing a LIST +command you'll set it to b'\r\n.\r\n'. The output of the LIST command +will be accumulated (using your own 'collect_incoming_data' method) up +to the terminator, and then control will be returned to you - by +calling your self.found_terminator() method. + """ import sys @@ -52,31 +51,128 @@ from collections import deque class async_chat (asyncore.dispatcher): + """This is an abstract class. You must derive from this class, and add - the two methods collect_incoming_data() and found_terminator()""" + the two methods collect_incoming_data() and found_terminator(). + The set_terminator(term) or set_terminator_str(term, encoding) + method almost always should be called before entering + asyncore.loop. + + In addition to overriding methods, children may override the + variables `ac_in_buffer_size` and `ac_out_buffer_size`, which + deterine the number of bytes that may be stored in the incoming + and outgoing buffers, respectively. Changing these will not + affect functionality, but may alter performance. The default + value of each is 4096. + + """ + # these are overridable defaults ac_in_buffer_size = 4096 ac_out_buffer_size = 4096 def __init__ (self, conn=None): + """Constructor, should be called by child's __init__ + + Optional arguments: + conn -- An already initialized socket. + + conn, if provided and not None, will be used as the socket for + this channel. If conn is provided, create_socket and its + related methods need not be called. + + """ + self.ac_in_buffer = b'' self.ac_out_buffer = b'' self.producer_fifo = fifo() asyncore.dispatcher.__init__ (self, conn) def collect_incoming_data(self, data): + """Store received data for later processing + + Arguments: + data -- A bytes object representing some received data. + + collect_incoming_data must be overridden in a child class for + async_chat to be useful. All it needs to do in most cases is + record the data; `found_terminator` is the proper place to do + most processing. + + """ raise NotImplementedError("must be implemented in subclass") def found_terminator(self): + """Process a complete section of data received from the socket + + found_terminator must be overridden in a child class for + async_chat to be useful. It is called when the terminator is + found in the received data stream, after calling + `collect_incoming_data` with all bytes prior to the terminator + in the stream. The terminator is defined by the most recent + call to `set_terminator` or `set_terminator_str`. + + + """ raise NotImplementedError("must be implemented in subclass") def set_terminator (self, term): - "Set the input delimiter. Can be a fixed string of any length, an integer, or None" - self.terminator = term + """Set the input delimiter. + Arguments: + term -- Can be a bytes object, an integer, or None. + + If term is None, async_chat will never call + found_terminator. This is occasionally useful, if + `collect_incoming_data` performs all necessary handling of the + data. + + If term is an int object, async_chat will read that many bytes + from the data stream, and then call `found_terminator`. This + is useful for reading protocol sections of known length. + + If term is neither an int nor None, it will be passed to the + bytes object constructor. async_chat will look for that + sequence of bytes in the incoming data stream, and call + `found_terminator` when it is discovered. This is useful for + reading protocol sections of varying or unknown length. + + set_terminator may be called as many times as you like. This + is quite often useful -- for example in HTTP you might read + the headers by doing set_terminator(b'\n\n') and then extract + the content length from the headers and do + set_terminator(content_length) to read the page data. + + """ + + if term is None: + self.terminator = None + elif isinstance(term, int): + self.terminator = term + else: + self.terminator = bytes(term) + + def set_terminator_str(self, term, encoding): + """Set the input delimiter to an encoded string + + Arguments: + term -- A str object. + encoding -- The name of a codec. + + This method is like `set_terminator`, except that it sets the + terminator to a sequence of bytes representing term in the + given encoding. + + It is very important to be aware of byte ordering when using + this function. Some encodings produce endian-sentive results. + + """ + self.set_terminator(term.encode(encoding)) + def get_terminator (self): + """Return the current terminator""" return self.terminator # grab some more data from the socket, @@ -85,6 +181,7 @@ # if found, transition to the next state. def handle_read (self): + """asyncore function. Overriding children must invoke.""" try: data = self.recv (self.ac_in_buffer_size) @@ -92,9 +189,8 @@ self.handle_error() return - if isinstance(data, str): - data = data.encode('ascii') - self.ac_in_buffer = self.ac_in_buffer + bytes(data) + data = bytes(data) + self.ac_in_buffer = self.ac_in_buffer + data # Continue to search for self.terminator in self.ac_in_buffer, # while calling self.collect_incoming_data. The while loop @@ -108,7 +204,7 @@ # no terminator, collect it all self.collect_incoming_data (self.ac_in_buffer) self.ac_in_buffer = b'' - elif isinstance(terminator, int) or isinstance(terminator, int): + elif isinstance(terminator, int): # numeric terminator n = terminator if lb < n: @@ -129,8 +225,6 @@ # 3) end of buffer does not match any prefix: # collect data terminator_len = len(terminator) - if isinstance(terminator, str): - terminator = terminator.encode('ascii') index = self.ac_in_buffer.find(terminator) if index != -1: # we found the terminator @@ -155,16 +249,73 @@ self.ac_in_buffer = b'' def handle_write (self): + """asyncore function. Overriding children must invoke.""" self.initiate_send () def handle_close (self): + """asyncore function. Overriding children must invoke.""" self.close() def push (self, data): - self.producer_fifo.push (simple_producer (data)) + """Place data in the outgoing queue. + + Arguments: + data -- The expression bytes(data) must be meaningful + + Appends bytes(data) to the sequence of bytes to be sent + through the socket. Each time asyncore.loop discovers that + the socket is writable, some or all of the bytes in that queue + will be transmitted. + + """ + self.producer_fifo.push(simple_producer(bytes(data))) self.initiate_send() + def push_str(self, data, encoding): + """Place a string in the outgoing queue. + + Arguments: + data -- The expression data.encode(encoding) must be meaningful + + Appends bytes(data.encode(encoding)) to the sequence of bytes + to be sent through the socket. Each time asyncore.loop + discovers that the socket is writable, some or all of the + bytes in that queue will be transmitted. + + It is very important to be aware of byte ordering when using + this function. Some encodings produce endian-sentive results. + + """ + self.push(data.encode(encoding)) + + def push_iterable(self, iterable): + """Place the iterated items in the outgoing queue + + Arguments: + iterable -- [bytes(x) for x in iterable] must be meaningful + + Appends bytes(x) for each x in iterator to the sequence of + bytes to be sent through the socket. Each time asyncore.loop + discovers that the socket is writable, some or all of the + bytes in that queue will be transmitted. + + """ + self.producer_fifo.push(iterator_producer(iter(iterable))) + self.initiate_send() + def push_with_producer (self, producer): + """Place a producer in the outgoing queue. + + Arguments: + producer -- The expression producer.more() must be meaningful + + Repeatedly appends bytes(producer.more()) to the sequence of + bytes to be sent through the socket, until + bool(producer.more()) is False. Each time asyncore.loop + discovers that the socket is writable, some or all of the + bytes in that queue will be transmitted. + + """ self.producer_fifo.push (producer) self.initiate_send() @@ -189,6 +340,7 @@ # refill the outgoing buffer by calling the more() method # of the first producer in the queue def refill_buffer (self): + """Internal function. Do not invoke or override.""" while 1: if len(self.producer_fifo): p = self.producer_fifo.first() @@ -199,16 +351,12 @@ self.producer_fifo.pop() self.close() return - elif isinstance(p, str) or isinstance(p, bytes): - if isinstance(p, str): - p = p.encode('ascii') + elif isinstance(p, bytes): self.producer_fifo.pop() self.ac_out_buffer = self.ac_out_buffer + p return data = p.more() if data: - if isinstance(data, str): - data = data.encode('ascii') self.ac_out_buffer = self.ac_out_buffer + bytes(data) return else: @@ -217,6 +365,7 @@ return def initiate_send (self): + """Internal function. Do not invoke or override.""" obs = self.ac_out_buffer_size # try to refill the buffer if (len (self.ac_out_buffer) < obs): @@ -234,6 +383,12 @@ return def discard_buffers (self): + """Discard all incoming and outgoing data. + + It is generally wise to avoid the use of this function, as it + results in data loss. This is rarely useful. + + """ # Emergencies only! self.ac_in_buffer = b'' self.ac_out_buffer = b'' @@ -243,8 +398,31 @@ class simple_producer: + """Produce bytes for transmission. + + This is a very basic producer, used to implement + `async_chat.push`; the only extra functionality it provides is to + break the data up into blocks of arbitrary size, such that each + call to more produces a block of data no larger than the requested + size. + + """ + def __init__ (self, data, buffer_size=512): - self.data = data + """Initialize the producer with a byte sequence + + Arguments: + data -- bytes(data) must be meaningful. + + Optional arguments: + buffer_size -- A number, defaults to 512 + + The simple_producer will pop and return blocks of `data` of + length min(`buffer_size`, len(`data`)) until data is empty. + + """ + + self.data = bytes(data) self.buffer_size = buffer_size def more (self): @@ -257,6 +435,36 @@ self.data = b'' return result +class iterator_producer: + + """Produce bytes for transmission. + + iterator_producer is a wrapper around Python iterators, adapting + them to the older producer interface defined by this module. Using + iterator_producer, any iterator that produces a sequence of bytes + objects can be used as a producer. + + `async_chat.push_iterable` is a helper function which adds an + instance of iterator_producer to the sending queue. + + """ + + def __init__(self, itr): + """Initialize the producer with an iterator + + Arguments: + itr -- An iterator for which [bytes(x) for x in itr] works + + """ + + self.itr = itr + + def more(self): + try: + return self.itr.__next__() + except StopIteration: + return b'' + class fifo: def __init__ (self, list=None): if not list: @@ -298,6 +506,17 @@ # regex: 14035/s def find_prefix_at_end (haystack, needle): + """Returns > 0 if the beginning of needle is at the end of haystack + + Arguments: + haystack -- A bytes object. + needle -- A bytes object. + + Given haystack, see if any prefix of needle is at its end. This + assumes an exact match has already been checked. Return the + number of characters matched. + + """ l = len(needle) - 1 while l and not haystack.endswith(needle[:l]): l -= 1 Index: Lib/smtpd.py =================================================================== --- Lib/smtpd.py (revision 59392) +++ Lib/smtpd.py (working copy) @@ -123,12 +123,12 @@ self.__fqdn = socket.getfqdn() self.__peer = conn.getpeername() print('Peer:', repr(self.__peer), file=DEBUGSTREAM) - self.push('220 %s %s' % (self.__fqdn, __version__)) - self.set_terminator('\r\n') + self.push_str('220 %s %s' % (self.__fqdn, __version__), 'ascii') + self.set_terminator(b'\r\n') # Overrides base class for convenience def push(self, msg): - asynchat.async_chat.push(self, msg + '\r\n') + asynchat.async_chat.push(self, msg + b'\r\n') # Implementation of base class abstract method def collect_incoming_data(self, data): @@ -141,7 +141,7 @@ self.__line = [] if self.__state == self.COMMAND: if not line: - self.push('500 Error: bad syntax') + self.push_str('500 Error: bad syntax', 'ascii') return method = None i = line.find(' ') @@ -153,13 +153,13 @@ arg = line[i+1:].strip() method = getattr(self, 'smtp_' + command, None) if not method: - self.push('502 Error: command "%s" not implemented' % command) + self.push_str('502 Error: command "%s" not implemented' % command, 'ascii') return method(arg) return else: if self.__state != self.DATA: - self.push('451 Internal confusion') + self.push_str('451 Internal confusion', 'ascii') return # Remove extraneous carriage returns and de-transparency according # to RFC 821, Section 4.5.2. @@ -177,32 +177,32 @@ self.__rcpttos = [] self.__mailfrom = None self.__state = self.COMMAND - self.set_terminator('\r\n') + self.set_terminator(b'\r\n') if not status: - self.push('250 Ok') + self.push_str('250 Ok', 'ascii') else: - self.push(status) + self.push_str(status, 'ascii') # SMTP and ESMTP commands def smtp_HELO(self, arg): if not arg: - self.push('501 Syntax: HELO hostname') + self.push_str('501 Syntax: HELO hostname', 'ascii') return if self.__greeting: - self.push('503 Duplicate HELO/EHLO') + self.push_str('503 Duplicate HELO/EHLO', 'ascii') else: self.__greeting = arg - self.push('250 %s' % self.__fqdn) + self.push_str('250 %s' % self.__fqdn, 'ascii') def smtp_NOOP(self, arg): if arg: - self.push('501 Syntax: NOOP') + self.push_str('501 Syntax: NOOP', 'ascii') else: - self.push('250 Ok') + self.push_str('250 Ok', 'ascii') def smtp_QUIT(self, arg): # args is ignored - self.push('221 Bye') + self.push_str('221 Bye', 'ascii') self.close_when_done() # factored @@ -223,49 +223,49 @@ print('===> MAIL', arg, file=DEBUGSTREAM) address = self.__getaddr('FROM:', arg) if arg else None if not address: - self.push('501 Syntax: MAIL FROM:
') + self.push_str('501 Syntax: MAIL FROM:', 'ascii') return if self.__mailfrom: - self.push('503 Error: nested MAIL command') + self.push_str('503 Error: nested MAIL command', 'ascii') return self.__mailfrom = address print('sender:', self.__mailfrom, file=DEBUGSTREAM) - self.push('250 Ok') + self.push_str('250 Ok', 'ascii') def smtp_RCPT(self, arg): print('===> RCPT', arg, file=DEBUGSTREAM) if not self.__mailfrom: - self.push('503 Error: need MAIL command') + self.push_str('503 Error: need MAIL command', 'ascii') return address = self.__getaddr('TO:', arg) if arg else None if not address: - self.push('501 Syntax: RCPT TO: ') + self.push_str('501 Syntax: RCPT TO: ', 'ascii') return self.__rcpttos.append(address) print('recips:', self.__rcpttos, file=DEBUGSTREAM) - self.push('250 Ok') + self.push_str('250 Ok', 'ascii') def smtp_RSET(self, arg): if arg: - self.push('501 Syntax: RSET') + self.push_str('501 Syntax: RSET', 'ascii') return # Resets the sender, recipients, and data, but not the greeting self.__mailfrom = None self.__rcpttos = [] self.__data = '' self.__state = self.COMMAND - self.push('250 Ok') + self.push_str('250 Ok', 'ascii') def smtp_DATA(self, arg): if not self.__rcpttos: - self.push('503 Error: need RCPT command') + self.push_str('503 Error: need RCPT command', 'ascii') return if arg: - self.push('501 Syntax: DATA') + self.push_str('501 Syntax: DATA', 'ascii') return self.__state = self.DATA - self.set_terminator('\r\n.\r\n') - self.push('354 End data with