diff -r 07571d2968b0 Doc/library/imaplib.rst --- a/Doc/library/imaplib.rst Sun May 03 13:00:37 2015 -0400 +++ b/Doc/library/imaplib.rst Sun May 03 20:22:26 2015 -0400 @@ -26,16 +26,24 @@ :rfc:`2060`. It is backward compatible with IMAP4 (:rfc:`1730`) servers, but note that the ``STATUS`` command is not supported in IMAP4. +The *enable_UTF8* constructor parameter may be used to turn on :rfc:`6855` +support. + Three classes are provided by the :mod:`imaplib` module, :class:`IMAP4` is the base class: -.. class:: IMAP4(host='', port=IMAP4_PORT) +.. class:: IMAP4(host='', port=IMAP4_PORT, enable_UTF8=False) This class implements the actual IMAP4 protocol. The connection is created and protocol version (IMAP4 or IMAP4rev1) is determined when the instance is initialized. If *host* is not specified, ``''`` (the local host) is used. If - *port* is omitted, the standard IMAP4 port (143) is used. + *port* is omitted, the standard IMAP4 port (143) is used. If *enable_UTF8* + is set to ``False`` (the default) the client operates in ASCII mode. If + it is set to ``None`` or ``True`` and the server advertises UTF8 + capability, the client operates in UTF8 mode (:rfc:`6855`). If + *enable_UTF8* is set to ``True``, an error will be raised if the server + does not support UTF8. The :class:`IMAP4` class supports the :keyword:`with` statement. When used like this, the IMAP4 ``LOGOUT`` command is issued automatically when the @@ -49,6 +57,7 @@ .. versionchanged:: 3.5 Support for the :keyword:`with` statement was added. + Support for :rfc:`6855` via the *enable_UTF8* keyword was added. Three exceptions are defined as attributes of the :class:`IMAP4` class: @@ -77,12 +86,18 @@ There's also a subclass for secure connections: -.. class:: IMAP4_SSL(host='', port=IMAP4_SSL_PORT, keyfile=None, certfile=None, ssl_context=None) +.. 
class:: IMAP4_SSL(host='', port=IMAP4_SSL_PORT, keyfile=None, \ + certfile=None, ssl_context=None, enable_UTF8=False) This is a subclass derived from :class:`IMAP4` that connects over an SSL encrypted socket (to use this class you need a socket module that was compiled with SSL support). If *host* is not specified, ``''`` (the local host) is used. If *port* is omitted, the standard IMAP4-over-SSL port (993) is used. + If *enable_UTF8* is set to ``False`` (the default) the client operates in + ASCII mode. If it is set to ``None`` or ``True`` and the server advertises + UTF8 capability, the client operates in UTF8 mode (:rfc:`6855`). If + *enable_UTF8* is set to ``True``, an error will be raised if the server does + not support UTF8. *ssl_context* is a :class:`ssl.SSLContext` object which allows bundling SSL configuration options, certificates and private keys into a single (potentially long-lived) structure. Please read :ref:`ssl-security` for @@ -102,14 +117,25 @@ :attr:`ssl.SSLContext.check_hostname` and *Server Name Indication* (see :data:`ssl.HAS_SNI`). + .. versionadded:: 3.5 + Support for :rfc:`6855` via the *enable_UTF8* keyword. + The second subclass allows for connections created by a child process: -.. class:: IMAP4_stream(command) +.. class:: IMAP4_stream(command, enable_UTF8=False) This is a subclass derived from :class:`IMAP4` that connects to the ``stdin/stdout`` file descriptors created by passing *command* to ``subprocess.Popen()``. + If *enable_UTF8* is set to ``False`` (the default) the client operates in + ASCII mode. If it is set to ``None`` or ``True`` and the server advertises + UTF8 capability, the client operates in UTF8 mode (:rfc:`6855`). If + *enable_UTF8* is set to ``True``, an error will be raised if the server does + not support UTF8. + + .. versionadded:: 3.5 + Support for :rfc:`6855` via the *enable_UTF8* keyword. The following utility functions are defined: @@ -211,6 +237,15 @@ that will be base64 encoded and sent to the server. 
It should return ``None`` if the client abort response ``*`` should be sent instead. + If *enable_UTF8* was specified as ``None`` or ``True`` in the constructor + and the server supports UTF8, an :meth:`enable` command for ``UTF8=ACCEPT`` + will be automatically sent if authentication succeeds, and + :attr:`utf8_enabled` will be set to ``True``. + + .. versionchanged:: 3.5 + String usernames and passwords are now encoded to ``utf-8`` instead of + being limited to ASCII. + .. method:: IMAP4.check() @@ -243,6 +278,17 @@ Delete the ACLs (remove any rights) set for who on mailbox. +.. method:: IMAP4.enable(capability) + + Enable *capability* (see :rfc:`5161`). Most capabilities do not need to be + enabled. In addition, this command should *not* be used to enable the + ``UTF8=ACCEPT`` capability; instead use the *enable_UTF8* constructor + keyword parameter, which will also correctly update the client's internal + state and automatically send the ``enable`` after authentication completes. + + .. versionadded:: 3.5 + + .. method:: IMAP4.expunge() Permanently remove deleted items from selected mailbox. Generates an ``EXPUNGE`` @@ -380,7 +426,8 @@ Search mailbox for matching messages. *charset* may be ``None``, in which case no ``CHARSET`` will be specified in the request to the server. The IMAP protocol requires that at least one criterion be specified; an exception will be - raised when the server returns an error. + raised when the server returns an error. *charset* must be ``None`` if + :attr:`utf8_enabled` is ``True``. Example:: @@ -542,6 +589,15 @@ the module variable ``Debug``. Values greater than three trace each command. +.. attribute:: utf8_enabled + + Boolean value that is normally ``False``, but is set to ``True`` after + authentication completes if and only if the *enable_UTF8* constructor + argument was set to ``None`` or ``True`` and the server supports UTF8. + + .. versionadded:: 3.5 + + .. 
_imap4-example: IMAP4 Example diff -r 07571d2968b0 Lib/imaplib.py --- a/Lib/imaplib.py Sun May 03 13:00:37 2015 -0400 +++ b/Lib/imaplib.py Sun May 03 20:22:26 2015 -0400 @@ -66,6 +66,7 @@ 'CREATE': ('AUTH', 'SELECTED'), 'DELETE': ('AUTH', 'SELECTED'), 'DELETEACL': ('AUTH', 'SELECTED'), + 'ENABLE': ('AUTH', ), 'EXAMINE': ('AUTH', 'SELECTED'), 'EXPUNGE': ('SELECTED',), 'FETCH': ('SELECTED',), @@ -107,12 +108,17 @@ br' (?P[0-9][0-9]):(?P[0-9][0-9]):(?P[0-9][0-9])' br' (?P[-+])(?P[0-9][0-9])(?P[0-9][0-9])' br'"') +# Literal is no longer used; kept for backward compatibility. Literal = re.compile(br'.*{(?P\d+)}$', re.ASCII) MapCRLF = re.compile(br'\r\n|\r|\n') Response_code = re.compile(br'\[(?P[A-Z-]+)( (?P[^\]]*))?\]') Untagged_response = re.compile(br'\* (?P[A-Z-]+)( (?P.*))?') +# Untagged_status is no longer used; kept for backward compatibility Untagged_status = re.compile( br'\* (?P\d+) (?P[A-Z-]+)( (?P.*))?', re.ASCII) +# We compile these in _mode_xxx. +_Literal = br'.*{(?P\d+)}$' +_Untagged_status = br'\* (?P\d+) (?P[A-Z-]+)( (?P.*))?' @@ -120,10 +126,12 @@ """IMAP4 client class. - Instantiate with: IMAP4([host[, port]]) + Instantiate with: IMAP4([host[, port[, enable_UTF8]]]) host - host's name (default: localhost); port - port number (default: standard IMAP4 port). + enable_UTF8 - if True, require RFC6855 UTF8 support, + if None, enable it iff the server supports it. All IMAP4rev1 commands are supported by methods of the same name (in lower-case). @@ -166,7 +174,7 @@ class abort(error): pass # Service errors - close and retry class readonly(abort): pass # Mailbox status changed to READ-ONLY - def __init__(self, host = '', port = IMAP4_PORT): + def __init__(self, host='', port=IMAP4_PORT, enable_UTF8=False): self.debug = Debug self.state = 'LOGOUT' self.literal = None # A literal argument to a command @@ -176,6 +184,7 @@ self.is_readonly = False # READ-ONLY desired state self.tagnum = 0 self._tls_established = False + self._mode_ascii() # Open socket to server. 
@@ -190,6 +199,31 @@ pass raise + server_has_utf8 = any(x.startswith('UTF8=') for x in self.capabilities) + if enable_UTF8 is None: + enable_UTF8 = server_has_utf8 + elif enable_UTF8 and not server_has_utf8: + self.sock.close() + raise IMAP4.abort('Server does not support UTF8') + elif not enable_UTF8 and 'UTF8=ONLY' in self.capabilities: + self.sock.close() + raise IMAP4.abort('Server requires UTF8') + self._enable_UTF8 = enable_UTF8 + + + def _mode_ascii(self): + self.utf8_enabled = False + self._encoding = 'ascii' + self.Literal = re.compile(_Literal, re.ASCII) + self.Untagged_status = re.compile(_Untagged_status, re.ASCII) + + + def _mode_utf8(self): + self.utf8_enabled = True + self._encoding = 'utf-8' + self.Literal = re.compile(_Literal) + self.Untagged_status = re.compile(_Untagged_status) + def _connect(self): # Create unique tag for this session, @@ -360,7 +394,10 @@ date_time = Time2Internaldate(date_time) else: date_time = None - self.literal = MapCRLF.sub(CRLF, message) + literal = MapCRLF.sub(CRLF, message) + if self.utf8_enabled: + literal = b'UTF8 (' + literal + b')' + self.literal = literal return self._simple_command(name, mailbox, flags, date_time) @@ -379,6 +416,10 @@ response argument it is passed will be a bytes. It should return bytes data that will be base64 encoded and sent to the server. It should return None if the client abort response '*' should be sent instead. + + If the server supports UTF8 and the constructor enable_UTF8 argument + was not False, an enable('UTF8=ACCEPT') will be sent to the server if + authentication succeeds. """ mech = mechanism.upper() # XXX: shouldn't this code be removed, not commented out? @@ -390,6 +431,11 @@ if typ != 'OK': raise self.error(dat[-1]) self.state = 'AUTH' + if self._enable_UTF8: + # We've already checked that the server supports it in __init__. 
+ typ2, _ = self.enable('UTF8=ACCEPT') + if typ2 == 'OK': + self._mode_utf8() return typ, dat @@ -455,6 +501,15 @@ """ return self._simple_command('DELETEACL', mailbox, who) + def enable(self, capability): + """Send an RFC5161 enable string to the server. + + (typ, [data]) = .enable(capability) + """ + if 'ENABLE' not in self.capabilities: + raise IMAP4.error("Server does not support ENABLE") + return self._simple_command('ENABLE', capability) + def expunge(self): """Permanently remove deleted items from selected mailbox. @@ -561,7 +616,7 @@ def _CRAM_MD5_AUTH(self, challenge): """ Authobject to use with CRAM-MD5 authentication. """ import hmac - pwd = (self.password.encode('ASCII') if isinstance(self.password, str) + pwd = (self.password.encode('utf-8') if isinstance(self.password, str) else self.password) return self.user + " " + hmac.HMAC(pwd, challenge, 'md5').hexdigest() @@ -661,9 +716,12 @@ (typ, [data]) = .search(charset, criterion, ...) 'data' is space separated list of matching message numbers. + If UTF8 is enabled, charset MUST be None. 
""" name = 'SEARCH' if charset: + if self.utf8_enabled: + raise IMAP4.error("Non-None charset not valid in UTF8 mode") typ, dat = self._simple_command(name, 'CHARSET', charset, *criteria) else: typ, dat = self._simple_command(name, *criteria) @@ -877,7 +935,7 @@ def _check_bye(self): bye = self.untagged_responses.get('BYE') if bye: - raise self.abort(bye[-1].decode('ascii', 'replace')) + raise self.abort(bye[-1].decode(self._encoding, 'replace')) def _command(self, name, *args): @@ -898,12 +956,12 @@ raise self.readonly('mailbox status changed to READ-ONLY') tag = self._new_tag() - name = bytes(name, 'ASCII') + name = bytes(name, self._encoding) data = tag + b' ' + name for arg in args: if arg is None: continue if isinstance(arg, str): - arg = bytes(arg, "ASCII") + arg = bytes(arg, self._encoding) data = data + b' ' + arg literal = self.literal @@ -913,7 +971,7 @@ literator = literal else: literator = None - data = data + bytes(' {%s}' % len(literal), 'ASCII') + data = data + bytes(' {%s}' % len(literal), self._encoding) if __debug__: if self.debug >= 4: @@ -978,7 +1036,7 @@ typ, dat = self.capability() if dat == [None]: raise self.error('no CAPABILITY response from server') - dat = str(dat[-1], "ASCII") + dat = str(dat[-1], self._encoding) dat = dat.upper() self.capabilities = tuple(dat.split()) @@ -997,10 +1055,10 @@ if self._match(self.tagre, resp): tag = self.mo.group('tag') if not tag in self.tagged_commands: - raise self.abort('unexpected tagged response: %s' % resp) + raise self.abort('unexpected tagged response: %r' % resp) typ = self.mo.group('type') - typ = str(typ, 'ASCII') + typ = str(typ, self._encoding) dat = self.mo.group('data') self.tagged_commands[tag] = (typ, [dat]) else: @@ -1009,7 +1067,7 @@ # '*' (untagged) responses? 
if not self._match(Untagged_response, resp): - if self._match(Untagged_status, resp): + if self._match(self.Untagged_status, resp): dat2 = self.mo.group('data2') if self.mo is None: @@ -1019,17 +1077,17 @@ self.continuation_response = self.mo.group('data') return None # NB: indicates continuation - raise self.abort("unexpected response: '%s'" % resp) + raise self.abort("unexpected response: %r" % resp) typ = self.mo.group('type') - typ = str(typ, 'ascii') + typ = str(typ, self._encoding) dat = self.mo.group('data') if dat is None: dat = b'' # Null untagged response if dat2: dat = dat + b' ' + dat2 # Is there a literal to come? - while self._match(Literal, dat): + while self._match(self.Literal, dat): # Read literal direct from connection. @@ -1053,7 +1111,7 @@ if typ in ('OK', 'NO', 'BAD') and self._match(Response_code, dat): typ = self.mo.group('type') - typ = str(typ, "ASCII") + typ = str(typ, self._encoding) self._append_untagged(typ, self.mo.group('data')) if __debug__: @@ -1123,7 +1181,7 @@ def _new_tag(self): - tag = self.tagpre + bytes(str(self.tagnum), 'ASCII') + tag = self.tagpre + bytes(str(self.tagnum), self._encoding) self.tagnum = self.tagnum + 1 self.tagged_commands[tag] = None return tag @@ -1198,7 +1256,8 @@ """IMAP4 client class over SSL connection - Instantiate with: IMAP4_SSL([host[, port[, keyfile[, certfile[, ssl_context]]]]]) + Instantiate with: IMAP4_SSL([host[, port[, keyfile[, certfile[, + ssl_context[, enable_UTF8]]]]]]) host - host's name (default: localhost); port - port number (default: standard IMAP4 SSL port); @@ -1208,12 +1267,15 @@ and private key (default: None) Note: if ssl_context is provided, then parameters keyfile or certfile should not be set otherwise ValueError is raised. + enable_UTF8 - if True, require RFC6855 UTF8 support, + if None, enable it iff the server supports it. for more documentation see the docstring of the parent class IMAP4. 
""" - def __init__(self, host='', port=IMAP4_SSL_PORT, keyfile=None, certfile=None, ssl_context=None): + def __init__(self, host='', port=IMAP4_SSL_PORT, keyfile=None, + certfile=None, ssl_context=None, enable_UTF8=False): if ssl_context is not None and keyfile is not None: raise ValueError("ssl_context and keyfile arguments are mutually " "exclusive") @@ -1227,7 +1289,7 @@ ssl_context = ssl._create_stdlib_context(certfile=certfile, keyfile=keyfile) self.ssl_context = ssl_context - IMAP4.__init__(self, host, port) + IMAP4.__init__(self, host, port, enable_UTF8=enable_UTF8) def _create_socket(self): sock = IMAP4._create_socket(self) @@ -1249,17 +1311,19 @@ """IMAP4 client class over a stream - Instantiate with: IMAP4_stream(command) + Instantiate with: IMAP4_stream(command[, enable_UTF8]) - where "command" is a string that can be passed to subprocess.Popen() + "command" - a string that can be passed to subprocess.Popen() + enable_UTF8 - if True, require RFC6855 UTF8 support, + if None, enable it iff the server supports it. for more documentation see the docstring of the parent class IMAP4. 
""" - def __init__(self, command): + def __init__(self, command, enable_UTF8=False): self.command = command - IMAP4.__init__(self) + IMAP4.__init__(self, enable_UTF8=enable_UTF8) def open(self, host = None, port = None): @@ -1328,7 +1392,7 @@ # oup = b'' if isinstance(inp, str): - inp = inp.encode('ASCII') + inp = inp.encode('utf-8') while inp: if len(inp) > 48: t = inp[:48] diff -r 07571d2968b0 Lib/test/test_imaplib.py --- a/Lib/test/test_imaplib.py Sun May 03 13:00:37 2015 -0400 +++ b/Lib/test/test_imaplib.py Sun May 03 20:22:26 2015 -0400 @@ -226,9 +226,9 @@ self.reap_server(server, thread) @contextmanager - def reaped_pair(self, hdlr): + def reaped_pair(self, hdlr, **client_kwds): with self.reaped_server(hdlr) as server: - client = self.imap_class(*server.server_address) + client = self.imap_class(*server.server_address, **client_kwds) try: yield server, client finally: @@ -265,6 +265,112 @@ self.assertRaises(imaplib.IMAP4.abort, self.imap_class, *server.server_address) + class UTF8Server(SimpleIMAPHandler): + capabilities = 'AUTH ENABLE UTF8=ACCEPT' + + def cmd_ENABLE(self, tag, args): + self._send_tagged(tag, 'OK', 'ENABLE successful') + + def cmd_AUTHENTICATE(self, tag, args): + self._send_textline('+') + self.server.response = yield + self._send_tagged(tag, 'OK', 'FAKEAUTH successful') + + @reap_threads + def test_enable_raises_error_if_not_AUTH(self): + with self.reaped_pair(self.UTF8Server) as (server, client): + self.assertFalse(client.utf8_enabled) + self.assertRaises(imaplib.IMAP4.error, client.enable, 'foo') + self.assertFalse(client.utf8_enabled) + + # XXX also need a test that confirms enable is not allowed after SELECT. 
+ + @reap_threads + def test_enable_raises_error_if_no_capability(self): + class NoEnableServer(self.UTF8Server): + capabilities = 'AUTH' + with self.reaped_pair(NoEnableServer) as (server, client): + self.assertRaises(imaplib.IMAP4.error, client.enable, 'foo') + + @reap_threads + def test_enable_UTF8_True_raises_error_if_not_supported(self): + class NonUTF8Server(SimpleIMAPHandler): + pass + with self.assertRaises(imaplib.IMAP4.abort): + with self.reaped_pair(NonUTF8Server, enable_UTF8=True): + pass + + @reap_threads + def test_enable_UTF8_accept_False_raises_error_if_only_utf8(self): + class UTF8OnlyServer(SimpleIMAPHandler): + capabilities = 'AUTH UTF8=ONLY' + with self.assertRaises(imaplib.IMAP4.abort): + with self.reaped_pair(UTF8OnlyServer, enable_UTF8=False): + pass + + @reap_threads + def test_enable_UTF8_False_no_utf8_even_if_server_supports_it(self): + with self.reaped_pair(self.UTF8Server, + enable_UTF8=False) as (server, client): + self.assertFalse(client.utf8_enabled) + code, data = client.authenticate('MYAUTH', lambda x: b'fake') + self.assertFalse(client.utf8_enabled) + + @reap_threads + def test_enable_UTF8_None_enables_if_server_supports_it(self): + with self.reaped_pair(self.UTF8Server, + enable_UTF8=None) as (server, client): + self.assertFalse(client.utf8_enabled) + code, data = client.authenticate('MYAUTH', lambda x: b'fake') + self.assertTrue(client.utf8_enabled) + + @reap_threads + def test_enable_UTF8_None_not_enabled_if_no_server_utf8(self): + class NonUTF8Server(self.UTF8Server): + capabilities = 'AUTH' + with self.reaped_pair(NonUTF8Server, + enable_UTF8=None) as (server, client): + self.assertFalse(client.utf8_enabled) + code, data = client.authenticate('MYAUTH', lambda x: b'fake') + self.assertFalse(client.utf8_enabled) + + @reap_threads + def test_enable_UTF8_True_append(self): + + class UTF8AppendServer(self.UTF8Server): + def cmd_APPEND(self, tag, args): + self._send_textline('+') + self.server.response = yield + 
self._send_tagged(tag, 'OK', 'okay') + + with self.reaped_pair(UTF8AppendServer, + enable_UTF8=True) as (server, client): + self.assertEqual(client._encoding, 'ascii') + code, data = client.authenticate('MYAUTH', lambda x: b'fake') + self.assertEqual(code, 'OK') + self.assertEqual(server.response, + b'ZmFrZQ==\r\n') # b64 encoded 'fake' + self.assertEqual(client._encoding, 'utf-8') + msg_string = 'Subject: üñí©öðé' + typ, data = client.append( + None, None, None, msg_string.encode('utf-8')) + self.assertEqual(typ, 'OK') + self.assertEqual( + server.response, + ('UTF8 (%s)\r\n' % msg_string).encode('utf-8') + ) + + # XXX also need a test that makes sure that the Literal and Untagged_status + # regexes uses unicode in UTF8 mode instead of the default ASCII. + + @reap_threads + def test_search_disallows_charset_in_utf8_mode(self): + with self.reaped_pair(self.UTF8Server, + enable_UTF8=True) as (server, client): + code, data = client.authenticate('MYAUTH', lambda x: b'fake') + self.assertTrue(client.utf8_enabled) + self.assertRaises(imaplib.IMAP4.error, client.search, 'foo', 'bar') + @reap_threads def test_bad_auth_name(self):