Documentation and tests for AF_UNIX PEP 383 behaviour. Attributes the
changes to Python 3.2.

testUndecodableAddr is skipped on interpreters that do not provide
sys.setfilesystemencoding() (the function was removed in Python 3.2).

diff --git a/Doc/library/socket.rst b/Doc/library/socket.rst
--- a/Doc/library/socket.rst
+++ b/Doc/library/socket.rst
@@ -34,8 +34,15 @@ in the C interface: as with :meth:`read`
 files, buffer allocation on receive operations is automatic, and buffer length
 is implicit on send operations.
 
-Socket addresses are represented as follows: A single string is used for the
-:const:`AF_UNIX` address family. A pair ``(host, port)`` is used for the
+The address format required by a particular socket object is
+automatically selected based on the address family specified when the
+socket object was created. Socket addresses are represented as follows:
+
+``None`` does not represent an address in any family, but is returned
+when the operating system does not return an address structure, as is
+often the case when receiving data on a connected socket.
+
+A pair ``(host, port)`` is used for the
 :const:`AF_INET` address family, where *host* is a string representing either a
 hostname in Internet domain notation like ``'daring.cwi.nl'`` or an IPv4 address
 like ``'100.50.200.5'``, and *port* is an integral port number. For
@@ -44,10 +51,7 @@ scopeid)`` is used, where *flowinfo* and
 and ``sin6_scope_id`` member in :const:`struct sockaddr_in6` in C. For
 :mod:`socket` module methods, *flowinfo* and *scopeid* can be omitted just for
 backward compatibility. Note, however, omission of *scopeid* can cause problems
-in manipulating scoped IPv6 addresses. Other address families are currently not
-supported. The address format required by a particular socket object is
-automatically selected based on the address family specified when the socket
-object was created.
+in manipulating scoped IPv6 addresses.
 
 For IPv4 addresses, two special forms are accepted instead of a host address:
 the empty string represents :const:`INADDR_ANY`, and the string
@@ -62,6 +66,23 @@ differently into an actual IPv4/v6 addre
 resolution and/or the host configuration. For deterministic behavior use a
 numeric address in *host* portion.
 
+The address of an :const:`AF_UNIX` socket bound to a file system node
+is represented as a string, using the file system encoding and the
+``'surrogateescape'`` error handler (see :pep:`383`). An address in
+Linux's abstract namespace is returned as a :class:`bytes` object with
+an initial null byte; note that sockets in this namespace can
+communicate with normal file system sockets, so programs intended to
+run on Linux may need to deal with both types of address. A string or
+:class:`bytes` object can be used for either type of address when
+passing it as an argument. The address of an unbound :const:`AF_UNIX`
+socket is usually returned as an empty string or ``None``; some
+platforms can return the latter even in contexts where a real address
+would normally be expected.
+
+.. versionchanged:: 3.2
+   Previously, :const:`AF_UNIX` socket paths were assumed to use UTF-8
+   encoding.
+
 AF_NETLINK sockets are represented as pairs ``pid, groups``.
 
 
diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py
--- a/Lib/test/test_socket.py
+++ b/Lib/test/test_socket.py
@@ -1498,6 +1498,75 @@ class TestLinuxAbstractNamespace(unittes
         s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
         self.assertRaises(socket.error, s.bind, address)
 
+    def testStrName(self):
+        # Check that an abstract name can be passed as a string.
+        s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+        try:
+            s.bind("\x00python\x00test\x00")
+            self.assertEqual(s.getsockname(), b"\x00python\x00test\x00")
+        finally:
+            s.close()
+
+class TestUnixDomain(unittest.TestCase):
+
+    def setUp(self):
+        self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+
+    def tearDown(self):
+        self.sock.close()
+
+    def encoded(self, path):
+        # Return the given path encoded in the file system encoding,
+        # or skip the test if this is not possible.
+        try:
+            return os.fsencode(path)
+        except UnicodeEncodeError:
+            self.skipTest(
+                "Pathname {0!a} cannot be represented in file "
+                "system encoding {1!r}".format(
+                    path, sys.getfilesystemencoding()))
+
+    def testStrAddr(self):
+        # Test binding to and retrieving a normal string pathname.
+        path = os.path.abspath(support.TESTFN)
+        self.sock.bind(path)
+        self.addCleanup(support.unlink, path)
+        self.assertEqual(self.sock.getsockname(), path)
+
+    def testBytesAddr(self):
+        # Test binding to a bytes pathname.
+        path = os.path.abspath(support.TESTFN)
+        self.sock.bind(self.encoded(path))
+        self.addCleanup(support.unlink, path)
+        self.assertEqual(self.sock.getsockname(), path)
+
+    def testSurrogateescapeBind(self):
+        # Test binding to a valid non-ASCII pathname, with the
+        # non-ASCII bytes supplied using surrogateescape encoding.
+        path = os.path.abspath(support.TESTFN_UNICODE)
+        b = self.encoded(path)
+        self.sock.bind(b.decode("ascii", "surrogateescape"))
+        self.addCleanup(support.unlink, path)
+        self.assertEqual(self.sock.getsockname(), path)
+
+    def testUndecodableAddr(self):
+        # Test binding to a non-ASCII pathname, then reading the
+        # address back with the file system encoding set to ASCII.
+        # sys.setfilesystemencoding() was removed in Python 3.2, so skip
+        # rather than fail with AttributeError where it is missing.
+        if not hasattr(sys, "setfilesystemencoding"):
+            self.skipTest("sys.setfilesystemencoding() is not available")
+        path = os.path.abspath(support.TESTFN_UNICODE)
+        b = self.encoded(path)
+        self.sock.bind(b)
+        self.addCleanup(support.unlink, path)
+        saved_encoding = sys.getfilesystemencoding()
+        try:
+            sys.setfilesystemencoding("ascii")
+            name = self.sock.getsockname()
+        finally:
+            sys.setfilesystemencoding(saved_encoding)
+        self.assertEqual(name, b.decode("ascii", "surrogateescape"))
 
 @unittest.skipUnless(thread, 'Threading required for this test.')
 class BufferIOTest(SocketConnectedTest):
@@ -1705,6 +1774,8 @@ def test_main():
     ])
     if hasattr(socket, "socketpair"):
         tests.append(BasicSocketPairTest)
+    if hasattr(socket, "AF_UNIX"):
+        tests.append(TestUnixDomain)
     if sys.platform == 'linux2':
         tests.append(TestLinuxAbstractNamespace)
         tests.append(TestLinuxPathLen)