# HG changeset patch # Parent a497e7faa09b9a836983635b96adbf216a39e4f5 Decode hostnames as ASCII/surrogateescape (except socket.gethostname() on Windows). diff --git a/Doc/library/os.rst b/Doc/library/os.rst --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -587,12 +587,21 @@ process and user. :func:`socket.gethostname` or even ``socket.gethostbyaddr(socket.gethostname())``. + The strings are converted using the file system encoding and + the ``'surrogateescape'`` error handler, except for + :attr:`nodename`, which is converted as ASCII with the + ``'surrogateescape'`` error handler (see the :mod:`socket` + module documentation for details). + Availability: recent flavors of Unix. .. versionchanged:: 3.3 Return type changed from a tuple to a tuple-like object with named attributes. + .. versionchanged:: XXX + The :attr:`nodename` attribute is now converted as + ASCII/``surrogateescape``. .. function:: unsetenv(key) diff --git a/Doc/library/socket.rst b/Doc/library/socket.rst --- a/Doc/library/socket.rst +++ b/Doc/library/socket.rst @@ -124,6 +124,14 @@ differently into an actual IPv4/v6 addre resolution and/or the host configuration. For deterministic behavior use a numeric address in *host* portion. +When a hostname is returned by a system interface (except +:func:`gethostname` on Windows), it is decoded into a string +using the ``'ascii'`` codec and the ``'surrogateescape'`` error +handler; this leaves ASCII bytes as ASCII, including IDNA +ASCII-compatible encodings (see :mod:`encodings.idna`), but +converts any non-ASCII bytes to the Unicode lone surrogate codes +U+DC80...U+DCFF. + All errors raise exceptions. 
The normal exceptions for invalid argument types and out-of-memory conditions can be raised; starting from Python 3.3, errors related to socket or address semantics raise :exc:`OSError` or one of its diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -4464,6 +4464,7 @@ os_uname_impl(PyModuleDef *module) struct utsname u; int res; PyObject *value; + PyObject *o; Py_BEGIN_ALLOW_THREADS res = uname(&u); @@ -4486,7 +4487,13 @@ os_uname_impl(PyModuleDef *module) } \ SET(0, u.sysname); - SET(1, u.nodename); + o = PyUnicode_DecodeASCII(u.nodename, strlen(u.nodename), + "surrogateescape"); + if (!o) { + Py_DECREF(value); + return NULL; + } + PyStructSequence_SET_ITEM(value, 1, o); SET(2, u.release); SET(3, u.version); SET(4, u.machine); diff --git a/Modules/socketmodule.c b/Modules/socketmodule.c --- a/Modules/socketmodule.c +++ b/Modules/socketmodule.c @@ -895,6 +895,15 @@ static PyThread_type_lock netdb_lock; #endif +/* Return the string representation for the given hostname. */ + +static PyObject * +decode_hostname(const char *name) +{ + return PyUnicode_DecodeASCII(name, strlen(name), "surrogateescape"); +} + + /* Convert a string specifying a host name or one of a few symbolic names to a numeric IP address. This usually calls gethostbyname() to do the work; the names "" and "<broadcast>" are special. 
@@ -4440,7 +4449,7 @@ socket_gethostname(PyObject *self, PyObj if (res < 0) return set_error(); buf[sizeof buf - 1] = '\0'; - return PyUnicode_DecodeFSDefault(buf); + return decode_hostname(buf); #endif } @@ -4562,7 +4571,7 @@ gethost_common(struct hostent *h, struct if (h->h_aliases) { for (pch = h->h_aliases; *pch != NULL; pch++) { int status; - tmp = PyUnicode_FromString(*pch); + tmp = decode_hostname(*pch); if (tmp == NULL) goto err; @@ -4630,7 +4639,8 @@ gethost_common(struct hostent *h, struct goto err; } - rtn_tuple = Py_BuildValue("sOO", h->h_name, name_list, addr_list); + rtn_tuple = Py_BuildValue("NOO", decode_hostname(h->h_name), + name_list, addr_list); err: Py_XDECREF(name_list); @@ -5573,9 +5583,9 @@ socket_getaddrinfo(PyObject *self, PyObj makesockaddr(-1, res->ai_addr, res->ai_addrlen, protocol); if (addr == NULL) goto err; - single = Py_BuildValue("iiisO", res->ai_family, + single = Py_BuildValue("iiiNO", res->ai_family, res->ai_socktype, res->ai_protocol, - res->ai_canonname ? res->ai_canonname : "", + decode_hostname(res->ai_canonname ? res->ai_canonname : ""), addr); Py_DECREF(addr); if (single == NULL) @@ -5681,7 +5691,7 @@ socket_getnameinfo(PyObject *self, PyObj set_gaierror(error); goto fail; } - ret = Py_BuildValue("ss", hbuf, pbuf); + ret = Py_BuildValue("Ns", decode_hostname(hbuf), pbuf); fail: if (res)