Decode hostnames as ASCII/surrogateescape rather than UTF-8. diff --git a/Modules/socketmodule.c b/Modules/socketmodule.c --- a/Modules/socketmodule.c +++ b/Modules/socketmodule.c @@ -737,6 +737,13 @@ PyThread_type_lock netdb_lock; #endif +static PyObject * +unicode_from_hostname(const char *buf) +{ + return PyUnicode_DecodeASCII(buf, strlen(buf), "surrogateescape"); +} + + /* Convert a string specifying a host name or one of a few symbolic names to a numeric IP address. This usually calls gethostbyname() to do the work; the names "" and "<broadcast>" are special. @@ -2963,7 +2970,7 @@ socket_gethostname(PyObject *self, PyObj if (res < 0) return set_error(); buf[sizeof buf - 1] = '\0'; - return PyUnicode_FromString(buf); + return unicode_from_hostname(buf); } PyDoc_STRVAR(gethostname_doc, @@ -3045,7 +3052,7 @@ gethost_common(struct hostent *h, struct if (h->h_aliases) { for (pch = h->h_aliases; *pch != NULL; pch++) { int status; - tmp = PyUnicode_FromString(*pch); + tmp = unicode_from_hostname(*pch); if (tmp == NULL) goto err; @@ -3113,7 +3120,8 @@ gethost_common(struct hostent *h, struct goto err; } - rtn_tuple = Py_BuildValue("sOO", h->h_name, name_list, addr_list); + rtn_tuple = Py_BuildValue("NOO", unicode_from_hostname(h->h_name), + name_list, addr_list); err: Py_XDECREF(name_list); @@ -3879,9 +3887,9 @@ socket_getaddrinfo(PyObject *self, PyObj makesockaddr(-1, res->ai_addr, res->ai_addrlen, protocol); if (addr == NULL) goto err; - single = Py_BuildValue("iiisO", res->ai_family, + single = Py_BuildValue("iiiNO", res->ai_family, res->ai_socktype, res->ai_protocol, - res->ai_canonname ? res->ai_canonname : "", + unicode_from_hostname(res->ai_canonname ? res->ai_canonname : ""), addr); Py_DECREF(addr); if (single == NULL) @@ -3980,7 +3988,7 @@ socket_getnameinfo(PyObject *self, PyObj set_gaierror(error); goto fail; } - ret = Py_BuildValue("ss", hbuf, pbuf); + ret = Py_BuildValue("Ns", unicode_from_hostname(hbuf), pbuf); fail: if (res)