Message100010
> --- python/trunk/Objects/unicodeobject.c (original)
> > +++ python/trunk/Objects/unicodeobject.c Wed Feb 24 00:16:07 2010
> > @@ -8170,6 +8170,7 @@
> > size_t buflen,
> > PyObject *v)
> > {
> > + PyObject *s;
> > /* presume that the buffer is at least 2 characters long */
> > if (PyUnicode_Check(v)) {
> > if (PyUnicode_GET_SIZE(v) != 1)
> > @@ -8180,7 +8181,14 @@
> > else if (PyString_Check(v)) {
> > if (PyString_GET_SIZE(v) != 1)
> > goto onError;
> > - buf[0] = (Py_UNICODE)PyString_AS_STRING(v)[0];
> > + /* #7649: if the char is a non-ascii (i.e. in range(0x80,0x100)) byte
> > + string, "u'%c' % char" should fail with a UnicodeDecodeError */
> > + s = PyUnicode_FromStringAndSize(PyString_AS_STRING(v), 1);
> > + /* if the char is not decodable return -1 */
> > + if (s == NULL)
> > + return -1;
> > + buf[0] = PyUnicode_AS_UNICODE(s)[0];
> > + Py_DECREF(s);
That's a *very* inefficient way of doing this.
Could you please check for chars above 0x7f first and then use
PyUnicode_Decode() instead of the PyUnicode_FromStringAndSize()
API? (That API should not have been backported from Python 3.x
to Python 2.6, but it's too late to change that now.)
Thanks. |
|
Date |
User |
Action |
Args |
2010-02-24 08:15:43 | lemburg | set | recipients:
+ lemburg, doerwalter, vstinner, eric.smith, ezio.melotti, flox |
2010-02-24 08:15:42 | lemburg | set | messageid: <1266999342.42.0.873236815287.issue7649@psf.upfronthosting.co.za> |
2010-02-24 08:15:40 | lemburg | link | issue7649 messages |
2010-02-24 08:15:38 | lemburg | create | |
|