Message 300043 - Python tracker

➜

This issue tracker has been migrated to GitHub, and is currently read-only.
For more information, see the GitHub FAQs in the Python's Developer Guide.

Author	tianlynn
Recipients	tianlynn
Date	2017-08-10.04:48:24
SpamBayes Score	-1.0
Marked as misclassified	Yes
Message-id	<1502340505.45.0.797067466934.issue31170@psf.upfronthosting.co.za>
In-reply-to

Content
utf8_toUtf8(const ENCODING UNUSED_P(enc), const char fromP, const char fromLim, char *toP, const char toLim) { char to; const char from; const char fromLimInitial = fromLim; / Avoid copying partial characters. / align_limit_to_full_utf8_characters(fromP, &fromLim); for (to = toP, from = fromP; (from < fromLim) && (to < toLim); from++, to++) to = from; fromP = from; toP = to; if (fromLim < fromLimInitial) return XML_CONVERT_INPUT_INCOMPLETE; else if ((to == toLim) && (from < fromLim)) // <===== Bug is here. In case (to == toLim), it's possible that // from is still pointing to partial character. For example, // a character with 3 bytes (A, B, C) and form is pointing to C. // It means only A and B is copied to output buffer. Next // scanning will start with C which could be considered as invalid // byte and got dropped. After this, only "AB" is kept in memory // and thus it will lead to invalid continuation byte. return XML_CONVERT_OUTPUT_EXHAUSTED; else return XML_CONVERT_COMPLETED; }

utf8_toUtf8(const ENCODING *UNUSED_P(enc),
            const char **fromP, const char *fromLim,
            char **toP, const char *toLim)
{
  char *to;
  const char *from;
  const char *fromLimInitial = fromLim;

  /* Avoid copying partial characters. */
  align_limit_to_full_utf8_characters(*fromP, &fromLim);

  for (to = *toP, from = *fromP; (from < fromLim) && (to < toLim); from++, to++)
    *to = *from;
  *fromP = from;
  *toP = to;

  if (fromLim < fromLimInitial)
    return XML_CONVERT_INPUT_INCOMPLETE;
  else if ((to == toLim) && (from < fromLim))
    // <===== Bug is here. In case (to == toLim), it's possible that
    //        from is still pointing to partial character. For example,
    //        a character with 3 bytes (A, B, C) and form is pointing to C.
    //        It means only A and B is copied to output buffer. Next
    //        scanning will start with C which could be considered as invalid
    //        byte and got dropped. After this, only "AB" is kept in memory
    //        and thus it will lead to invalid continuation byte.
    return XML_CONVERT_OUTPUT_EXHAUSTED;
  else
    return XML_CONVERT_COMPLETED;
}

History
Date	User	Action	Args
2017-08-10 04:48:25	tianlynn	set	recipients: + tianlynn
2017-08-10 04:48:25	tianlynn	set	messageid: <1502340505.45.0.797067466934.issue31170@psf.upfronthosting.co.za>
2017-08-10 04:48:25	tianlynn	link	issue31170 messages
2017-08-10 04:48:24	tianlynn	create