# workaround for unicode escaping buffer overflow,
# see http://sourceforge.net/tracker/index.php?func=detail&aid=1541585&group_id=5470&atid=305470
# written by G. Brandl and Th. Waldmann

if len(u'\U00010000') == 1:
    # UCS-4, needs fixing
    def fixed_unicode_escape(string, quotes):
        """Return a backslash-escaped, ASCII-only rendering of *string*.

        Pure-Python replacement for the interpreter's unicode escaping.

        string -- the text to escape; it is walked character by character.
        quotes -- if true, build a repr()-style literal: a ``u`` prefix and
                  the escaped text wrapped in quote characters.  Double
                  quotes are used only when the text contains a single quote
                  and no double quote; otherwise single quotes are used.
                  If false, return just the escaped text with no prefix and
                  no quotes (the form the codec encoder needs).

        Per character, the first matching rule applies:
          * the active quote character or a backslash -> backslash + char
          * code point >= 0x10000                     -> \\UXXXXXXXX
          * code point >= 0x100                       -> \\uXXXX
          * tab, newline, carriage return             -> \\t, \\n, \\r
          * code point < 0x20 or >= 0x7F              -> \\xXX
          * anything else is copied through unchanged
        """
        qchar = ''
        pieces = []
        if quotes:
            if "'" in string and '"' not in string:
                qchar = '"'
            else:
                qchar = "'"
            # The 'u' prefix belongs to the quoted (repr) form only; emitting
            # it unconditionally would corrupt the codec output.
            pieces.append('u' + qchar)

        # Collect the fragments and join once instead of growing a string
        # with repeated concatenation.
        for ch in string:
            och = ord(ch)
            # escape quotes and backslashes (qchar is '' when quotes is
            # false, so only the backslash can match in that case)
            if ch == qchar or ch == '\\':
                pieces.append('\\' + str(ch))

            # map 21-bit characters to '\U00xxxxxx'
            elif och >= 0x10000:
                pieces.append('\\U%08x' % och)

            # map 16-bit characters to '\uxxxx'
            elif och >= 0x100:
                pieces.append('\\u%04x' % och)

            # map special whitespace to '\t', '\n', '\r'
            elif ch == '\t':
                pieces.append('\\t')
            elif ch == '\n':
                pieces.append('\\n')
            elif ch == '\r':
                pieces.append('\\r')

            # map non-printable US ASCII to '\xhh'
            elif och < 0x20 or och >= 0x7F:
                pieces.append('\\x%02x' % och)
            else:
                pieces.append(str(ch))

        if quotes:
            pieces.append(qchar)

        return ''.join(pieces)

    def new_repr(x, old_repr=repr):
        """Replacement for repr() that escapes unicode objects in Python.

        Anything that is not a unicode instance is handed to *old_repr*,
        which defaults to whatever ``repr`` was when this function was
        defined.  Unicode instances go through fixed_unicode_escape() in
        its quoted form.
        """
        if not isinstance(x, unicode):
            return old_repr(x)
        return fixed_unicode_escape(x, 1)

    # Patch the builtin repr with the fixed implementation.  The previous
    # builtin stays reachable as orig_repr; new_repr already captured it as
    # its old_repr default when it was defined, so the swap below cannot
    # make new_repr call itself.
    import __builtin__
    orig_repr = __builtin__.repr
    __builtin__.repr = new_repr

    import codecs
    from encodings import unicode_escape

    class Codec(codecs.Codec):
        """unicode_escape codec whose encoder is the pure-Python escaper."""

        # Note: the decoder is bound directly as a C function, so the class
        # does not convert it to a method. This is intended.
        decode = codecs.unicode_escape_decode

        def unicode_escape_encode(cls, inputobj, errors='strict'):
            """Escape *inputobj* and return ``(escaped, len(inputobj))``.

            *errors* is accepted for interface compatibility but not used.
            """
            escaped = fixed_unicode_escape(inputobj, 0)
            return escaped, len(inputobj)

        encode = classmethod(unicode_escape_encode)

    class StreamWriter(Codec, codecs.StreamWriter):
        """Stream writer combining Codec with codecs.StreamWriter; adds nothing of its own."""

    class StreamReader(Codec, codecs.StreamReader):
        """Stream reader combining Codec with codecs.StreamReader; adds nothing of its own."""

    def getregentry():
        """Return the codec entry as a 4-tuple.

        The tuple is (encode function, decode function, StreamReader class,
        StreamWriter class), all taken from the classes defined in this
        module.
        """
        encoder, decoder = Codec.encode, Codec.decode
        return (encoder, decoder, StreamReader, StreamWriter)

    # Also patch the encodings.unicode_escape module in place: its Codec,
    # stream classes and getregentry() are replaced by the versions defined
    # in this file.
    # NOTE(review): presumably this only affects codec lookups made after
    # this point, since codec lookups may be cached -- confirm.
    unicode_escape.Codec = Codec
    unicode_escape.StreamWriter = StreamWriter
    unicode_escape.StreamReader = StreamReader
    unicode_escape.getregentry = getregentry

else:
    # UCS-2, not vulnerable: here len(u'\U00010000') is not 1, and nothing
    # is patched.
    pass

if __name__ == '__main__':
    print "Trying to crash. If you have a non-fixed python 2.3/2.4, you'll see some msg from glibc."

    print "Trying repr..."
    assert(repr(u"\U00010000" * 39 + u"\uffff" * 4096) ==
           repr(u"\U00010000" * 39 + u"\uffff" * 4096))

    print "Trying encode unicode-escape ..."
    x = (u"\U00010000" * 39 + u"\uffff" * 4096).encode('unicode-escape')

    print "Finished."