Add a lib2to3 fixer for non-ASCII bytes literals and record the source
encoding on the parse trees built by refactor_string()/refactor_doctest().

Review notes:
* The leading whitespace of the lines in this patch was reconstructed from
  the code structure; if a context line fails to match, apply the patch
  with whitespace-insensitive matching.
* NOTE(review): refactor_docstring() gains an `encoding` parameter, but no
  hunk below forwards it to refactor_doctest() (the "indent, filename))"
  context line is unchanged).  Confirm the call sites pass `encoding`;
  otherwise doctest trees are created with encoding=None.

diff -r 523cfc78847c Lib/lib2to3/fixes/fix_bytesliterals.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Lib/lib2to3/fixes/fix_bytesliterals.py	Sun Sep 08 03:03:12 2013 +0300
@@ -0,0 +1,63 @@
+# -*- coding: utf-8 -*-
+"""Fixer for non-ascii bytes literals.
+
+Bytes literals containing non-ASCII characters are rewritten as pure-ASCII
+literals: the characters are encoded with the encoding recorded on the tree
+and the resulting bytes are written as backslash escapes.
+"""
+# Author: Serhiy Storchaka
+
+# Local imports
+from ..pgen2 import token
+from .. import fixer_base
+from ..fixer_util import String
+
+class FixBytesliterals(fixer_base.BaseFix):
+    # This is so simple that we don't need the pattern compiler.
+
+    _accept_type = token.STRING
+
+    def match(self, node):
+        # Override: accept only string tokens that start with a b/B prefix.
+        return node.value[0] in u'bB'
+
+    def start_tree(self, tree, filename):
+        super(FixBytesliterals, self).start_tree(tree, filename)
+        # Trees that were not built by refactor_string()/refactor_doctest()
+        # may carry no encoding attribute at all; fall back to ASCII.
+        self.encoding = getattr(tree, 'encoding', None) or 'ascii'
+
+    def transform(self, node, results):
+        """Return an ASCII-only replacement for a non-ASCII bytes literal.
+
+        Pure-ASCII literals, and literals that cannot be encoded with
+        self.encoding, are returned unchanged.
+        """
+        val = node.value
+        try:
+            val.encode('ascii')
+        except UnicodeEncodeError:
+            pass
+        else:
+            # Already pure ASCII: nothing to rewrite.
+            return node
+        self.warning(node, 'Value of bytes literal is encoding dependent')
+        if val[1] in u'rR':
+            # Escapes are not interpreted in raw literals, so drop the
+            # 'r' and double the existing backslashes to keep their value.
+            val = val[:1] + val[2:]
+            val = val.replace(u'\\', ur'\\')
+        try:
+            raw = val.encode(self.encoding)
+        except UnicodeEncodeError:
+            # E.g. encoding unknown (ASCII fallback): report, don't crash.
+            self.cannot_convert(node, 'bytes literal is not encodable '
+                                      'with %s' % self.encoding)
+            return node
+        # latin1 maps each byte to the code point of the same value, so
+        # backslashreplace then spells every non-ASCII byte as an escape.
+        val = raw.decode('latin1')
+        val = val.encode('ascii', 'backslashreplace').decode('ascii')
+        new = node.clone()
+        new.value = val
+        return new
diff -r 523cfc78847c Lib/lib2to3/refactor.py
--- a/Lib/lib2to3/refactor.py	Sat Sep 07 15:23:36 2013 +0300
+++ b/Lib/lib2to3/refactor.py	Sun Sep 08 03:03:12 2013 +0300
@@ -345,13 +345,13 @@
         input += u"\n" # Silence certain parse errors
         if doctests_only:
             self.log_debug("Refactoring doctests in %s", filename)
-            output = self.refactor_docstring(input, filename)
+            output = self.refactor_docstring(input, filename, encoding)
             if self.write_unchanged_files or output != input:
                 self.processed_file(output, filename, input, write, encoding)
             else:
                 self.log_debug("No doctest changes in %s", filename)
         else:
-            tree = self.refactor_string(input, filename)
+            tree = self.refactor_string(input, filename, encoding)
             if self.write_unchanged_files or (tree and tree.was_changed):
                 # The [:-1] is to take off the \n we added earlier
                 self.processed_file(unicode(tree)[:-1], filename,
@@ -359,7 +359,7 @@
             else:
                 self.log_debug("No changes in %s", filename)
 
-    def refactor_string(self, data, name):
+    def refactor_string(self, data, name, encoding=None):
         """Refactor a given input string.
 
         Args:
@@ -381,6 +381,7 @@
             return
         finally:
             self.driver.grammar = self.grammar
+        tree.encoding = encoding
         tree.future_features = features
         self.log_debug("Refactoring %s", name)
         self.refactor_tree(tree, name)
@@ -549,7 +550,7 @@
     PS1 = ">>> "
     PS2 = "... "
 
-    def refactor_docstring(self, input, filename):
+    def refactor_docstring(self, input, filename, encoding=None):
         """Refactors a docstring, looking for doctests.
 
         This returns a modified version of the input string. It looks
@@ -592,7 +593,7 @@
                                                 indent, filename))
         return u"".join(result)
 
-    def refactor_doctest(self, block, lineno, indent, filename):
+    def refactor_doctest(self, block, lineno, indent, filename, encoding=None):
         """Refactors one doctest.
 
         A doctest is given as a block of lines, the first of which starts
@@ -609,6 +610,7 @@
             self.log_error("Can't parse docstring in %s line %s: %s: %s",
                            filename, lineno, err.__class__.__name__, err)
             return block
+        tree.encoding = encoding
         if self.refactor_tree(tree, filename):
             new = unicode(tree).splitlines(True)
             # Undo the adjustment of the line numbers in wrap_toks() below.