diff -r 5e8de100f708 Lib/lib2to3/fixes/fix_bytesliterals.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Lib/lib2to3/fixes/fix_bytesliterals.py	Thu Oct 03 13:44:54 2013 +0300
@@ -0,0 +1,57 @@
+# -*- coding: utf-8 -*-
+"""Fixer for non-ascii bytes literals.
+
+Non-ASCII characters in a bytes literal are spelled out as hexadecimal
+escapes of the bytes they occupy in the encoding of the source file.
+"""
+# Author: Serhiy Storchaka
+
+# Local imports
+from ..pgen2 import token
+from .. import fixer_base
+
+class FixBytesliterals(fixer_base.BaseFix):
+    # This is so simple that we don't need the pattern compiler.
+
+    _accept_type = token.STRING
+
+    def match(self, node):
+        # Override: only literals with a b/B prefix are of interest.
+        return node.value[0] in u'bB'
+
+    def start_tree(self, tree, filename):
+        super(FixBytesliterals, self).start_tree(tree, filename)
+        # A tree that was not built by refactor_string() or
+        # refactor_doctest() may have no encoding attribute.
+        self.encoding = getattr(tree, 'encoding', None) or 'ascii'
+
+    def transform(self, node, results):
+        """Return a copy of node with non-ASCII characters escaped.
+
+        Returns None, leaving the literal untouched, when it is pure
+        ASCII or cannot be encoded with the encoding of the source file.
+        """
+        val = node.value
+        try:
+            val.encode('ascii')
+        except UnicodeEncodeError:
+            self.warning(node, 'Value of bytes literal is encoding dependend')
+        else:
+            # Pure ASCII literals, raw ones included, need no rewriting.
+            return None
+        if val[1] in u'rR':
+            # Escapes are not interpreted in raw literals: drop the 'r'
+            # prefix and double the backslashes to keep them literal.
+            assert val[0] in u'bB'
+            val = val[:1] + val[2:]
+            val = val.replace(u'\\', ur'\\')
+        try:
+            val = val.encode(self.encoding).decode('latin1')
+        except UnicodeEncodeError:
+            # E.g. no encoding is known and 'ascii' was assumed: the
+            # bytes cannot be determined, so keep the literal as it is.
+            return None
+        val = val.encode('ascii', 'backslashreplace').decode('ascii')
+        new = node.clone()
+        new.value = val
+        return new
diff -r 5e8de100f708 Lib/lib2to3/refactor.py
--- a/Lib/lib2to3/refactor.py	Thu Oct 03 12:08:22 2013 +0300
+++ b/Lib/lib2to3/refactor.py	Thu Oct 03 13:44:54 2013 +0300
@@ -345,13 +345,13 @@
         input += u"\n" # Silence certain parse errors
         if doctests_only:
             self.log_debug("Refactoring doctests in %s", filename)
-            output = self.refactor_docstring(input, filename)
+            output = self.refactor_docstring(input, filename, encoding)
             if self.write_unchanged_files or output != input:
                 self.processed_file(output, filename, input, write, encoding)
             else:
                 self.log_debug("No doctest changes in %s", filename)
         else:
-            tree = self.refactor_string(input, filename)
+            tree = self.refactor_string(input, filename, encoding)
             if self.write_unchanged_files or (tree and tree.was_changed):
                 # The [:-1] is to take off the \n we added earlier
                 self.processed_file(unicode(tree)[:-1], filename,
@@ -359,7 +359,7 @@
             else:
                 self.log_debug("No changes in %s", filename)
 
-    def refactor_string(self, data, name):
+    def refactor_string(self, data, name, encoding=None):
         """Refactor a given input string.
 
         Args:
@@ -381,6 +381,7 @@
             return
         finally:
             self.driver.grammar = self.grammar
+        tree.encoding = encoding
         tree.future_features = features
         self.log_debug("Refactoring %s", name)
         self.refactor_tree(tree, name)
@@ -549,7 +550,7 @@
     PS1 = ">>> "
     PS2 = "... "
 
-    def refactor_docstring(self, input, filename):
+    def refactor_docstring(self, input, filename, encoding=None):
         """Refactors a docstring, looking for doctests.
 
         This returns a modified version of the input string.  It looks
@@ -571,7 +572,8 @@
             if line.lstrip().startswith(self.PS1):
                 if block is not None:
                     result.extend(self.refactor_doctest(block, block_lineno,
-                                                        indent, filename))
+                                                        indent, filename,
+                                                        encoding=encoding))
                 block_lineno = lineno
                 block = [line]
                 i = line.find(self.PS1)
@@ -583,16 +585,18 @@
             else:
                 if block is not None:
                     result.extend(self.refactor_doctest(block, block_lineno,
-                                                        indent, filename))
+                                                        indent, filename,
+                                                        encoding=encoding))
                 block = None
                 indent = None
                 result.append(line)
         if block is not None:
             result.extend(self.refactor_doctest(block, block_lineno,
-                                                indent, filename))
+                                                indent, filename,
+                                                encoding=encoding))
         return u"".join(result)
 
-    def refactor_doctest(self, block, lineno, indent, filename):
+    def refactor_doctest(self, block, lineno, indent, filename, encoding=None):
         """Refactors one doctest.
 
         A doctest is given as a block of lines, the first of which starts
@@ -609,6 +613,7 @@
             self.log_error("Can't parse docstring in %s line %s: %s: %s",
                            filename, lineno, err.__class__.__name__, err)
             return block
+        tree.encoding = encoding
         if self.refactor_tree(tree, filename):
             new = unicode(tree).splitlines(True)
             # Undo the adjustment of the line numbers in wrap_toks() below.
diff -r 5e8de100f708 Lib/lib2to3/tests/test_fixers.py
--- a/Lib/lib2to3/tests/test_fixers.py	Thu Oct 03 12:08:22 2013 +0300
+++ b/Lib/lib2to3/tests/test_fixers.py	Thu Oct 03 13:44:54 2013 +0300
@@ -26,30 +26,31 @@
                            self.refactor.post_order):
             fixer.log = self.fixer_log
 
-    def _check(self, before, after):
+    def _check(self, before, after, encoding=None):
         before = support.reformat(before)
         after = support.reformat(after)
-        tree = self.refactor.refactor_string(before, self.filename)
+        tree = self.refactor.refactor_string(before, self.filename,
+                                             encoding=encoding)
         self.assertEqual(after, unicode(tree))
         return tree
 
-    def check(self, before, after, ignore_warnings=False):
-        tree = self._check(before, after)
+    def check(self, before, after, ignore_warnings=False, encoding=None):
+        tree = self._check(before, after, encoding=encoding)
         self.assertTrue(tree.was_changed)
         if not ignore_warnings:
             self.assertEqual(self.fixer_log, [])
 
-    def warns(self, before, after, message, unchanged=False):
-        tree = self._check(before, after)
+    def warns(self, before, after, message, unchanged=False, encoding=None):
+        tree = self._check(before, after, encoding=encoding)
         self.assertTrue(message in "".join(self.fixer_log))
         if not unchanged:
             self.assertTrue(tree.was_changed)
 
-    def warns_unchanged(self, before, message):
-        self.warns(before, before, message, unchanged=True)
-
-    def unchanged(self, before, ignore_warnings=False):
-        self._check(before, before)
+    def warns_unchanged(self, before, message, encoding=None):
+        self.warns(before, before, message, unchanged=True, encoding=encoding)
+
+    def unchanged(self, before, ignore_warnings=False, encoding=None):
+        self._check(before, before, encoding=encoding)
         if not ignore_warnings:
             self.assertEqual(self.fixer_log, [])
 
@@ -2861,6 +2862,29 @@
         a = f + """r'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
         self.check(b, a)
 
+class Test_bytesliterals(FixerTestCase):
+    fixer = "bytesliterals"
+
+    def test_nonascii(self):
+        msg = 'Value of bytes literal is encoding dependend'
+        b = u"""b'\xa4\\n'"""
+        a = """b'\\xa4\\n'"""
+        self.warns(b, a, msg, encoding='iso8859-1')
+        b = u"""b'\u20ac\\n'"""
+        self.warns(b, a, msg, encoding='iso8859-15')
+
+        b = u"""br'\xa4\\n'"""
+        a = """b'\\xa4\\\\n'"""
+        self.warns(b, a, msg, encoding='iso8859-1')
+        b = u"""br'\u20ac\\n'"""
+        self.warns(b, a, msg, encoding='iso8859-15')
+
+    def test_ascii(self):
+        # Pure ASCII literals do not depend on the source encoding.
+        s = u"""b'abc\\n'"""
+        self.unchanged(s)
+        self.unchanged(s, encoding='iso8859-1')
+
 class Test_callable(FixerTestCase):
     fixer = "callable"
 