Make expandvars() handle non-ASCII environment variables.

ntpath.expandvars() and posixpath.expandvars() now look variables up through
the filesystem encoding when the path is a unicode string: the variable name
is encoded before the os.environ lookup and the value is decoded before it is
substituted.  Byte-string paths keep using os.environ directly.

posixpath keeps two cached patterns: _varprog (byte pattern) for str paths
and _uvarprog (unicode pattern compiled with re.UNICODE) for unicode paths,
so that \w matches non-ASCII letters only in unicode input.

Tests are added for both path modules, and test_support gains FS_NONASCII,
a non-ASCII character that round-trips through sys.getfilesystemencoding()
(or None when no candidate does).

diff -r 712b4665955d Lib/ntpath.py
--- a/Lib/ntpath.py	Wed Feb 12 10:52:07 2014 +0200
+++ b/Lib/ntpath.py	Thu Feb 13 15:56:46 2014 +0200
@@ -294,6 +294,13 @@
         return path
     import string
     varchars = string.ascii_letters + string.digits + '_-'
+    if isinstance(path, unicode):
+        encoding = sys.getfilesystemencoding()
+        def getenv(var):
+            return os.environ[var.encode(encoding)].decode(encoding)
+    else:
+        def getenv(var):
+            return os.environ[var]
     res = ''
     index = 0
     pathlen = len(path)
@@ -322,9 +329,9 @@
                     index = pathlen - 1
                 else:
                     var = path[:index]
-                    if var in os.environ:
-                        res = res + os.environ[var]
-                    else:
+                    try:
+                        res = res + getenv(var)
+                    except KeyError:
                         res = res + '%' + var + '%'
         elif c == '$':  # variable or '$$'
             if path[index + 1:index + 2] == '$':
@@ -336,9 +343,9 @@
                 try:
                     index = path.index('}')
                     var = path[:index]
-                    if var in os.environ:
-                        res = res + os.environ[var]
-                    else:
+                    try:
+                        res = res + getenv(var)
+                    except KeyError:
                         res = res + '${' + var + '}'
                 except ValueError:
                     res = res + '${' + path
@@ -351,9 +358,9 @@
                     var = var + c
                     index = index + 1
                     c = path[index:index + 1]
-                if var in os.environ:
-                    res = res + os.environ[var]
-                else:
+                try:
+                    res = res + getenv(var)
+                except KeyError:
                     res = res + '$' + var
                 if c != '':
                     index = index - 1
diff -r 712b4665955d Lib/posixpath.py
--- a/Lib/posixpath.py	Wed Feb 12 10:52:07 2014 +0200
+++ b/Lib/posixpath.py	Thu Feb 13 15:56:46 2014 +0200
@@ -285,28 +285,43 @@
 # Non-existent variables are left unchanged.
 
 _varprog = None
+_uvarprog = None
 
 def expandvars(path):
     """Expand shell variables of form $var and ${var}.  Unknown variables
     are left unchanged."""
-    global _varprog
+    global _varprog, _uvarprog
     if '$' not in path:
         return path
-    if not _varprog:
-        import re
-        _varprog = re.compile(r'\$(\w+|\{[^}]*\})')
+    if isinstance(path, _unicode):
+        if not _uvarprog:
+            import re
+            _uvarprog = re.compile(_unicode(r'\$(\w+|\{[^}]*\})'), re.UNICODE)
+        varprog = _uvarprog
+        encoding = sys.getfilesystemencoding()
+    else:
+        if not _varprog:
+            import re
+            _varprog = re.compile(r'\$(\w+|\{[^}]*\})')
+        varprog = _varprog
+        encoding = None
     i = 0
     while True:
-        m = _varprog.search(path, i)
+        m = varprog.search(path, i)
         if not m:
             break
         i, j = m.span(0)
         name = m.group(1)
         if name.startswith('{') and name.endswith('}'):
             name = name[1:-1]
+        if encoding:
+            name = name.encode(encoding)
         if name in os.environ:
             tail = path[j:]
-            path = path[:i] + os.environ[name]
+            value = os.environ[name]
+            if encoding:
+                value = value.decode(encoding)
+            path = path[:i] + value
             i = len(path)
             path += tail
         else:
diff -r 712b4665955d Lib/test/test_genericpath.py
--- a/Lib/test/test_genericpath.py	Wed Feb 12 10:52:07 2014 +0200
+++ b/Lib/test/test_genericpath.py	Thu Feb 13 15:56:46 2014 +0200
@@ -199,13 +199,40 @@
             self.assertEqual(expandvars("$[foo]bar"), "$[foo]bar")
             self.assertEqual(expandvars("$bar bar"), "$bar bar")
             self.assertEqual(expandvars("$?bar"), "$?bar")
-            self.assertEqual(expandvars("${foo}bar"), "barbar")
             self.assertEqual(expandvars("$foo}bar"), "bar}bar")
             self.assertEqual(expandvars("${foo"), "${foo")
             self.assertEqual(expandvars("${{foo}}"), "baz1}")
             self.assertEqual(expandvars("$foo$foo"), "barbar")
             self.assertEqual(expandvars("$bar$bar"), "$bar$bar")
 
+    @unittest.skipUnless(test_support.FS_NONASCII, 'need test_support.FS_NONASCII')
+    def test_expandvars_nonascii(self):
+        if self.pathmodule.__name__ == 'macpath':
+            self.skipTest('macpath.expandvars is a stub')
+        expandvars = self.pathmodule.expandvars
+        def check(value, expected):
+            self.assertEqual(expandvars(value), expected)
+        encoding = sys.getfilesystemencoding()
+        with test_support.EnvironmentVarGuard() as env:
+            env.clear()
+            unonascii = test_support.FS_NONASCII
+            snonascii = unonascii.encode(encoding)
+            env['spam'] = snonascii
+            env[snonascii] = 'ham' + snonascii
+            check(snonascii, snonascii)
+            check('$spam bar', '%s bar' % snonascii)
+            check('${spam}bar', '%sbar' % snonascii)
+            check('${%s}bar' % snonascii, 'ham%sbar' % snonascii)
+            check('$bar%s bar' % snonascii, '$bar%s bar' % snonascii)
+            check('$spam}bar', '%s}bar' % snonascii)
+
+            check(unonascii, unonascii)
+            check(u'$spam bar', u'%s bar' % unonascii)
+            check(u'${spam}bar', u'%sbar' % unonascii)
+            check(u'${%s}bar' % unonascii, u'ham%sbar' % unonascii)
+            check(u'$bar%s bar' % unonascii, u'$bar%s bar' % unonascii)
+            check(u'$spam}bar', u'%s}bar' % unonascii)
+
     def test_abspath(self):
         self.assertIn("foo", self.pathmodule.abspath("foo"))
 
diff -r 712b4665955d Lib/test/test_ntpath.py
--- a/Lib/test/test_ntpath.py	Wed Feb 12 10:52:07 2014 +0200
+++ b/Lib/test/test_ntpath.py	Thu Feb 13 15:56:46 2014 +0200
@@ -1,16 +1,19 @@
 import ntpath
 import os
+import sys
 from test.test_support import TestFailed
 from test import test_support, test_genericpath
 import unittest
 
+def tester0(fn, wantResult):
+    gotResult = eval(fn)
+    if wantResult != gotResult:
+        raise TestFailed, "%s should return: %r but returned: %r" \
+              %(fn, wantResult, gotResult)
 
 def tester(fn, wantResult):
     fn = fn.replace("\\", "\\\\")
-    gotResult = eval(fn)
-    if wantResult != gotResult:
-        raise TestFailed, "%s should return: %s but returned: %s" \
-              %(str(fn), str(wantResult), str(gotResult))
+    tester0(fn, wantResult)
 
 
 class TestNtpath(unittest.TestCase):
@@ -173,7 +176,6 @@
             tester('ntpath.expandvars("$[foo]bar")', "$[foo]bar")
             tester('ntpath.expandvars("$bar bar")', "$bar bar")
             tester('ntpath.expandvars("$?bar")', "$?bar")
-            tester('ntpath.expandvars("${foo}bar")', "barbar")
             tester('ntpath.expandvars("$foo}bar")', "bar}bar")
             tester('ntpath.expandvars("${foo")', "${foo")
             tester('ntpath.expandvars("${{foo}}")', "baz1}")
@@ -187,6 +189,30 @@
             tester('ntpath.expandvars("%foo%%bar")', "bar%bar")
             tester('ntpath.expandvars("\'%foo%\'%bar")', "\'%foo%\'%bar")
 
+    @unittest.skipUnless(test_support.FS_NONASCII, 'need test_support.FS_NONASCII')
+    def test_expandvars_nonascii(self):
+        encoding = sys.getfilesystemencoding()
+        def check(value, expected):
+            tester0("ntpath.expandvars(%r)" % value, expected)
+            tester0("ntpath.expandvars(%r)" % value.decode(encoding),
+                    expected.decode(encoding))
+        with test_support.EnvironmentVarGuard() as env:
+            env.clear()
+            unonascii = test_support.FS_NONASCII
+            snonascii = unonascii.encode(encoding)
+            env['spam'] = snonascii
+            env[snonascii] = 'ham' + snonascii
+            check('$spam bar', '%s bar' % snonascii)
+            check('$%s bar' % snonascii, '$%s bar' % snonascii)
+            check('${spam}bar', '%sbar' % snonascii)
+            check('${%s}bar' % snonascii, 'ham%sbar' % snonascii)
+            check('$spam}bar', '%s}bar' % snonascii)
+            check('$%s}bar' % snonascii, '$%s}bar' % snonascii)
+            check('%spam% bar', '%s bar' % snonascii)
+            check('%{}% bar'.format(snonascii), 'ham%s bar' % snonascii)
+            check('%spam%bar', '%sbar' % snonascii)
+            check('%{}%bar'.format(snonascii), 'ham%sbar' % snonascii)
+
     def test_abspath(self):
         # ntpath.abspath() can only be used on a system with the "nt" module
         # (reasonably), so we protect this test with "import nt".  This allows
diff -r 712b4665955d Lib/test/test_support.py
--- a/Lib/test/test_support.py	Wed Feb 12 10:52:07 2014 +0200
+++ b/Lib/test/test_support.py	Thu Feb 13 15:56:46 2014 +0200
@@ -465,6 +465,52 @@
 
 is_jython = sys.platform.startswith('java')
 
+# FS_NONASCII: non-ASCII Unicode character encodable by
+# sys.getfilesystemencoding(), or None if there is no such character.
+FS_NONASCII = None
+if have_unicode:
+    for character in (
+        # First try printable and common characters to have a readable filename.
+        # For each character, the encoding list is just an example of encodings
+        # able to encode the character (the list is not exhaustive).
+
+        # U+00E6 (Latin Small Letter Ae): cp1252, iso-8859-1
+        unichr(0x00E6),
+        # U+0130 (Latin Capital Letter I With Dot Above): cp1254, iso8859_3
+        unichr(0x0130),
+        # U+0141 (Latin Capital Letter L With Stroke): cp1250, cp1257
+        unichr(0x0141),
+        # U+03C6 (Greek Small Letter Phi): cp1253
+        unichr(0x03C6),
+        # U+041A (Cyrillic Capital Letter Ka): cp1251
+        unichr(0x041A),
+        # U+05D0 (Hebrew Letter Alef): Encodable to cp424
+        unichr(0x05D0),
+        # U+060C (Arabic Comma): cp864, cp1006, iso8859_6, mac_arabic
+        unichr(0x060C),
+        # U+062A (Arabic Letter Teh): cp720
+        unichr(0x062A),
+        # U+0E01 (Thai Character Ko Kai): cp874
+        unichr(0x0E01),
+
+        # Then try more "special" characters. "special" because they may be
+        # interpreted or displayed differently depending on the exact locale
+        # encoding and the font.
+
+        # U+00A0 (No-Break Space)
+        unichr(0x00A0),
+        # U+20AC (Euro Sign)
+        unichr(0x20AC),
+    ):
+        try:
+            character.encode(sys.getfilesystemencoding())\
+                     .decode(sys.getfilesystemencoding())
+        except UnicodeError:
+            pass
+        else:
+            FS_NONASCII = character
+            break
+
 # Filename used for testing
 if os.name == 'java':
     # Jython disallows @ in module names