diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py --- a/Lib/mimetypes.py +++ b/Lib/mimetypes.py @@ -242,38 +242,28 @@ i = 0 while True: try: - ctype = _winreg.EnumKey(mimedb, i) + yield _winreg.EnumKey(mimedb, i) except EnvironmentError: break - try: - ctype = ctype.encode(default_encoding) # omit in 3.x! - except UnicodeEncodeError: - pass - else: - yield ctype i += 1 - default_encoding = sys.getdefaultencoding() with _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT, '') as hkcr: for subkeyname in enum_types(hkcr): - try: - with _winreg.OpenKey(hkcr, subkeyname) as subkey: - # Only check file extensions - if not subkeyname.startswith("."): - continue - # raises EnvironmentError if no 'Content Type' value + # Only check file extensions, not all possible classes + if not subkeyname.startswith("."): + continue + + with _winreg.OpenKey(hkcr, subkeyname) as subkey: + # If there is no "Content Type" value, or if it is not + # a simple string, simply skip + try: mimetype, datatype = _winreg.QueryValueEx( subkey, 'Content Type') - if datatype != _winreg.REG_SZ: - continue - try: - mimetype = mimetype.encode(default_encoding) - subkeyname = subkeyname.encode(default_encoding) - except UnicodeEncodeError: - continue - self.add_type(mimetype, subkeyname, strict) - except EnvironmentError: - continue + except EnvironmentError: + continue + if datatype != _winreg.REG_SZ: + continue + self.add_type(mimetype, subkeyname, strict) def guess_type(url, strict=True): """Guess the type of a file based on its URL. diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py --- a/Lib/test/test_mimetypes.py +++ b/Lib/test/test_mimetypes.py @@ -71,8 +71,6 @@ # ensure all entries actually come from the Windows registry self.original_types_map = mimetypes.types_map.copy() mimetypes.types_map.clear() - mimetypes.init() - self.db = mimetypes.MimeTypes() def tearDown(self): # restore default settings @@ -84,14 +82,54 @@ # Windows registry is undocumented AFAIK. # Use file types that should *always* exist: eq = self.assertEqual - eq(self.db.guess_type("foo.txt"), ("text/plain", None)) - eq(self.db.guess_type("image.jpg"), ("image/jpeg", None)) - eq(self.db.guess_type("image.png"), ("image/png", None)) + mimetypes.init() + db = mimetypes.MimeTypes() + eq(db.guess_type("foo.txt"), ("text/plain", None)) + eq(db.guess_type("image.jpg"), ("image/jpeg", None)) + eq(db.guess_type("image.png"), ("image/png", None)) + + def test_non_latin_extension(self): + import _winreg + + class MockWinreg(object): + def __getattr__(self, name): + if name == 'EnumKey': + return lambda key, i: _winreg.EnumKey(key, i) + "\xa3" + elif name == "OpenKey": + return lambda key, name: _winreg.OpenKey(key, name.rstrip("\xa3")) + elif name == 'QueryValueEx': + return lambda subkey, label: (label + "\xa3", _winreg.REG_SZ) + return getattr(_winreg, name) + + mimetypes._winreg = MockWinreg() + try: + # this used to throw an exception if registry contained non-Latin + # characters in extensions (issue #9291) + mimetypes.init() + finally: + mimetypes._winreg = _winreg + + def test_non_latin_type(self): + import _winreg + + class MockWinreg(object): + def __getattr__(self, name): + if name == 'QueryValueEx': + return lambda subkey, label: (label + "\xa3", _winreg.REG_SZ) + return getattr(_winreg, name) + + mimetypes._winreg = MockWinreg() + try: + # this used to throw an exception if registry contained non-Latin + # characters in content types (issue #9291) + mimetypes.init() + finally: + mimetypes._winreg = _winreg def test_main(): test_support.run_unittest(MimeTypesTestCase, Win32MimeTypesTestCase - ) + ) if __name__ == "__main__":