Index: Lib/mimetypes.py =================================================================== --- Lib/mimetypes.py (revision 74455) +++ Lib/mimetypes.py (working copy) @@ -1,152 +1,152 @@ """Guess the MIME type of a file. -This module defines two useful functions: +This module defines three useful functions: -guess_type(url, strict=1) -- guess the MIME type and encoding of a URL. +guess_type(url, strict=True) -- guess the MIME type and encoding of a URL. -guess_extension(type, strict=1) -- guess the extension for a given MIME type. +guess_extension(type, strict=True) -- guess the extension for a given MIME type. +guess_all_extensions(type, strict=True) -- list all guessed extensions for a type. + It also contains the following, for tuning the behavior: +Classes: + +MimeTypes -- The type mapping data store. Use this if you plan to change the +default type mappings, and call its methods in lieu of the above functions. + +Functions: + +read_mime_types(file) -- parse one file in Apache mime.types format, returning +the resulting mapping from extensions to types. + +init() -- reset the module singleton to default values. Should only be used +with care, because it could break other code using the module. + Data: -knownfiles -- list of files to parse -inited -- flag set when init() has been called +These mappings should be treated as read-only. Altering them could break any +code which makes assumptions about their content. 
+ suffix_map -- dictionary mapping suffixes to suffixes encodings_map -- dictionary mapping suffixes to encodings types_map -- dictionary mapping suffixes to types - -Functions: - -init([files]) -- parse a list of files, default knownfiles -read_mime_types(file) -- parse one file, return a dictionary or None """ -import os import posixpath import urllib +from textwrap import dedent +from collections import defaultdict +import warnings __all__ = [ - "guess_type","guess_extension","guess_all_extensions", - "add_type","read_mime_types","init" + 'MimeTypes', 'guess_type', 'guess_extension', 'guess_all_extensions', + 'add_type', 'read_mime_types', 'parse_mimetypes', ] knownfiles = [ - "/etc/mime.types", - "/etc/httpd/mime.types", # Mac OS X - "/etc/httpd/conf/mime.types", # Apache + # later files will override type mappings from earlier ones + "/usr/local/lib/netscape/mime.types", # Netscape "/etc/apache/mime.types", # Apache 1 - "/etc/apache2/mime.types", # Apache 2 - "/usr/local/etc/httpd/conf/mime.types", - "/usr/local/lib/netscape/mime.types", + "/etc/httpd/conf/mime.types", # Apache 1.3 "/usr/local/etc/httpd/conf/mime.types", # Apache 1.2 "/usr/local/etc/mime.types", # Apache 1.3 - ] + "/etc/httpd/mime.types", # early Mac OS X + "/usr/local/etc/httpd/conf/mime.types", + "/etc/mime.types", # Linux + "/etc/apache2/mime.types", # Apache 2 + "/usr/local/apache2/conf/mime.types", # Apache 2 +] -inited = False -_db = None - - -class MimeTypes: +class MimeTypes(object): """MIME-types datastore. - This datastore can handle information from mime.types-style files - and supports basic determination of MIME type from a filename or - URL, and can guess a reasonable extension given a MIME type. + This datastore can handle information from mime.types-style files and + supports basic determination of MIME type from a filename or URL, and can + guess a reasonable extension given a MIME type. 
""" - - def __init__(self, filenames=(), strict=True): - if not inited: - init() - self.encodings_map = encodings_map.copy() - self.suffix_map = suffix_map.copy() + def __init__(self, filenames=knownfiles, strict=True): + self.encodings_map = _encodings_map.copy() + self.suffix_map = _suffix_map.copy() self.types_map = ({}, {}) # dict for (non-strict, strict) - self.types_map_inv = ({}, {}) - for (ext, type) in types_map.items(): - self.add_type(type, ext, True) - for (ext, type) in common_types.items(): - self.add_type(type, ext, False) - for name in filenames: - self.read(name, strict) + self.types_map_inv = (defaultdict(list), defaultdict(list)) + self.register(_strict_types, True) + self.register(_lenient_types, False) + for filename in filenames: + try: + self.register(parse_mimetypes(filename), strict) + except IOError: + continue + def register(self, pairs, strict=True): + """Add a set of mappings from extension -> MIME type + + Takes in an iterable of (extension, type) pairs. For known extensions + the new types will replace the old ones. For known types, the + extension will be added to the list of known extensions. + + If `strict' is true, information will be added to list of standard + types, else to the list of non-standard types. + """ + for ext, type in pairs: + self.types_map[strict][ext] = type.lower() + ext_list = self.types_map_inv[strict][type.lower()] + if ext not in ext_list: + ext_list.append(ext) + def add_type(self, type, ext, strict=True): """Add a mapping between a type and an extension. - When the extension is already known, the new - type will replace the old one. When the type - is already known the extension will be added - to the list of known extensions. + Passes a single (ext, type) pair to `register`. - If strict is true, information will be added to - list of standard types, else to the list of non-standard - types. + Deprecated: Call MimeTypes.register([(ext, type)]) instead.
""" - self.types_map[strict][ext] = type - exts = self.types_map_inv[strict].setdefault(type, []) - if ext not in exts: - exts.append(ext) + warnings.warn("deprecated", DeprecationWarning) + self.register([(ext, type)], strict) + def _type_from_data_url(self, url): + # syntax of data URLs: + # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data + # mediatype := [ type "/" subtype ] *( ";" parameter ) + # data := *urlchar + # parameter := attribute "=" value + if ',' not in url: + return None # bad data URL + type = url.split(',', 1)[0].split(';', 1)[0] # up to first , or ; + if '=' in type or '/' not in type: + type = 'text/plain' # type/subtype defaults to "text/plain" + return type + def guess_type(self, url, strict=True): """Guess the type of a file based on its URL. - Return value is a tuple (type, encoding) where type is None if - the type can't be guessed (no or unknown suffix) or a string - of the form type/subtype, usable for a MIME Content-type - header; and encoding is None for no encoding or the name of - the program used to encode (e.g. compress or gzip). The - mappings are table driven. Encoding suffixes are case - sensitive; type suffixes are first tried case sensitive, then - case insensitive. + Return value is a tuple (type, encoding) where type is None if the + type can't be guessed (no or unknown suffix) or a string of the form + type/subtype, usable for a MIME Content-type header; and encoding is + None for no encoding or the name of the program used to encode (e.g. + compress or gzip). Encoding suffixes are case sensitive; type suffixes + are first tried case sensitive, then case insensitive. - The suffixes .tgz, .taz and .tz (case sensitive!) are all - mapped to '.tar.gz'. (This is table-driven too, using the - dictionary suffix_map.) + The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped to + '.tar.gz'. (This is table-driven too, using self.suffix_map.)
- Optional `strict' argument when False adds a bunch of commonly found, - but non-standard types. + Optional `strict' argument when False adds commonly found but + non-standard types. """ scheme, url = urllib.splittype(url) if scheme == 'data': - # syntax of data URLs: - # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data - # mediatype := [ type "/" subtype ] *( ";" parameter ) - # data := *urlchar - # parameter := attribute "=" value - # type/subtype defaults to "text/plain" - comma = url.find(',') - if comma < 0: - # bad data URL - return None, None - semi = url.find(';', 0, comma) - if semi >= 0: - type = url[:semi] - else: - type = url[:comma] - if '=' in type or '/' not in type: - type = 'text/plain' - return type, None # never compressed, so encoding is None + return self._type_from_data_url(url), None # data URLs are never compressed base, ext = posixpath.splitext(url) - while ext in self.suffix_map: + if ext in self.suffix_map: base, ext = posixpath.splitext(base + self.suffix_map[ext]) - if ext in self.encodings_map: - encoding = self.encodings_map[ext] + encoding = self.encodings_map.get(ext) + if encoding: base, ext = posixpath.splitext(base) - else: - encoding = None - types_map = self.types_map[True] - if ext in types_map: - return types_map[ext], encoding - elif ext.lower() in types_map: - return types_map[ext.lower()], encoding - elif strict: - return None, encoding - types_map = self.types_map[False] - if ext in types_map: - return types_map[ext], encoding - elif ext.lower() in types_map: - return types_map[ext.lower()], encoding - else: - return None, encoding + loose_map, strict_map = self.types_map + type = strict_map.get(ext) or strict_map.get(ext.lower()) + if not strict: + type = type or loose_map.get(ext) or loose_map.get(ext.lower()) + return type, encoding def guess_all_extensions(self, type, strict=True): """Guess the extensions for a file based on its MIME type. 
@@ -156,8 +156,8 @@ guaranteed to have been associated with any particular data stream, but would be mapped to the MIME type `type' by guess_type(). - Optional `strict' argument when false adds a bunch of commonly found, - but non-standard types. + Optional `strict' argument when False adds commonly found but + non-standard types. """ type = type.lower() extensions = self.types_map_inv[True].get(type, []) @@ -170,369 +170,303 @@ def guess_extension(self, type, strict=True): """Guess the extension for a file based on its MIME type. - Return value is a string giving a filename extension, - including the leading dot ('.'). The extension is not - guaranteed to have been associated with any particular data - stream, but would be mapped to the MIME type `type' by - guess_type(). If no extension can be guessed for `type', None - is returned. + Return value is a string giving a filename extension, including the + leading dot ('.'). The extension is not guaranteed to have been + associated with any particular data stream, but would be mapped to the + MIME type `type' by guess_type(). If no extension can be guessed for + `type', None is returned. - Optional `strict' argument when false adds a bunch of commonly found, - but non-standard types. + Optional `strict' argument when False adds commonly found but + non-standard types. """ extensions = self.guess_all_extensions(type, strict) - if not extensions: - return None - return extensions[0] + return extensions[0] if extensions else None def read(self, filename, strict=True): + """Read a single mime.types-format file, specified by pathname, and + add its contents to the type mappings. Originally called at instance + initialization, but now only included for backwards compatibility and + unused internally. + + Raises an exception if the file can't be opened or read. """ - Read a single mime.types-format file, specified by pathname. 
+ warnings.warn("MimeTypes.read is deprecated.", DeprecationWarning) + self.register(parse_mimetypes(filename), strict) - If strict is true, information will be added to - list of standard types, else to the list of non-standard - types. - """ - fp = open(filename) - self.readfp(fp, strict) - fp.close() - def readfp(self, fp, strict=True): + """Read a single mime.types-format file. Originally a helper for the + `read' method, now only included for backwards compatibility and + unused internally. """ - Read a single mime.types-format file. + warnings.warn("MimeTypes.readfp is deprecated.", DeprecationWarning) + self.register(_parse_mimetypes(fp), strict) - If strict is true, information will be added to - list of standard types, else to the list of non-standard - types. - """ - while 1: - line = fp.readline() - if not line: - break - words = line.split() - for i in range(len(words)): - if words[i][0] == '#': - del words[i:] - break - if not words: - continue - type, suffixes = words[0], words[1:] - for suff in suffixes: - self.add_type(type, '.' + suff, strict) - -def guess_type(url, strict=True): - """Guess the type of a file based on its URL. - - Return value is a tuple (type, encoding) where type is None if the - type can't be guessed (no or unknown suffix) or a string of the - form type/subtype, usable for a MIME Content-type header; and - encoding is None for no encoding or the name of the program used - to encode (e.g. compress or gzip). The mappings are table - driven. Encoding suffixes are case sensitive; type suffixes are - first tried case sensitive, then case insensitive. - - The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped - to ".tar.gz". (This is table-driven too, using the dictionary - suffix_map). - - Optional `strict' argument when false adds a bunch of commonly found, but - non-standard types. +def _parse_mimetypes(types_lines): + """Parse Apache mime.types format. + + Takes an iterable of lines. 
Returns an iterable of (ext, type) pairs. """ - if _db is None: - init() - return _db.guess_type(url, strict) + # XXX This function exists to support deprecated functions and methods + # and uses the same code as parse_mimetypes. Once those are gone, remove + # this function too. + for line in types_lines: + words = line.split('#', 1)[0].split() + if not words: + continue + type = words[0] + for extension in words[1:]: + yield ('.' + extension, type) - -def guess_all_extensions(type, strict=True): - """Guess the extensions for a file based on its MIME type. - - Return value is a list of strings giving the possible filename - extensions, including the leading dot ('.'). The extension is not - guaranteed to have been associated with any particular data - stream, but would be mapped to the MIME type `type' by - guess_type(). If no extension can be guessed for `type', None - is returned. - - Optional `strict' argument when false adds a bunch of commonly found, - but non-standard types. +def parse_mimetypes(filename): + """ Parse a file in Apache mime.types format. + + Takes a filename and returns an iterable of (ext, type) pairs, suitable + for passing to the register method of a MimeTypes object. """ - if _db is None: - init() - return _db.guess_all_extensions(type, strict) + with open(filename) as mimetypes_file: + for line in mimetypes_file: + words = line.split('#', 1)[0].split() + if not words: + continue + type = words[0] + for extension in words[1:]: + yield ('.' + extension, type) -def guess_extension(type, strict=True): - """Guess the extension for a file based on its MIME type. +def read_mime_types(filename): + """Read MIME types from a file. - Return value is a string giving a filename extension, including the - leading dot ('.'). The extension is not guaranteed to have been - associated with any particular data stream, but would be mapped to the - MIME type `type' by guess_type(). If no extension can be guessed for - `type', None is returned. 
- - Optional `strict' argument when false adds a bunch of commonly found, - but non-standard types. + Read a file in the format of Apache's mime.types, and returns a mapping + from extensions to MIME types, or returns None if an I/O Error occurs. + + Deprecated: Instead, call `dict(parse_mimetypes(filename))` or + `list(parse_mimetypes(filename))`. """ - if _db is None: - init() - return _db.guess_extension(type, strict) - -def add_type(type, ext, strict=True): - """Add a mapping between a type and an extension. - - When the extension is already known, the new - type will replace the old one. When the type - is already known the extension will be added - to the list of known extensions. - - If strict is true, information will be added to - list of standard types, else to the list of non-standard - types. - """ - if _db is None: - init() - return _db.add_type(type, ext, strict) - - -def init(files=None): - global suffix_map, types_map, encodings_map, common_types - global inited, _db - inited = True # so that MimeTypes.__init__() doesn't call us again - db = MimeTypes() - if files is None: - files = knownfiles - for file in files: - if os.path.isfile(file): - db.readfp(open(file)) - encodings_map = db.encodings_map - suffix_map = db.suffix_map - types_map = db.types_map[True] - common_types = db.types_map[False] - # Make the DB a global variable now that it is fully initialized - _db = db - - -def read_mime_types(file): + warnings.warn("read_mime_types is deprecated", DeprecationWarning) try: - f = open(file) + return dict(parse_mimetypes(filename)) except IOError: return None - db = MimeTypes() - db.readfp(f, True) - return db.types_map[True] - -def _default_mime_types(): - global suffix_map - global encodings_map - global types_map - global common_types - - suffix_map = { - '.tgz': '.tar.gz', - '.taz': '.tar.gz', - '.tz': '.tar.gz', - '.tbz2': '.tar.bz2', - } - - encodings_map = { - '.gz': 'gzip', - '.Z': 'compress', - '.bz2': 'bzip2', - } - - # Before adding new 
types, make sure they are either registered with IANA, - # at http://www.isi.edu/in-notes/iana/assignments/media-types - # or extensions, i.e. using the x- prefix - - # If you add to these, please keep them sorted! - types_map = { - '.a' : 'application/octet-stream', - '.ai' : 'application/postscript', - '.aif' : 'audio/x-aiff', - '.aifc' : 'audio/x-aiff', - '.aiff' : 'audio/x-aiff', - '.au' : 'audio/basic', - '.avi' : 'video/x-msvideo', - '.bat' : 'text/plain', - '.bcpio' : 'application/x-bcpio', - '.bin' : 'application/octet-stream', - '.bmp' : 'image/x-ms-bmp', - '.c' : 'text/plain', - # Duplicates :( - '.cdf' : 'application/x-cdf', - '.cdf' : 'application/x-netcdf', - '.cpio' : 'application/x-cpio', - '.csh' : 'application/x-csh', - '.css' : 'text/css', - '.dll' : 'application/octet-stream', - '.doc' : 'application/msword', - '.dot' : 'application/msword', - '.dvi' : 'application/x-dvi', - '.eml' : 'message/rfc822', - '.eps' : 'application/postscript', - '.etx' : 'text/x-setext', - '.exe' : 'application/octet-stream', - '.gif' : 'image/gif', - '.gtar' : 'application/x-gtar', - '.h' : 'text/plain', - '.hdf' : 'application/x-hdf', - '.htm' : 'text/html', - '.html' : 'text/html', - '.ief' : 'image/ief', - '.jpe' : 'image/jpeg', - '.jpeg' : 'image/jpeg', - '.jpg' : 'image/jpeg', - '.js' : 'application/x-javascript', - '.ksh' : 'text/plain', - '.latex' : 'application/x-latex', - '.m1v' : 'video/mpeg', - '.man' : 'application/x-troff-man', - '.me' : 'application/x-troff-me', - '.mht' : 'message/rfc822', - '.mhtml' : 'message/rfc822', - '.mif' : 'application/x-mif', - '.mov' : 'video/quicktime', - '.movie' : 'video/x-sgi-movie', - '.mp2' : 'audio/mpeg', - '.mp3' : 'audio/mpeg', - '.mp4' : 'video/mp4', - '.mpa' : 'video/mpeg', - '.mpe' : 'video/mpeg', - '.mpeg' : 'video/mpeg', - '.mpg' : 'video/mpeg', - '.ms' : 'application/x-troff-ms', - '.nc' : 'application/x-netcdf', - '.nws' : 'message/rfc822', - '.o' : 'application/octet-stream', - '.obj' : 
'application/octet-stream', - '.oda' : 'application/oda', - '.p12' : 'application/x-pkcs12', - '.p7c' : 'application/pkcs7-mime', - '.pbm' : 'image/x-portable-bitmap', - '.pdf' : 'application/pdf', - '.pfx' : 'application/x-pkcs12', - '.pgm' : 'image/x-portable-graymap', - '.pl' : 'text/plain', - '.png' : 'image/png', - '.pnm' : 'image/x-portable-anymap', - '.pot' : 'application/vnd.ms-powerpoint', - '.ppa' : 'application/vnd.ms-powerpoint', - '.ppm' : 'image/x-portable-pixmap', - '.pps' : 'application/vnd.ms-powerpoint', - '.ppt' : 'application/vnd.ms-powerpoint', - '.ps' : 'application/postscript', - '.pwz' : 'application/vnd.ms-powerpoint', - '.py' : 'text/x-python', - '.pyc' : 'application/x-python-code', - '.pyo' : 'application/x-python-code', - '.qt' : 'video/quicktime', - '.ra' : 'audio/x-pn-realaudio', - '.ram' : 'application/x-pn-realaudio', - '.ras' : 'image/x-cmu-raster', - '.rdf' : 'application/xml', - '.rgb' : 'image/x-rgb', - '.roff' : 'application/x-troff', - '.rtx' : 'text/richtext', - '.sgm' : 'text/x-sgml', - '.sgml' : 'text/x-sgml', - '.sh' : 'application/x-sh', - '.shar' : 'application/x-shar', - '.snd' : 'audio/basic', - '.so' : 'application/octet-stream', - '.src' : 'application/x-wais-source', - '.sv4cpio': 'application/x-sv4cpio', - '.sv4crc' : 'application/x-sv4crc', - '.swf' : 'application/x-shockwave-flash', - '.t' : 'application/x-troff', - '.tar' : 'application/x-tar', - '.tcl' : 'application/x-tcl', - '.tex' : 'application/x-tex', - '.texi' : 'application/x-texinfo', - '.texinfo': 'application/x-texinfo', - '.tif' : 'image/tiff', - '.tiff' : 'image/tiff', - '.tr' : 'application/x-troff', - '.tsv' : 'text/tab-separated-values', - '.txt' : 'text/plain', - '.ustar' : 'application/x-ustar', - '.vcf' : 'text/x-vcard', - '.wav' : 'audio/x-wav', - '.wiz' : 'application/msword', - '.wsdl' : 'application/xml', - '.xbm' : 'image/x-xbitmap', - '.xlb' : 'application/vnd.ms-excel', - # Duplicates :( - '.xls' : 'application/excel', - '.xls' : 
'application/vnd.ms-excel', - '.xml' : 'text/xml', - '.xpdl' : 'application/xml', - '.xpm' : 'image/x-xpixmap', - '.xsl' : 'application/xml', - '.xwd' : 'image/x-xwindowdump', - '.zip' : 'application/zip', - } - +_suffix_map = { + '.tgz': '.tar.gz', + '.taz': '.tar.gz', + '.tz': '.tar.gz', + '.tbz2': '.tar.bz2', +} +_encodings_map = { + '.gz': 'gzip', + '.Z': 'compress', + '.bz2': 'bzip2', +} +_strict_types = [ + # IANA registry: http://www.isi.edu/in-notes/iana/assignments/media-types + # Apache: http://svn.apache.org/repos/asf/httpd/httpd/trunk/docs/conf/mime.types + + # later rows override previous rows for the extension -> type map, but add + # themselves to the end of the list for the type -> extension map + ('.a' , 'application/octet-stream'), + ('.ai' , 'application/postscript'), + ('.aif' , 'audio/x-aiff'), + ('.aifc' , 'audio/x-aiff'), + ('.aiff' , 'audio/x-aiff'), + ('.au' , 'audio/basic'), + ('.avi' , 'video/x-msvideo'), + ('.bat' , 'text/plain'), + ('.bcpio' , 'application/x-bcpio'), + ('.bin' , 'application/octet-stream'), + ('.bmp' , 'image/x-ms-bmp'), + ('.c' , 'text/plain'), + ('.cdf' , 'application/x-cdf'), + ('.cdf' , 'application/x-netcdf'), # canonical + ('.cpio' , 'application/x-cpio'), + ('.csh' , 'application/x-csh'), + ('.css' , 'text/css'), + ('.dll' , 'application/octet-stream'), + ('.doc' , 'application/msword'), + ('.dot' , 'application/msword'), + ('.dvi' , 'application/x-dvi'), + ('.eml' , 'message/rfc822'), + ('.eps' , 'application/postscript'), + ('.etx' , 'text/x-setext'), + ('.exe' , 'application/octet-stream'), + ('.gif' , 'image/gif'), + ('.gtar' , 'application/x-gtar'), + ('.h' , 'text/plain'), + ('.hdf' , 'application/x-hdf'), + ('.htm' , 'text/html'), + ('.html' , 'text/html'), + ('.ief' , 'image/ief'), + ('.jpe' , 'image/jpeg'), + ('.jpeg' , 'image/jpeg'), + ('.jpg' , 'image/jpeg'), + ('.js' , 'application/x-javascript'), + ('.ksh' , 'text/plain'), + ('.latex' , 'application/x-latex'), + ('.m1v' , 'video/mpeg'), + ('.man' , 
'application/x-troff-man'), + ('.me' , 'application/x-troff-me'), + ('.mht' , 'message/rfc822'), + ('.mhtml' , 'message/rfc822'), + ('.mif' , 'application/x-mif'), + ('.mov' , 'video/quicktime'), + ('.movie' , 'video/x-sgi-movie'), + ('.mp2' , 'audio/mpeg'), + ('.mp3' , 'audio/mpeg'), + ('.mp4' , 'video/mp4'), + ('.mpa' , 'video/mpeg'), + ('.mpe' , 'video/mpeg'), + ('.mpeg' , 'video/mpeg'), + ('.mpg' , 'video/mpeg'), + ('.ms' , 'application/x-troff-ms'), + ('.nc' , 'application/x-netcdf'), + ('.nws' , 'message/rfc822'), + ('.o' , 'application/octet-stream'), + ('.obj' , 'application/octet-stream'), + ('.oda' , 'application/oda'), + ('.p12' , 'application/x-pkcs12'), + ('.p7c' , 'application/pkcs7-mime'), + ('.pbm' , 'image/x-portable-bitmap'), + ('.pdf' , 'application/pdf'), + ('.pfx' , 'application/x-pkcs12'), + ('.pgm' , 'image/x-portable-graymap'), + ('.pl' , 'text/plain'), + ('.png' , 'image/png'), + ('.pnm' , 'image/x-portable-anymap'), + ('.pot' , 'application/vnd.ms-powerpoint'), + ('.ppa' , 'application/vnd.ms-powerpoint'), + ('.ppm' , 'image/x-portable-pixmap'), + ('.pps' , 'application/vnd.ms-powerpoint'), + ('.ppt' , 'application/vnd.ms-powerpoint'), + ('.ps' , 'application/postscript'), + ('.pwz' , 'application/vnd.ms-powerpoint'), + ('.py' , 'text/x-python'), + ('.pyc' , 'application/x-python-code'), + ('.pyo' , 'application/x-python-code'), + ('.qt' , 'video/quicktime'), + ('.ra' , 'audio/x-pn-realaudio'), + ('.ram' , 'application/x-pn-realaudio'), + ('.ras' , 'image/x-cmu-raster'), + ('.rdf' , 'application/xml'), + ('.rgb' , 'image/x-rgb'), + ('.roff' , 'application/x-troff'), + ('.rtx' , 'text/richtext'), + ('.sgm' , 'text/x-sgml'), + ('.sgml' , 'text/x-sgml'), + ('.sh' , 'application/x-sh'), + ('.shar' , 'application/x-shar'), + ('.snd' , 'audio/basic'), + ('.so' , 'application/octet-stream'), + ('.src' , 'application/x-wais-source'), + ('.sv4cpio', 'application/x-sv4cpio'), + ('.sv4crc' , 'application/x-sv4crc'), + ('.swf' , 
'application/x-shockwave-flash'), + ('.t' , 'application/x-troff'), + ('.tar' , 'application/x-tar'), + ('.tcl' , 'application/x-tcl'), + ('.tex' , 'application/x-tex'), + ('.texi' , 'application/x-texinfo'), + ('.texinfo', 'application/x-texinfo'), + ('.tif' , 'image/tiff'), + ('.tiff' , 'image/tiff'), + ('.tr' , 'application/x-troff'), + ('.tsv' , 'text/tab-separated-values'), + ('.txt' , 'text/plain'), + ('.ustar' , 'application/x-ustar'), + ('.vcf' , 'text/x-vcard'), + ('.wav' , 'audio/x-wav'), + ('.wiz' , 'application/msword'), + ('.wsdl' , 'application/xml'), + ('.xbm' , 'image/x-xbitmap'), + ('.xlb' , 'application/vnd.ms-excel'), + ('.xls' , 'application/excel'), + ('.xls' , 'application/vnd.ms-excel'), # canonical + ('.xml' , 'text/xml'), + ('.xpdl' , 'application/xml'), + ('.xpm' , 'image/x-xpixmap'), + ('.xsl' , 'application/xml'), + ('.xwd' , 'image/x-xwindowdump'), + ('.zip' , 'application/zip'), +] +_lenient_types = [ # These are non-standard types, commonly found in the wild. They will # only match if strict=0 flag is given to the API methods. + ('.jpg' , 'image/jpg'), + ('.mid' , 'audio/midi'), + ('.midi', 'audio/midi'), + ('.pct' , 'image/pict'), + ('.pic' , 'image/pict'), + ('.pict', 'image/pict'), + ('.rtf' , 'application/rtf'), + ('.xul' , 'text/xul'), +] - # Please sort these too - common_types = { - '.jpg' : 'image/jpg', - '.mid' : 'audio/midi', - '.midi': 'audio/midi', - '.pct' : 'image/pict', - '.pic' : 'image/pict', - '.pict': 'image/pict', - '.rtf' : 'application/rtf', - '.xul' : 'text/xul' - } +def _init_singleton(_filenames=knownfiles): + # remove _filenames argument when the init funciton is removed. 
+ global _db, guess_type, guess_extension, guess_all_extensions, add_type + global register, encodings_map, suffix_map, types_map, common_types + _db = MimeTypes(filenames=_filenames) + guess_type = _db.guess_type + guess_all_extensions = _db.guess_all_extensions + guess_extension = _db.guess_extension + add_type = _db.add_type + encodings_map = _db.encodings_map + suffix_map = _db.suffix_map + types_map, common_types = _db.types_map + + # inited and init (the next 18 LOC) included for backwards compatibility. + global inited + inited = True +def init(files=knownfiles): + """Re-initialize the module. -_default_mime_types() + Specifically, re-initialize singleton MimeTypes object and global + functions and objects. + This should be used with care, because resetting the module to defaults + could break other third-party code which has changed the contents of these + objects and has expectations about their state. -if __name__ == '__main__': - import sys - import getopt + To make custom type mappings, use your own MimeTypes instance instead. + + init and the module global inited flag are deprecated.""" + warnings.warn("init and inited are deprecated", DeprecationWarning) + return _init_singleton(_filenames=files) - USAGE = """\ -Usage: mimetypes.py [options] type +_init_singleton() # initialize the module -Options: - --help / -h -- print this message and exit - --lenient / -l -- additionally search of some common, but non-standard - types. - --extension / -e -- guess extension instead of type - -More than one type argument may be given. -""" - - def usage(code, msg=''): +if __name__ == '__main__': + import sys + from getopt import getopt, GetoptError + USAGE = dedent("""\ + Usage: mimetypes.py [options] type + + Options: + --help / -h -- print this message and exit + --lenient / -l -- additionally search common non-standard types. 
+ --extension / -e -- guess extension instead of type + + More than one type argument may be given.""") + try: + shortopts, longopts = 'hle', ['help', 'lenient', 'extension'] + opts, args = getopt(sys.argv[1:], shortopts, longopts) + except GetoptError, msg: + print msg print USAGE - if msg: print msg - sys.exit(code) + sys.exit(1) - try: - opts, args = getopt.getopt(sys.argv[1:], 'hle', - ['help', 'lenient', 'extension']) - except getopt.error, msg: - usage(1, msg) - - strict = 1 - extension = 0 - for opt, arg in opts: - if opt in ('-h', '--help'): - usage(0) - elif opt in ('-l', '--lenient'): - strict = 0 - elif opt in ('-e', '--extension'): - extension = 1 + opts = set(opt for opt, arg in opts) + if '-h' in opts or '--help' in opts: + print USAGE + sys.exit(0) + strict = not ('-l' in opts or '--lenient' in opts) for gtype in args: - if extension: + if '-e' in opts or '--extension' in opts: guess = guess_extension(gtype, strict) - if not guess: print "I don't know anything about type", gtype - else: print guess + print guess or "I don't know anything about type %s" % (gtype,) else: guess, encoding = guess_type(gtype, strict) - if not guess: print "I don't know anything about type", gtype - else: print 'type:', guess, 'encoding:', encoding + if not guess: + print "I don't know anything about type", gtype + else: + print 'type: %s, encoding: %s' % (guess, encoding) \ No newline at end of file Index: Lib/test/test_mimetypes.py =================================================================== --- Lib/test/test_mimetypes.py (revision 74455) +++ Lib/test/test_mimetypes.py (working copy) @@ -7,12 +7,12 @@ # Tell it we don't know about external files: mimetypes.knownfiles = [] mimetypes.inited = False -mimetypes._default_mime_types() +mimetypes._init_singleton(_filenames=mimetypes.knownfiles) class MimeTypesTestCase(unittest.TestCase): def setUp(self): - self.db = mimetypes.MimeTypes() + self.db = mimetypes.MimeTypes(filenames=[]) def test_default_data(self): eq = self.assertEqual