Index: Lib/mimetypes.py
===================================================================
--- Lib/mimetypes.py	(revision 74455)
+++ Lib/mimetypes.py	(working copy)
@@ -1,152 +1,152 @@
 """Guess the MIME type of a file.
 
-This module defines two useful functions:
+This module defines three useful functions:
 
-guess_type(url, strict=1) -- guess the MIME type and encoding of a URL.
+guess_type(url, strict=True) -- guess the MIME type and encoding of a URL.
 
-guess_extension(type, strict=1) -- guess the extension for a given MIME type.
+guess_extension(type, strict=True) -- guess the extension for a given MIME type.
 
+guess_all_extensions(type, strict=True) -- list all guessed extensions for a type.
+
 It also contains the following, for tuning the behavior:
 
+Classes:
+
+MimeTypes -- The type mapping data store. Use this if you plan to change the
+default type mappings, and call its methods in lieu of the above functions.
+
+Functions:
+
+read_mime_types(file) -- parse one file in Apache mime.types format, returning
+the resulting mapping from extensions to types.
+
+init() -- reset the module singleton to default values. Should only be used
+with care, because it could break other code using the module.
+
 Data:
 
-knownfiles -- list of files to parse
-inited -- flag set when init() has been called
+These mappings should be treated as read-only. Altering them could break any
+code which makes assumptions about their content.
+
 suffix_map -- dictionary mapping suffixes to suffixes
 encodings_map -- dictionary mapping suffixes to encodings
 types_map -- dictionary mapping suffixes to types
-
-Functions:
-
-init([files]) -- parse a list of files, default knownfiles
-read_mime_types(file) -- parse one file, return a dictionary or None
 """
 
-import os
 import posixpath
 import urllib
+from textwrap import dedent
+from collections import defaultdict
+import warnings
 
 __all__ = [
-    "guess_type","guess_extension","guess_all_extensions",
-    "add_type","read_mime_types","init"
+    'MimeTypes', 'guess_type', 'guess_extension', 'guess_all_extensions',
+    'add_type', 'read_mime_types', 'parse_mimetypes',
 ]
 
 knownfiles = [
-    "/etc/mime.types",
-    "/etc/httpd/mime.types",                    # Mac OS X
-    "/etc/httpd/conf/mime.types",               # Apache
+    # later files will override type mappings from earlier ones
+    "/usr/local/lib/netscape/mime.types",       # Netscape
     "/etc/apache/mime.types",                   # Apache 1
-    "/etc/apache2/mime.types",                  # Apache 2
-    "/usr/local/etc/httpd/conf/mime.types",
-    "/usr/local/lib/netscape/mime.types",
+    "/etc/httpd/conf/mime.types",               # Apache 1.3
     "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
     "/usr/local/etc/mime.types",                # Apache 1.3
-    ]
+    "/etc/httpd/mime.types",                    # early Mac OS X
+    "/usr/local/etc/httpd/conf/mime.types",
+    "/etc/mime.types",                          # Linux
+    "/etc/apache2/mime.types",                  # Apache 2
+    "/usr/local/apache2/conf/mime.types",       # Apache 2
+]
 
-inited = False
-_db = None
-
-
-class MimeTypes:
+class MimeTypes(object):
     """MIME-types datastore.
 
-    This datastore can handle information from mime.types-style files
-    and supports basic determination of MIME type from a filename or
-    URL, and can guess a reasonable extension given a MIME type.
+    This datastore can handle information from mime.types-style files and
+    supports basic determination of MIME type from a filename or URL, and can
+    guess a reasonable extension given a MIME type.
     """
-
-    def __init__(self, filenames=(), strict=True):
-        if not inited:
-            init()
-        self.encodings_map = encodings_map.copy()
-        self.suffix_map = suffix_map.copy()
+    def __init__(self, filenames=knownfiles, strict=True):
+        self.encodings_map = _encodings_map.copy()
+        self.suffix_map = _suffix_map.copy()
         self.types_map = ({}, {}) # dict for (non-strict, strict)
-        self.types_map_inv = ({}, {})
-        for (ext, type) in types_map.items():
-            self.add_type(type, ext, True)
-        for (ext, type) in common_types.items():
-            self.add_type(type, ext, False)
-        for name in filenames:
-            self.read(name, strict)
+        self.types_map_inv = (defaultdict(list), defaultdict(list))
+        self.register(_strict_types, True)
+        self.register(_lenient_types, False)
+        for filename in filenames:
+            try:
+                self.register(parse_mimetypes(filename), strict)
+            except IOError:
+                continue
 
+    def register(self, pairs, strict=True):
+        """Add a set of mappings from extension -> MIME type
+
+        Takes in an iterable of (extension, type) pairs. For known extensions
+        the new types will replace the old ones. For known types, the
+        extension will be added to the list of known extensions. Types are
+        lower-cased in both maps, as guess_all_extensions() looks them up.
+        If `strict' is true, information will be added to list of standard
+        types, else to the list of non-standard types.
+        """
+        for ext, type in pairs:
+            self.types_map[strict][ext] = type.lower()
+            ext_list = self.types_map_inv[strict][type.lower()]
+            if ext not in ext_list:
+                ext_list.append(ext)
+
     def add_type(self, type, ext, strict=True):
         """Add a mapping between a type and an extension.
 
-        When the extension is already known, the new
-        type will replace the old one. When the type
-        is already known the extension will be added
-        to the list of known extensions.
+        Passes a single (ext, type) pair to `register`.
 
-        If strict is true, information will be added to
-        list of standard types, else to the list of non-standard
-        types.
+        Deprecated: Call MimeTypes.register([(ext, type)]) instead.
         """
-        self.types_map[strict][ext] = type
-        exts = self.types_map_inv[strict].setdefault(type, [])
-        if ext not in exts:
-            exts.append(ext)
+        warnings.warn("deprecated", DeprecationWarning)
+        self.register([(ext, type)], strict) # register() takes (ext, type)
 
+    def _type_from_data_url(self, url):
+        # Media type of a scheme-less data URL, or None if malformed. Syntax:
+        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
+        # mediatype := [ type "/" subtype ] *( ";" parameter )
+        # data      := *urlchar
+        # parameter := attribute "=" value
+        if ',' not in url:
+            return None # bad data URL; guess_type() adds the encoding slot
+        type = url.split(',', 1)[0].split(';', 1)[0] # up to first , or ;
+        if '=' in type or '/' not in type:
+            type = 'text/plain'   # type/subtype defaults to "text/plain"
+        return type
+
     def guess_type(self, url, strict=True):
         """Guess the type of a file based on its URL.
 
-        Return value is a tuple (type, encoding) where type is None if
-        the type can't be guessed (no or unknown suffix) or a string
-        of the form type/subtype, usable for a MIME Content-type
-        header; and encoding is None for no encoding or the name of
-        the program used to encode (e.g. compress or gzip).  The
-        mappings are table driven.  Encoding suffixes are case
-        sensitive; type suffixes are first tried case sensitive, then
-        case insensitive.
+        Return value is a tuple (type, encoding) where type is None if the
+        type can't be guessed (no or unknown suffix) or a string of the form
+        type/subtype, usable for a MIME Content-type header; and encoding is
+        None for no encoding or the name of the program used to encode (e.g.
+        compress or gzip). Encoding suffixes are case sensitive; type suffixes
+        are first tried case sensitive, then case insensitive.
 
-        The suffixes .tgz, .taz and .tz (case sensitive!) are all
-        mapped to '.tar.gz'.  (This is table-driven too, using the
-        dictionary suffix_map.)
+        The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped to
+        '.tar.gz'.  (This is table-driven too, using self.suffix_map.)
 
-        Optional `strict' argument when False adds a bunch of commonly found,
-        but non-standard types.
+        Optional `strict' argument when False adds commonly found but
+        non-standard types.
         """
         scheme, url = urllib.splittype(url)
         if scheme == 'data':
-            # syntax of data URLs:
-            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
-            # mediatype := [ type "/" subtype ] *( ";" parameter )
-            # data      := *urlchar
-            # parameter := attribute "=" value
-            # type/subtype defaults to "text/plain"
-            comma = url.find(',')
-            if comma < 0:
-                # bad data URL
-                return None, None
-            semi = url.find(';', 0, comma)
-            if semi >= 0:
-                type = url[:semi]
-            else:
-                type = url[:comma]
-            if '=' in type or '/' not in type:
-                type = 'text/plain'
-            return type, None           # never compressed, so encoding is None
+            return self._type_from_data_url(url), None # data URLs are never compressed
         base, ext = posixpath.splitext(url)
         while ext in self.suffix_map:
             base, ext = posixpath.splitext(base + self.suffix_map[ext])
-        if ext in self.encodings_map:
-            encoding = self.encodings_map[ext]
+        encoding = self.encodings_map.get(ext)
+        if encoding:
             base, ext = posixpath.splitext(base)
-        else:
-            encoding = None
-        types_map = self.types_map[True]
-        if ext in types_map:
-            return types_map[ext], encoding
-        elif ext.lower() in types_map:
-            return types_map[ext.lower()], encoding
-        elif strict:
-            return None, encoding
-        types_map = self.types_map[False]
-        if ext in types_map:
-            return types_map[ext], encoding
-        elif ext.lower() in types_map:
-            return types_map[ext.lower()], encoding
-        else:
-            return None, encoding
+        loose_map, strict_map = self.types_map
+        type = strict_map.get(ext) or strict_map.get(ext.lower())
+        if not strict:
+            type = type or loose_map.get(ext) or loose_map.get(ext.lower())
+        return type, encoding
 
     def guess_all_extensions(self, type, strict=True):
         """Guess the extensions for a file based on its MIME type.
@@ -156,8 +156,8 @@
         guaranteed to have been associated with any particular data stream,
         but would be mapped to the MIME type `type' by guess_type().
 
-        Optional `strict' argument when false adds a bunch of commonly found,
-        but non-standard types.
+        Optional `strict' argument when False adds commonly found but
+        non-standard types.
         """
         type = type.lower()
         extensions = self.types_map_inv[True].get(type, [])
@@ -170,369 +170,303 @@
     def guess_extension(self, type, strict=True):
         """Guess the extension for a file based on its MIME type.
 
-        Return value is a string giving a filename extension,
-        including the leading dot ('.').  The extension is not
-        guaranteed to have been associated with any particular data
-        stream, but would be mapped to the MIME type `type' by
-        guess_type().  If no extension can be guessed for `type', None
-        is returned.
+        Return value is a string giving a filename extension, including the
+        leading dot ('.').  The extension is not guaranteed to have been
+        associated with any particular data stream, but would be mapped to the
+        MIME type `type' by guess_type().  If no extension can be guessed for
+        `type', None is returned.
 
-        Optional `strict' argument when false adds a bunch of commonly found,
-        but non-standard types.
+        Optional `strict' argument when False adds commonly found but
+        non-standard types.
         """
         extensions = self.guess_all_extensions(type, strict)
-        if not extensions:
-            return None
-        return extensions[0]
+        return extensions[0] if extensions else None
 
     def read(self, filename, strict=True):
+        """Read a single mime.types-format file, specified by pathname, and
+        add its contents to the type mappings. Originally called at instance
+        initialization, but now only included for backwards compatibility and
+        unused internally.
+        
+        Raises an exception if the file can't be opened or read.
         """
-        Read a single mime.types-format file, specified by pathname.
+        warnings.warn("MimeTypes.read is deprecated.", DeprecationWarning)
+        self.register(parse_mimetypes(filename), strict)
 
-        If strict is true, information will be added to
-        list of standard types, else to the list of non-standard
-        types.
-        """
-        fp = open(filename)
-        self.readfp(fp, strict)
-        fp.close()
-
     def readfp(self, fp, strict=True):
+        """Read a single mime.types-format file. Originally a helper for the
+        `read' method, now only included for backwards compatibility and 
+        unused internally.
         """
-        Read a single mime.types-format file.
+        warnings.warn("MimeTypes.readfp is deprecated.", DeprecationWarning)
+        self.register(_parse_mimetypes(fp), strict)
 
-        If strict is true, information will be added to
-        list of standard types, else to the list of non-standard
-        types.
-        """
-        while 1:
-            line = fp.readline()
-            if not line:
-                break
-            words = line.split()
-            for i in range(len(words)):
-                if words[i][0] == '#':
-                    del words[i:]
-                    break
-            if not words:
-                continue
-            type, suffixes = words[0], words[1:]
-            for suff in suffixes:
-                self.add_type(type, '.' + suff, strict)
-
-def guess_type(url, strict=True):
-    """Guess the type of a file based on its URL.
-
-    Return value is a tuple (type, encoding) where type is None if the
-    type can't be guessed (no or unknown suffix) or a string of the
-    form type/subtype, usable for a MIME Content-type header; and
-    encoding is None for no encoding or the name of the program used
-    to encode (e.g. compress or gzip).  The mappings are table
-    driven.  Encoding suffixes are case sensitive; type suffixes are
-    first tried case sensitive, then case insensitive.
-
-    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
-    to ".tar.gz".  (This is table-driven too, using the dictionary
-    suffix_map).
-
-    Optional `strict' argument when false adds a bunch of commonly found, but
-    non-standard types.
+def _parse_mimetypes(types_lines):
+    """Parse Apache mime.types format.
+    
+    Takes an iterable of lines. Returns an iterable of (ext, type) pairs.
     """
-    if _db is None:
-        init()
-    return _db.guess_type(url, strict)
+    # XXX This function exists to support deprecated functions and methods
+    # and uses the same code as parse_mimetypes. Once those are gone, remove
+    # this function too.
+    for line in types_lines:
+        words = line.split('#', 1)[0].split()
+        if not words:
+            continue
+        type = words[0]
+        for extension in words[1:]:
+            yield ('.' + extension, type)
 
-
-def guess_all_extensions(type, strict=True):
-    """Guess the extensions for a file based on its MIME type.
-
-    Return value is a list of strings giving the possible filename
-    extensions, including the leading dot ('.').  The extension is not
-    guaranteed to have been associated with any particular data
-    stream, but would be mapped to the MIME type `type' by
-    guess_type().  If no extension can be guessed for `type', None
-    is returned.
-
-    Optional `strict' argument when false adds a bunch of commonly found,
-    but non-standard types.
+def parse_mimetypes(filename):
+    """ Parse a file in Apache mime.types format.
+    
+    Takes a filename and returns an iterable of (ext, type) pairs, suitable
+    for passing to the register method of a MimeTypes object.
     """
-    if _db is None:
-        init()
-    return _db.guess_all_extensions(type, strict)
+    with open(filename) as mimetypes_file:
+        for line in mimetypes_file:
+            words = line.split('#', 1)[0].split()
+            if not words:
+                continue
+            type = words[0]
+            for extension in words[1:]:
+                yield ('.' + extension, type)
 
-def guess_extension(type, strict=True):
-    """Guess the extension for a file based on its MIME type.
+def read_mime_types(filename):
+    """Read MIME types from a file.
 
-    Return value is a string giving a filename extension, including the
-    leading dot ('.').  The extension is not guaranteed to have been
-    associated with any particular data stream, but would be mapped to the
-    MIME type `type' by guess_type().  If no extension can be guessed for
-    `type', None is returned.
-
-    Optional `strict' argument when false adds a bunch of commonly found,
-    but non-standard types.
+    Read a file in the format of Apache's mime.types, and returns a mapping
+    from extensions to MIME types, or returns None if an I/O Error occurs.
+    
+    Deprecated: Instead, call `dict(parse_mimetypes(filename))` or
+    `list(parse_mimetypes(filename))`.
     """
-    if _db is None:
-        init()
-    return _db.guess_extension(type, strict)
-
-def add_type(type, ext, strict=True):
-    """Add a mapping between a type and an extension.
-
-    When the extension is already known, the new
-    type will replace the old one. When the type
-    is already known the extension will be added
-    to the list of known extensions.
-
-    If strict is true, information will be added to
-    list of standard types, else to the list of non-standard
-    types.
-    """
-    if _db is None:
-        init()
-    return _db.add_type(type, ext, strict)
-
-
-def init(files=None):
-    global suffix_map, types_map, encodings_map, common_types
-    global inited, _db
-    inited = True    # so that MimeTypes.__init__() doesn't call us again
-    db = MimeTypes()
-    if files is None:
-        files = knownfiles
-    for file in files:
-        if os.path.isfile(file):
-            db.readfp(open(file))
-    encodings_map = db.encodings_map
-    suffix_map = db.suffix_map
-    types_map = db.types_map[True]
-    common_types = db.types_map[False]
-    # Make the DB a global variable now that it is fully initialized
-    _db = db
-
-
-def read_mime_types(file):
+    warnings.warn("read_mime_types is deprecated", DeprecationWarning)
     try:
-        f = open(file)
+        return dict(parse_mimetypes(filename))
     except IOError:
         return None
-    db = MimeTypes()
-    db.readfp(f, True)
-    return db.types_map[True]
 
-
-def _default_mime_types():
-    global suffix_map
-    global encodings_map
-    global types_map
-    global common_types
-
-    suffix_map = {
-        '.tgz': '.tar.gz',
-        '.taz': '.tar.gz',
-        '.tz': '.tar.gz',
-        '.tbz2': '.tar.bz2',
-        }
-
-    encodings_map = {
-        '.gz': 'gzip',
-        '.Z': 'compress',
-        '.bz2': 'bzip2',
-        }
-
-    # Before adding new types, make sure they are either registered with IANA,
-    # at http://www.isi.edu/in-notes/iana/assignments/media-types
-    # or extensions, i.e. using the x- prefix
-
-    # If you add to these, please keep them sorted!
-    types_map = {
-        '.a'      : 'application/octet-stream',
-        '.ai'     : 'application/postscript',
-        '.aif'    : 'audio/x-aiff',
-        '.aifc'   : 'audio/x-aiff',
-        '.aiff'   : 'audio/x-aiff',
-        '.au'     : 'audio/basic',
-        '.avi'    : 'video/x-msvideo',
-        '.bat'    : 'text/plain',
-        '.bcpio'  : 'application/x-bcpio',
-        '.bin'    : 'application/octet-stream',
-        '.bmp'    : 'image/x-ms-bmp',
-        '.c'      : 'text/plain',
-        # Duplicates :(
-        '.cdf'    : 'application/x-cdf',
-        '.cdf'    : 'application/x-netcdf',
-        '.cpio'   : 'application/x-cpio',
-        '.csh'    : 'application/x-csh',
-        '.css'    : 'text/css',
-        '.dll'    : 'application/octet-stream',
-        '.doc'    : 'application/msword',
-        '.dot'    : 'application/msword',
-        '.dvi'    : 'application/x-dvi',
-        '.eml'    : 'message/rfc822',
-        '.eps'    : 'application/postscript',
-        '.etx'    : 'text/x-setext',
-        '.exe'    : 'application/octet-stream',
-        '.gif'    : 'image/gif',
-        '.gtar'   : 'application/x-gtar',
-        '.h'      : 'text/plain',
-        '.hdf'    : 'application/x-hdf',
-        '.htm'    : 'text/html',
-        '.html'   : 'text/html',
-        '.ief'    : 'image/ief',
-        '.jpe'    : 'image/jpeg',
-        '.jpeg'   : 'image/jpeg',
-        '.jpg'    : 'image/jpeg',
-        '.js'     : 'application/x-javascript',
-        '.ksh'    : 'text/plain',
-        '.latex'  : 'application/x-latex',
-        '.m1v'    : 'video/mpeg',
-        '.man'    : 'application/x-troff-man',
-        '.me'     : 'application/x-troff-me',
-        '.mht'    : 'message/rfc822',
-        '.mhtml'  : 'message/rfc822',
-        '.mif'    : 'application/x-mif',
-        '.mov'    : 'video/quicktime',
-        '.movie'  : 'video/x-sgi-movie',
-        '.mp2'    : 'audio/mpeg',
-        '.mp3'    : 'audio/mpeg',
-        '.mp4'    : 'video/mp4',
-        '.mpa'    : 'video/mpeg',
-        '.mpe'    : 'video/mpeg',
-        '.mpeg'   : 'video/mpeg',
-        '.mpg'    : 'video/mpeg',
-        '.ms'     : 'application/x-troff-ms',
-        '.nc'     : 'application/x-netcdf',
-        '.nws'    : 'message/rfc822',
-        '.o'      : 'application/octet-stream',
-        '.obj'    : 'application/octet-stream',
-        '.oda'    : 'application/oda',
-        '.p12'    : 'application/x-pkcs12',
-        '.p7c'    : 'application/pkcs7-mime',
-        '.pbm'    : 'image/x-portable-bitmap',
-        '.pdf'    : 'application/pdf',
-        '.pfx'    : 'application/x-pkcs12',
-        '.pgm'    : 'image/x-portable-graymap',
-        '.pl'     : 'text/plain',
-        '.png'    : 'image/png',
-        '.pnm'    : 'image/x-portable-anymap',
-        '.pot'    : 'application/vnd.ms-powerpoint',
-        '.ppa'    : 'application/vnd.ms-powerpoint',
-        '.ppm'    : 'image/x-portable-pixmap',
-        '.pps'    : 'application/vnd.ms-powerpoint',
-        '.ppt'    : 'application/vnd.ms-powerpoint',
-        '.ps'     : 'application/postscript',
-        '.pwz'    : 'application/vnd.ms-powerpoint',
-        '.py'     : 'text/x-python',
-        '.pyc'    : 'application/x-python-code',
-        '.pyo'    : 'application/x-python-code',
-        '.qt'     : 'video/quicktime',
-        '.ra'     : 'audio/x-pn-realaudio',
-        '.ram'    : 'application/x-pn-realaudio',
-        '.ras'    : 'image/x-cmu-raster',
-        '.rdf'    : 'application/xml',
-        '.rgb'    : 'image/x-rgb',
-        '.roff'   : 'application/x-troff',
-        '.rtx'    : 'text/richtext',
-        '.sgm'    : 'text/x-sgml',
-        '.sgml'   : 'text/x-sgml',
-        '.sh'     : 'application/x-sh',
-        '.shar'   : 'application/x-shar',
-        '.snd'    : 'audio/basic',
-        '.so'     : 'application/octet-stream',
-        '.src'    : 'application/x-wais-source',
-        '.sv4cpio': 'application/x-sv4cpio',
-        '.sv4crc' : 'application/x-sv4crc',
-        '.swf'    : 'application/x-shockwave-flash',
-        '.t'      : 'application/x-troff',
-        '.tar'    : 'application/x-tar',
-        '.tcl'    : 'application/x-tcl',
-        '.tex'    : 'application/x-tex',
-        '.texi'   : 'application/x-texinfo',
-        '.texinfo': 'application/x-texinfo',
-        '.tif'    : 'image/tiff',
-        '.tiff'   : 'image/tiff',
-        '.tr'     : 'application/x-troff',
-        '.tsv'    : 'text/tab-separated-values',
-        '.txt'    : 'text/plain',
-        '.ustar'  : 'application/x-ustar',
-        '.vcf'    : 'text/x-vcard',
-        '.wav'    : 'audio/x-wav',
-        '.wiz'    : 'application/msword',
-        '.wsdl'   : 'application/xml',
-        '.xbm'    : 'image/x-xbitmap',
-        '.xlb'    : 'application/vnd.ms-excel',
-        # Duplicates :(
-        '.xls'    : 'application/excel',
-        '.xls'    : 'application/vnd.ms-excel',
-        '.xml'    : 'text/xml',
-        '.xpdl'   : 'application/xml',
-        '.xpm'    : 'image/x-xpixmap',
-        '.xsl'    : 'application/xml',
-        '.xwd'    : 'image/x-xwindowdump',
-        '.zip'    : 'application/zip',
-        }
-
+_suffix_map = {
+    '.tgz': '.tar.gz',
+    '.taz': '.tar.gz',
+    '.tz': '.tar.gz',
+    '.tbz2': '.tar.bz2',
+}
+_encodings_map = {
+    '.gz': 'gzip',
+    '.Z': 'compress',
+    '.bz2': 'bzip2',
+}
+_strict_types = [
+    # IANA registry: http://www.isi.edu/in-notes/iana/assignments/media-types
+    # Apache: http://svn.apache.org/repos/asf/httpd/httpd/trunk/docs/conf/mime.types
+    
+    # later rows override previous rows for the extension -> type map, but add
+    # themselves to the end of the list for the type -> extension map
+    ('.a'      , 'application/octet-stream'),
+    ('.ai'     , 'application/postscript'),
+    ('.aif'    , 'audio/x-aiff'),
+    ('.aifc'   , 'audio/x-aiff'),
+    ('.aiff'   , 'audio/x-aiff'),
+    ('.au'     , 'audio/basic'),
+    ('.avi'    , 'video/x-msvideo'),
+    ('.bat'    , 'text/plain'),
+    ('.bcpio'  , 'application/x-bcpio'),
+    ('.bin'    , 'application/octet-stream'),
+    ('.bmp'    , 'image/x-ms-bmp'),
+    ('.c'      , 'text/plain'),
+    ('.cdf'    , 'application/x-cdf'),
+    ('.cdf'    , 'application/x-netcdf'), # canonical
+    ('.cpio'   , 'application/x-cpio'),
+    ('.csh'    , 'application/x-csh'),
+    ('.css'    , 'text/css'),
+    ('.dll'    , 'application/octet-stream'),
+    ('.doc'    , 'application/msword'),
+    ('.dot'    , 'application/msword'),
+    ('.dvi'    , 'application/x-dvi'),
+    ('.eml'    , 'message/rfc822'),
+    ('.eps'    , 'application/postscript'),
+    ('.etx'    , 'text/x-setext'),
+    ('.exe'    , 'application/octet-stream'),
+    ('.gif'    , 'image/gif'),
+    ('.gtar'   , 'application/x-gtar'),
+    ('.h'      , 'text/plain'),
+    ('.hdf'    , 'application/x-hdf'),
+    ('.htm'    , 'text/html'),
+    ('.html'   , 'text/html'),
+    ('.ief'    , 'image/ief'),
+    ('.jpe'    , 'image/jpeg'),
+    ('.jpeg'   , 'image/jpeg'),
+    ('.jpg'    , 'image/jpeg'),
+    ('.js'     , 'application/x-javascript'),
+    ('.ksh'    , 'text/plain'),
+    ('.latex'  , 'application/x-latex'),
+    ('.m1v'    , 'video/mpeg'),
+    ('.man'    , 'application/x-troff-man'),
+    ('.me'     , 'application/x-troff-me'),
+    ('.mht'    , 'message/rfc822'),
+    ('.mhtml'  , 'message/rfc822'),
+    ('.mif'    , 'application/x-mif'),
+    ('.mov'    , 'video/quicktime'),
+    ('.movie'  , 'video/x-sgi-movie'),
+    ('.mp2'    , 'audio/mpeg'),
+    ('.mp3'    , 'audio/mpeg'),
+    ('.mp4'    , 'video/mp4'),
+    ('.mpa'    , 'video/mpeg'),
+    ('.mpe'    , 'video/mpeg'),
+    ('.mpeg'   , 'video/mpeg'),
+    ('.mpg'    , 'video/mpeg'),
+    ('.ms'     , 'application/x-troff-ms'),
+    ('.nc'     , 'application/x-netcdf'),
+    ('.nws'    , 'message/rfc822'),
+    ('.o'      , 'application/octet-stream'),
+    ('.obj'    , 'application/octet-stream'),
+    ('.oda'    , 'application/oda'),
+    ('.p12'    , 'application/x-pkcs12'),
+    ('.p7c'    , 'application/pkcs7-mime'),
+    ('.pbm'    , 'image/x-portable-bitmap'),
+    ('.pdf'    , 'application/pdf'),
+    ('.pfx'    , 'application/x-pkcs12'),
+    ('.pgm'    , 'image/x-portable-graymap'),
+    ('.pl'     , 'text/plain'),
+    ('.png'    , 'image/png'),
+    ('.pnm'    , 'image/x-portable-anymap'),
+    ('.pot'    , 'application/vnd.ms-powerpoint'),
+    ('.ppa'    , 'application/vnd.ms-powerpoint'),
+    ('.ppm'    , 'image/x-portable-pixmap'),
+    ('.pps'    , 'application/vnd.ms-powerpoint'),
+    ('.ppt'    , 'application/vnd.ms-powerpoint'),
+    ('.ps'     , 'application/postscript'),
+    ('.pwz'    , 'application/vnd.ms-powerpoint'),
+    ('.py'     , 'text/x-python'),
+    ('.pyc'    , 'application/x-python-code'),
+    ('.pyo'    , 'application/x-python-code'),
+    ('.qt'     , 'video/quicktime'),
+    ('.ra'     , 'audio/x-pn-realaudio'),
+    ('.ram'    , 'application/x-pn-realaudio'),
+    ('.ras'    , 'image/x-cmu-raster'),
+    ('.rdf'    , 'application/xml'),
+    ('.rgb'    , 'image/x-rgb'),
+    ('.roff'   , 'application/x-troff'),
+    ('.rtx'    , 'text/richtext'),
+    ('.sgm'    , 'text/x-sgml'),
+    ('.sgml'   , 'text/x-sgml'),
+    ('.sh'     , 'application/x-sh'),
+    ('.shar'   , 'application/x-shar'),
+    ('.snd'    , 'audio/basic'),
+    ('.so'     , 'application/octet-stream'),
+    ('.src'    , 'application/x-wais-source'),
+    ('.sv4cpio', 'application/x-sv4cpio'),
+    ('.sv4crc' , 'application/x-sv4crc'),
+    ('.swf'    , 'application/x-shockwave-flash'),
+    ('.t'      , 'application/x-troff'),
+    ('.tar'    , 'application/x-tar'),
+    ('.tcl'    , 'application/x-tcl'),
+    ('.tex'    , 'application/x-tex'),
+    ('.texi'   , 'application/x-texinfo'),
+    ('.texinfo', 'application/x-texinfo'),
+    ('.tif'    , 'image/tiff'),
+    ('.tiff'   , 'image/tiff'),
+    ('.tr'     , 'application/x-troff'),
+    ('.tsv'    , 'text/tab-separated-values'),
+    ('.txt'    , 'text/plain'),
+    ('.ustar'  , 'application/x-ustar'),
+    ('.vcf'    , 'text/x-vcard'),
+    ('.wav'    , 'audio/x-wav'),
+    ('.wiz'    , 'application/msword'),
+    ('.wsdl'   , 'application/xml'),
+    ('.xbm'    , 'image/x-xbitmap'),
+    ('.xlb'    , 'application/vnd.ms-excel'),
+    ('.xls'    , 'application/excel'),
+    ('.xls'    , 'application/vnd.ms-excel'), # canonical
+    ('.xml'    , 'text/xml'),
+    ('.xpdl'   , 'application/xml'),
+    ('.xpm'    , 'image/x-xpixmap'),
+    ('.xsl'    , 'application/xml'),
+    ('.xwd'    , 'image/x-xwindowdump'),
+    ('.zip'    , 'application/zip'),
+]
+_lenient_types = [
     # These are non-standard types, commonly found in the wild.  They will
     # only match if strict=0 flag is given to the API methods.
+    ('.jpg' , 'image/jpg'),
+    ('.mid' , 'audio/midi'),
+    ('.midi', 'audio/midi'),
+    ('.pct' , 'image/pict'),
+    ('.pic' , 'image/pict'),
+    ('.pict', 'image/pict'),
+    ('.rtf' , 'application/rtf'),
+    ('.xul' , 'text/xul'),
+]
 
-    # Please sort these too
-    common_types = {
-        '.jpg' : 'image/jpg',
-        '.mid' : 'audio/midi',
-        '.midi': 'audio/midi',
-        '.pct' : 'image/pict',
-        '.pic' : 'image/pict',
-        '.pict': 'image/pict',
-        '.rtf' : 'application/rtf',
-        '.xul' : 'text/xul'
-        }
+def _init_singleton(_filenames=None):
+    # remove _filenames argument when the init function is removed.
+    global _db, guess_type, guess_extension, guess_all_extensions, add_type
+    global encodings_map, suffix_map, types_map, common_types, inited
+    # None means the current knownfiles, looked up at call time so that
+    # rebinding mimetypes.knownfiles before calling takes effect.
+    _db = MimeTypes(knownfiles if _filenames is None else _filenames)
+    guess_type = _db.guess_type
+    guess_all_extensions = _db.guess_all_extensions
+    guess_extension = _db.guess_extension
+    add_type = _db.add_type
+    encodings_map = _db.encodings_map
+    suffix_map = _db.suffix_map
+    # _db.types_map is indexed by `strict': (non-strict, strict)
+    common_types, types_map = _db.types_map
+    inited = True # inited and init are kept for backwards compatibility
 
+def init(files=None):
+    """Re-initialize the module.
 
-_default_mime_types()
+    Specifically, re-initialize singleton MimeTypes object and global
+    functions and objects, reading `files' (default: current knownfiles).
 
+    This should be used with care, because resetting the module to defaults
+    could break other third-party code which has changed the contents of these
+    objects and has expectations about their state.
 
-if __name__ == '__main__':
-    import sys
-    import getopt
+    To make custom type mappings, use your own MimeTypes instance instead.
+
+    init and the module global inited flag are deprecated."""
+    warnings.warn("init and inited are deprecated", DeprecationWarning)
+    return _init_singleton(_filenames=files)
 
-    USAGE = """\
-Usage: mimetypes.py [options] type
+_init_singleton() # initialize the module
 
-Options:
-    --help / -h       -- print this message and exit
-    --lenient / -l    -- additionally search of some common, but non-standard
-                         types.
-    --extension / -e  -- guess extension instead of type
-
-More than one type argument may be given.
-"""
-
-    def usage(code, msg=''):
+if __name__ == '__main__':
+    import sys
+    from getopt import getopt, GetoptError
+    USAGE = dedent("""\
+        Usage: mimetypes.py [options] type
+        
+        Options:
+            --help / -h       -- print this message and exit
+            --lenient / -l    -- additionally search common non-standard types.
+            --extension / -e  -- guess extension instead of type
+        
+        More than one type argument may be given.""")
+    try:
+        shortopts, longopts = 'hle', ['help', 'lenient', 'extension']
+        opts, args = getopt(sys.argv[1:], shortopts, longopts)
+    except GetoptError, msg:
+        print msg
         print USAGE
-        if msg: print msg
-        sys.exit(code)
+        sys.exit(1)
 
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], 'hle',
-                                   ['help', 'lenient', 'extension'])
-    except getopt.error, msg:
-        usage(1, msg)
-
-    strict = 1
-    extension = 0
-    for opt, arg in opts:
-        if opt in ('-h', '--help'):
-            usage(0)
-        elif opt in ('-l', '--lenient'):
-            strict = 0
-        elif opt in ('-e', '--extension'):
-            extension = 1
+    opts = set(opt for opt, arg in opts)
+    if '-h' in opts or '--help' in opts:
+        print USAGE
+        sys.exit(0)
+    strict = not ('-l' in opts or '--lenient' in opts)
     for gtype in args:
-        if extension:
+        if '-e' in opts or '--extension' in opts:
             guess = guess_extension(gtype, strict)
-            if not guess: print "I don't know anything about type", gtype
-            else: print guess
+            print guess or "I don't know anything about type %s" % (gtype,)
         else:
             guess, encoding = guess_type(gtype, strict)
-            if not guess: print "I don't know anything about type", gtype
-            else: print 'type:', guess, 'encoding:', encoding
+            if not guess:
+                print "I don't know anything about type", gtype
+            else:
+                print 'type: %s, encoding: %s' % (guess, encoding)
\ No newline at end of file
Index: Lib/test/test_mimetypes.py
===================================================================
--- Lib/test/test_mimetypes.py	(revision 74455)
+++ Lib/test/test_mimetypes.py	(working copy)
@@ -7,12 +7,12 @@
 # Tell it we don't know about external files:
 mimetypes.knownfiles = []
 mimetypes.inited = False
-mimetypes._default_mime_types()
+mimetypes._init_singleton()
 
 
 class MimeTypesTestCase(unittest.TestCase):
     def setUp(self):
-        self.db = mimetypes.MimeTypes()
+        self.db = mimetypes.MimeTypes(filenames=[])
 
     def test_default_data(self):
         eq = self.assertEqual