"""Guess the MIME type of a file. This module defines two useful functions: guess_type(url) -- guess the MIME type and encoding of a URL. guess_extension(type) -- guess the extension for a given MIME type. It also contains the following, for tuning the behavior: Data: knownfiles -- list of files to parse inited -- flag set when init() has been called suffixes_map -- dictionary mapping suffixes to suffixes encodings_map -- dictionary mapping suffixes to encodings types_map -- dictionary mapping suffixes to types Functions: init([files]) -- parse a list of files, default knownfiles read_mime_types(file) -- parse one file, return a dictionary or None """ import os import posixpath import urllib __all__ = ["guess_type","guess_extension","read_mime_types","init"] knownfiles = [ "/usr/local/etc/httpd/conf/mime.types", "/usr/local/lib/netscape/mime.types", "/usr/local/etc/httpd/conf/mime.types", # Apache 1.2 "/usr/local/etc/mime.types", # Apache 1.3 ] inited = 0 class MimeTypes: """MIME-types datastore. This datastore can handle information from mime.types-style files and supports basic determination of MIME type from a filename or URL, and can guess a reasonable extension given a MIME type. """ def __init__(self, filenames=()): self.encodings_map = {} self.suffixes_map = {} self.types_map = {} for name in filenames: if os.path.exists(name): self.read(name) def guess_type(self, url): """Guess the type of a file based on its URL. Return value is a tuple (type, encoding) where type is None if the type can't be guessed (no or unknown suffix) or a string of the form type/subtype, usable for a MIME Content-type header; and encoding is None for no encoding or the name of the program used to encode (e.g. compress or gzip). The mappings are table driven. Encoding suffixes are case sensitive; type suffixes are first tried case sensitive, then case insensitive. The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped to '.tar.gz'. (This is table-driven too, using the dictionary suffix_map.) """ scheme, url = urllib.splittype(url) if scheme == 'data': # syntax of data URLs: # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data # mediatype := [ type "/" subtype ] *( ";" parameter ) # data := *urlchar # parameter := attribute "=" value # type/subtype defaults to "text/plain" comma = url.find(',') if comma < 0: # bad data URL return None, None semi = url.find(';', 0, comma) if semi >= 0: type = url[:semi] else: type = url[:comma] if '=' in type or '/' not in type: type = 'text/plain' return type, None # never compressed, so encoding is None base, ext = posixpath.splitext(url) while self.suffix_map.has_key(ext): base, ext = posixpath.splitext(base + self.suffix_map[ext]) if self.encodings_map.has_key(ext): encoding = self.encodings_map[ext] base, ext = posixpath.splitext(base) else: encoding = None types_map = self.types_map if types_map.has_key(ext): return types_map[ext], encoding elif types_map.has_key(ext.lower()): return types_map[ext.lower()], encoding else: return None, encoding def guess_extension(self, type): """Guess the extension for a file based on its MIME type. Return value is a string giving a filename extension, including the leading dot ('.'). The extension is not guaranteed to have been associated with any particular data stream, but would be mapped to the MIME type `type' by guess_type(). If no extension can be guessed for `type', None is returned. """ type = type.lower() for ext, stype in self.types_map.items(): if type == stype: return ext return None def read(self, filename): """Read a single mime.types-format file, specified by pathname.""" fp = open(filename) self.readfp(fp) fp.close() def readfp(self): """Read a single mime.types-format file.""" map = self.types_map while 1: line = f.readline() if not line: break words = line.split() for i in range(len(words)): if words[i][0] == '#': del words[i:] break if not words: continue type, suffixes = words[0], words[1:] for suff in suffixes: map['.' + suff] = type def guess_type(url): """Guess the type of a file based on its URL. Return value is a tuple (type, encoding) where type is None if the type can't be guessed (no or unknown suffix) or a string of the form type/subtype, usable for a MIME Content-type header; and encoding is None for no encoding or the name of the program used to encode (e.g. compress or gzip). The mappings are table driven. Encoding suffixes are case sensitive; type suffixes are first tried case sensitive, then case insensitive. The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped to ".tar.gz". (This is table-driven too, using the dictionary suffix_map). """ init() return guess_type(url) def guess_extension(type): """Guess the extension for a file based on its MIME type. Return value is a string giving a filename extension, including the leading dot ('.'). The extension is not guaranteed to have been associated with any particular data stream, but would be mapped to the MIME type `type' by guess_type(). If no extension can be guessed for `type', None is returned. """ init() return guess_extension(type) def init(files=None): global guess_extension, guess_type global inited db = MimeTypes() db.encodings_map = encodings_map.copy() db.suffix_map = suffix_map.copy() db.types_map = types_map.copy() if files is None: files = knownfiles for file in files: if os.path.isfile(file): db.readfp(open(file)) guess_extension = db.guess_extension guess_type = db.guess_type inited = 1 def read_mime_types(file): try: f = open(file) except IOError: return None db = MimeTypes() db.readfp(f) return db.types_map suffix_map = { '.tgz': '.tar.gz', '.taz': '.tar.gz', '.tz': '.tar.gz', } encodings_map = { '.gz': 'gzip', '.Z': 'compress', } types_map = { '.a': 'application/octet-stream', '.ai': 'application/postscript', '.aif': 'audio/x-aiff', '.aifc': 'audio/x-aiff', '.aiff': 'audio/x-aiff', '.au': 'audio/basic', '.avi': 'video/x-msvideo', '.bcpio': 'application/x-bcpio', '.bin': 'application/octet-stream', '.cdf': 'application/x-netcdf', '.cpio': 'application/x-cpio', '.csh': 'application/x-csh', '.dll': 'application/octet-stream', '.dvi': 'application/x-dvi', '.exe': 'application/octet-stream', '.eps': 'application/postscript', '.etx': 'text/x-setext', '.gif': 'image/gif', '.gtar': 'application/x-gtar', '.hdf': 'application/x-hdf', '.htm': 'text/html', '.html': 'text/html', '.ief': 'image/ief', '.jpe': 'image/jpeg', '.jpeg': 'image/jpeg', '.jpg': 'image/jpeg', '.js': 'application/x-javascript', '.latex': 'application/x-latex', '.man': 'application/x-troff-man', '.me': 'application/x-troff-me', '.mif': 'application/x-mif', '.mov': 'video/quicktime', '.movie': 'video/x-sgi-movie', '.mpe': 'video/mpeg', '.mpeg': 'video/mpeg', '.mpg': 'video/mpeg', '.ms': 'application/x-troff-ms', '.nc': 'application/x-netcdf', '.o': 'application/octet-stream', '.obj': 'application/octet-stream', '.oda': 'application/oda', '.pbm': 'image/x-portable-bitmap', '.pdf': 'application/pdf', '.pgm': 'image/x-portable-graymap', '.pnm': 'image/x-portable-anymap', '.png': 'image/png', '.ppm': 'image/x-portable-pixmap', '.ps': 'application/postscript', '.py': 'text/x-python', '.pyc': 'application/x-python-code', '.pyo': 'application/x-python-code', '.qt': 'video/quicktime', '.ras': 'image/x-cmu-raster', '.rgb': 'image/x-rgb', '.rdf': 'application/xml', '.roff': 'application/x-troff', '.rtf': 'application/rtf', '.rtx': 'text/richtext', '.sgm': 'text/x-sgml', '.sgml': 'text/x-sgml', '.sh': 'application/x-sh', '.shar': 'application/x-shar', '.snd': 'audio/basic', '.so': 'application/octet-stream', '.src': 'application/x-wais-source', '.sv4cpio': 'application/x-sv4cpio', '.sv4crc': 'application/x-sv4crc', '.t': 'application/x-troff', '.tar': 'application/x-tar', '.tcl': 'application/x-tcl', '.tex': 'application/x-tex', '.texi': 'application/x-texinfo', '.texinfo': 'application/x-texinfo', '.tif': 'image/tiff', '.tiff': 'image/tiff', '.tr': 'application/x-troff', '.tsv': 'text/tab-separated-values', '.txt': 'text/plain', '.ustar': 'application/x-ustar', '.wav': 'audio/x-wav', '.xbm': 'image/x-xbitmap', '.xml': 'text/xml', '.xsl': 'application/xml', '.xpm': 'image/x-xpixmap', '.xwd': 'image/x-xwindowdump', '.zip': 'application/zip', } if __name__ == '__main__': import sys print guess_type(sys.argv[1])