summaryrefslogtreecommitdiff
path: root/modules/language/python/module/mimetypes.py
diff options
context:
space:
mode:
Diffstat (limited to 'modules/language/python/module/mimetypes.py')
-rw-r--r--modules/language/python/module/mimetypes.py553
1 files changed, 553 insertions, 0 deletions
diff --git a/modules/language/python/module/mimetypes.py b/modules/language/python/module/mimetypes.py
new file mode 100644
index 0000000..76a5e87
--- /dev/null
+++ b/modules/language/python/module/mimetypes.py
@@ -0,0 +1,553 @@
+module(mimetypes)
+"""Guess the MIME type of a file.
+
+This module defines two useful functions:
+
+guess_type(url, strict=True) -- guess the MIME type and encoding of a URL.
+
+guess_extension(type, strict=True) -- guess the extension for a given MIME type.
+
+It also contains the following, for tuning the behavior:
+
+Data:
+
+knownfiles -- list of files to parse
+inited -- flag set when init() has been called
+suffix_map -- dictionary mapping suffixes to suffixes
+encodings_map -- dictionary mapping suffixes to encodings
+types_map -- dictionary mapping suffixes to types
+
+Functions:
+
+init([files]) -- parse a list of files, default knownfiles (on Windows, the
+ default values are taken from the registry)
+read_mime_types(file) -- parse one file, return a dictionary or None
+"""
+
+import os
+import sys
+import posixpath
+import urllib.parse
+try:
+ import winreg as _winreg
+except ImportError:
+ _winreg = None
+
+__all__ = [
+ "knownfiles", "inited", "MimeTypes",
+ "guess_type", "guess_all_extensions", "guess_extension",
+ "add_type", "init", "read_mime_types",
+ "suffix_map", "encodings_map", "types_map", "common_types"
+]
+
+knownfiles = [
+ "/etc/mime.types",
+ "/etc/httpd/mime.types", # Mac OS X
+ "/etc/httpd/conf/mime.types", # Apache
+ "/etc/apache/mime.types", # Apache 1
+ "/etc/apache2/mime.types", # Apache 2
+ "/usr/local/etc/httpd/conf/mime.types",
+ "/usr/local/lib/netscape/mime.types",
+ "/usr/local/etc/httpd/conf/mime.types", # Apache 1.2
+ "/usr/local/etc/mime.types", # Apache 1.3
+ ]
+
+inited = False
+_db = None
+
+
+class MimeTypes:
+ """MIME-types datastore.
+
+ This datastore can handle information from mime.types-style files
+ and supports basic determination of MIME type from a filename or
+ URL, and can guess a reasonable extension given a MIME type.
+ """
+
+ def __init__(self, filenames=(), strict=True):
+ if not inited:
+ init()
+ self.encodings_map = encodings_map.copy()
+ self.suffix_map = suffix_map.copy()
+ self.types_map = ({}, {}) # dict for (non-strict, strict)
+ self.types_map_inv = ({}, {})
+ for (ext, type) in types_map.items():
+ self.add_type(type, ext, True)
+ for (ext, type) in common_types.items():
+ self.add_type(type, ext, False)
+ for name in filenames:
+ self.read(name, strict)
+
+ def add_type(self, type, ext, strict=True):
+ """Add a mapping between a type and an extension.
+
+ When the extension is already known, the new
+ type will replace the old one. When the type
+ is already known the extension will be added
+ to the list of known extensions.
+
+ If strict is true, information will be added to
+ list of standard types, else to the list of non-standard
+ types.
+ """
+ self.types_map[strict][ext] = type
+ exts = self.types_map_inv[strict].setdefault(type, [])
+ if ext not in exts:
+ exts.append(ext)
+
+ def guess_type(self, url, strict=True):
+ """Guess the type of a file based on its URL.
+
+ Return value is a tuple (type, encoding) where type is None if
+ the type can't be guessed (no or unknown suffix) or a string
+ of the form type/subtype, usable for a MIME Content-type
+ header; and encoding is None for no encoding or the name of
+ the program used to encode (e.g. compress or gzip). The
+ mappings are table driven. Encoding suffixes are case
+ sensitive; type suffixes are first tried case sensitive, then
+ case insensitive.
+
+ The suffixes .tgz, .taz and .tz (case sensitive!) are all
+ mapped to '.tar.gz'. (This is table-driven too, using the
+ dictionary suffix_map.)
+
+ Optional `strict' argument when False adds a bunch of commonly found,
+ but non-standard types.
+ """
+ scheme, url = urllib.parse.splittype(url)
+ if scheme == 'data':
+ # syntax of data URLs:
+ # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
+ # mediatype := [ type "/" subtype ] *( ";" parameter )
+ # data := *urlchar
+ # parameter := attribute "=" value
+ # type/subtype defaults to "text/plain"
+ comma = url.find(',')
+ if comma < 0:
+ # bad data URL
+ return None, None
+ semi = url.find(';', 0, comma)
+ if semi >= 0:
+ type = url[:semi]
+ else:
+ type = url[:comma]
+ if '=' in type or '/' not in type:
+ type = 'text/plain'
+ return type, None # never compressed, so encoding is None
+ base, ext = posixpath.splitext(url)
+ while ext in self.suffix_map:
+ base, ext = posixpath.splitext(base + self.suffix_map[ext])
+ if ext in self.encodings_map:
+ encoding = self.encodings_map[ext]
+ base, ext = posixpath.splitext(base)
+ else:
+ encoding = None
+ types_map = self.types_map[True]
+ if ext in types_map:
+ return types_map[ext], encoding
+ elif ext.lower() in types_map:
+ return types_map[ext.lower()], encoding
+ elif strict:
+ return None, encoding
+ types_map = self.types_map[False]
+ if ext in types_map:
+ return types_map[ext], encoding
+ elif ext.lower() in types_map:
+ return types_map[ext.lower()], encoding
+ else:
+ return None, encoding
+
+ def guess_all_extensions(self, type, strict=True):
+ """Guess the extensions for a file based on its MIME type.
+
+ Return value is a list of strings giving the possible filename
+ extensions, including the leading dot ('.'). The extension is not
+ guaranteed to have been associated with any particular data stream,
+ but would be mapped to the MIME type `type' by guess_type().
+
+ Optional `strict' argument when false adds a bunch of commonly found,
+ but non-standard types.
+ """
+ type = type.lower()
+ extensions = self.types_map_inv[True].get(type, [])
+ if not strict:
+ for ext in self.types_map_inv[False].get(type, []):
+ if ext not in extensions:
+ extensions.append(ext)
+ return extensions
+
+ def guess_extension(self, type, strict=True):
+ """Guess the extension for a file based on its MIME type.
+
+ Return value is a string giving a filename extension,
+ including the leading dot ('.'). The extension is not
+ guaranteed to have been associated with any particular data
+ stream, but would be mapped to the MIME type `type' by
+ guess_type(). If no extension can be guessed for `type', None
+ is returned.
+
+ Optional `strict' argument when false adds a bunch of commonly found,
+ but non-standard types.
+ """
+ extensions = self.guess_all_extensions(type, strict)
+ if not extensions:
+ return None
+ return extensions[0]
+
+ def read(self, filename, strict=True):
+ """
+ Read a single mime.types-format file, specified by pathname.
+
+ If strict is true, information will be added to
+ list of standard types, else to the list of non-standard
+ types.
+ """
+ with open(filename, encoding='utf-8') as fp:
+ self.readfp(fp, strict)
+
+ def readfp(self, fp, strict=True):
+ """
+ Read a single mime.types-format file.
+
+ If strict is true, information will be added to
+ list of standard types, else to the list of non-standard
+ types.
+ """
+ while 1:
+ line = fp.readline()
+ if not line:
+ break
+ words = line.split()
+ for i in range(len(words)):
+ if words[i][0] == '#':
+ del words[i:]
+ break
+ if not words:
+ continue
+ type, suffixes = words[0], words[1:]
+ for suff in suffixes:
+ self.add_type(type, '.' + suff, strict)
+
+ def read_windows_registry(self, strict=True):
+ """
+ Load the MIME types database from Windows registry.
+
+ If strict is true, information will be added to
+ list of standard types, else to the list of non-standard
+ types.
+ """
+
+ # Windows only
+ if not _winreg:
+ return
+
+ def enum_types(mimedb):
+ i = 0
+ while True:
+ try:
+ ctype = _winreg.EnumKey(mimedb, i)
+ except EnvironmentError:
+ break
+ else:
+ if '\0' not in ctype:
+ yield ctype
+ i += 1
+
+ with _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT, '') as hkcr:
+ for subkeyname in enum_types(hkcr):
+ try:
+ with _winreg.OpenKey(hkcr, subkeyname) as subkey:
+ # Only check file extensions
+ if not subkeyname.startswith("."):
+ continue
+ # raises EnvironmentError if no 'Content Type' value
+ mimetype, datatype = _winreg.QueryValueEx(
+ subkey, 'Content Type')
+ if datatype != _winreg.REG_SZ:
+ continue
+ self.add_type(mimetype, subkeyname, strict)
+ except EnvironmentError:
+ continue
+
+def guess_type(url, strict=True):
+ """Guess the type of a file based on its URL.
+
+ Return value is a tuple (type, encoding) where type is None if the
+ type can't be guessed (no or unknown suffix) or a string of the
+ form type/subtype, usable for a MIME Content-type header; and
+ encoding is None for no encoding or the name of the program used
+ to encode (e.g. compress or gzip). The mappings are table
+ driven. Encoding suffixes are case sensitive; type suffixes are
+ first tried case sensitive, then case insensitive.
+
+ The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
+ to ".tar.gz". (This is table-driven too, using the dictionary
+ suffix_map).
+
+ Optional `strict' argument when false adds a bunch of commonly found, but
+ non-standard types.
+ """
+ if _db is None:
+ init()
+ return _db.guess_type(url, strict)
+
+
+def guess_all_extensions(type, strict=True):
+ """Guess the extensions for a file based on its MIME type.
+
+ Return value is a list of strings giving the possible filename
+ extensions, including the leading dot ('.'). The extension is not
+ guaranteed to have been associated with any particular data
+ stream, but would be mapped to the MIME type `type' by
+ guess_type(). If no extension can be guessed for `type', None
+ is returned.
+
+ Optional `strict' argument when false adds a bunch of commonly found,
+ but non-standard types.
+ """
+ if _db is None:
+ init()
+ return _db.guess_all_extensions(type, strict)
+
+def guess_extension(type, strict=True):
+ """Guess the extension for a file based on its MIME type.
+
+ Return value is a string giving a filename extension, including the
+ leading dot ('.'). The extension is not guaranteed to have been
+ associated with any particular data stream, but would be mapped to the
+ MIME type `type' by guess_type(). If no extension can be guessed for
+ `type', None is returned.
+
+ Optional `strict' argument when false adds a bunch of commonly found,
+ but non-standard types.
+ """
+ if _db is None:
+ init()
+ return _db.guess_extension(type, strict)
+
+def add_type(type, ext, strict=True):
+ """Add a mapping between a type and an extension.
+
+ When the extension is already known, the new
+ type will replace the old one. When the type
+ is already known the extension will be added
+ to the list of known extensions.
+
+ If strict is true, information will be added to
+ list of standard types, else to the list of non-standard
+ types.
+ """
+ if _db is None:
+ init()
+ return _db.add_type(type, ext, strict)
+
+
+def init(files=None):
+ global suffix_map, types_map, encodings_map, common_types
+ global inited, _db
+ inited = True # so that MimeTypes.__init__() doesn't call us again
+ db = MimeTypes()
+ if files is None:
+ if _winreg:
+ db.read_windows_registry()
+ files = knownfiles
+ for file in files:
+ if os.path.isfile(file):
+ db.read(file)
+ encodings_map = db.encodings_map
+ suffix_map = db.suffix_map
+ types_map = db.types_map[True]
+ common_types = db.types_map[False]
+ # Make the DB a global variable now that it is fully initialized
+ _db = db
+
+
+def read_mime_types(file):
+ try:
+ f = open(file)
+ except OSError:
+ return None
+ with f:
+ db = MimeTypes()
+ db.readfp(f, True)
+ return db.types_map[True]
+
+
+def _default_mime_types():
+ global suffix_map
+ global encodings_map
+ global types_map
+ global common_types
+
+ suffix_map = {
+ '.svgz': '.svg.gz',
+ '.tgz': '.tar.gz',
+ '.taz': '.tar.gz',
+ '.tz': '.tar.gz',
+ '.tbz2': '.tar.bz2',
+ '.txz': '.tar.xz',
+ }
+
+ encodings_map = {
+ '.gz': 'gzip',
+ '.Z': 'compress',
+ '.bz2': 'bzip2',
+ '.xz': 'xz',
+ }
+
+ # Before adding new types, make sure they are either registered with IANA,
+ # at http://www.iana.org/assignments/media-types
+ # or extensions, i.e. using the x- prefix
+
+ # If you add to these, please keep them sorted!
+ types_map = {
+ '.a' : 'application/octet-stream',
+ '.ai' : 'application/postscript',
+ '.aif' : 'audio/x-aiff',
+ '.aifc' : 'audio/x-aiff',
+ '.aiff' : 'audio/x-aiff',
+ '.au' : 'audio/basic',
+ '.avi' : 'video/x-msvideo',
+ '.bat' : 'text/plain',
+ '.bcpio' : 'application/x-bcpio',
+ '.bin' : 'application/octet-stream',
+ '.bmp' : 'image/x-ms-bmp',
+ '.c' : 'text/plain',
+ # Duplicates :(
+ '.cdf' : 'application/x-cdf',
+ '.cdf' : 'application/x-netcdf',
+ '.cpio' : 'application/x-cpio',
+ '.csh' : 'application/x-csh',
+ '.css' : 'text/css',
+ '.csv' : 'text/csv',
+ '.dll' : 'application/octet-stream',
+ '.doc' : 'application/msword',
+ '.dot' : 'application/msword',
+ '.dvi' : 'application/x-dvi',
+ '.eml' : 'message/rfc822',
+ '.eps' : 'application/postscript',
+ '.etx' : 'text/x-setext',
+ '.exe' : 'application/octet-stream',
+ '.gif' : 'image/gif',
+ '.gtar' : 'application/x-gtar',
+ '.h' : 'text/plain',
+ '.hdf' : 'application/x-hdf',
+ '.htm' : 'text/html',
+ '.html' : 'text/html',
+ '.ico' : 'image/vnd.microsoft.icon',
+ '.ief' : 'image/ief',
+ '.jpe' : 'image/jpeg',
+ '.jpeg' : 'image/jpeg',
+ '.jpg' : 'image/jpeg',
+ '.js' : 'application/javascript',
+ '.json' : 'application/json',
+ '.ksh' : 'text/plain',
+ '.latex' : 'application/x-latex',
+ '.m1v' : 'video/mpeg',
+ '.m3u' : 'application/vnd.apple.mpegurl',
+ '.m3u8' : 'application/vnd.apple.mpegurl',
+ '.man' : 'application/x-troff-man',
+ '.me' : 'application/x-troff-me',
+ '.mht' : 'message/rfc822',
+ '.mhtml' : 'message/rfc822',
+ '.mif' : 'application/x-mif',
+ '.mov' : 'video/quicktime',
+ '.movie' : 'video/x-sgi-movie',
+ '.mp2' : 'audio/mpeg',
+ '.mp3' : 'audio/mpeg',
+ '.mp4' : 'video/mp4',
+ '.mpa' : 'video/mpeg',
+ '.mpe' : 'video/mpeg',
+ '.mpeg' : 'video/mpeg',
+ '.mpg' : 'video/mpeg',
+ '.ms' : 'application/x-troff-ms',
+ '.nc' : 'application/x-netcdf',
+ '.nws' : 'message/rfc822',
+ '.o' : 'application/octet-stream',
+ '.obj' : 'application/octet-stream',
+ '.oda' : 'application/oda',
+ '.p12' : 'application/x-pkcs12',
+ '.p7c' : 'application/pkcs7-mime',
+ '.pbm' : 'image/x-portable-bitmap',
+ '.pdf' : 'application/pdf',
+ '.pfx' : 'application/x-pkcs12',
+ '.pgm' : 'image/x-portable-graymap',
+ '.pl' : 'text/plain',
+ '.png' : 'image/png',
+ '.pnm' : 'image/x-portable-anymap',
+ '.pot' : 'application/vnd.ms-powerpoint',
+ '.ppa' : 'application/vnd.ms-powerpoint',
+ '.ppm' : 'image/x-portable-pixmap',
+ '.pps' : 'application/vnd.ms-powerpoint',
+ '.ppt' : 'application/vnd.ms-powerpoint',
+ '.ps' : 'application/postscript',
+ '.pwz' : 'application/vnd.ms-powerpoint',
+ '.py' : 'text/x-python',
+ '.pyc' : 'application/x-python-code',
+ '.pyo' : 'application/x-python-code',
+ '.qt' : 'video/quicktime',
+ '.ra' : 'audio/x-pn-realaudio',
+ '.ram' : 'application/x-pn-realaudio',
+ '.ras' : 'image/x-cmu-raster',
+ '.rdf' : 'application/xml',
+ '.rgb' : 'image/x-rgb',
+ '.roff' : 'application/x-troff',
+ '.rtx' : 'text/richtext',
+ '.sgm' : 'text/x-sgml',
+ '.sgml' : 'text/x-sgml',
+ '.sh' : 'application/x-sh',
+ '.shar' : 'application/x-shar',
+ '.snd' : 'audio/basic',
+ '.so' : 'application/octet-stream',
+ '.src' : 'application/x-wais-source',
+ '.sv4cpio': 'application/x-sv4cpio',
+ '.sv4crc' : 'application/x-sv4crc',
+ '.svg' : 'image/svg+xml',
+ '.swf' : 'application/x-shockwave-flash',
+ '.t' : 'application/x-troff',
+ '.tar' : 'application/x-tar',
+ '.tcl' : 'application/x-tcl',
+ '.tex' : 'application/x-tex',
+ '.texi' : 'application/x-texinfo',
+ '.texinfo': 'application/x-texinfo',
+ '.tif' : 'image/tiff',
+ '.tiff' : 'image/tiff',
+ '.tr' : 'application/x-troff',
+ '.tsv' : 'text/tab-separated-values',
+ '.txt' : 'text/plain',
+ '.ustar' : 'application/x-ustar',
+ '.vcf' : 'text/x-vcard',
+ '.wav' : 'audio/x-wav',
+ '.webm' : 'video/webm',
+ '.wiz' : 'application/msword',
+ '.wsdl' : 'application/xml',
+ '.xbm' : 'image/x-xbitmap',
+ '.xlb' : 'application/vnd.ms-excel',
+ # Duplicates :(
+ '.xls' : 'application/excel',
+ '.xls' : 'application/vnd.ms-excel',
+ '.xml' : 'text/xml',
+ '.xpdl' : 'application/xml',
+ '.xpm' : 'image/x-xpixmap',
+ '.xsl' : 'application/xml',
+ '.xwd' : 'image/x-xwindowdump',
+ '.zip' : 'application/zip',
+ }
+
+ # These are non-standard types, commonly found in the wild. They will
+ # only match if strict=0 flag is given to the API methods.
+
+ # Please sort these too
+ common_types = {
+ '.jpg' : 'image/jpg',
+ '.mid' : 'audio/midi',
+ '.midi': 'audio/midi',
+ '.pct' : 'image/pict',
+ '.pic' : 'image/pict',
+ '.pict': 'image/pict',
+ '.rtf' : 'application/rtf',
+ '.xul' : 'text/xul'
+ }
+
+
+_default_mime_types()