1 files changed, 553 insertions, 0 deletions
diff --git a/modules/language/python/module/mimetypes.py b/modules/language/python/module/mimetypes.py
new file mode 100644
index 0000000..76a5e87
--- /dev/null
+++ b/modules/language/python/module/mimetypes.py
@@ -0,0 +1,553 @@
+module(mimetypes)
+"""Guess the MIME type of a file.
+
+This module defines two useful functions:
+
+guess_type(url, strict=True) -- guess the MIME type and encoding of a URL.
+
+guess_extension(type, strict=True) -- guess the extension for a given MIME type.
+
+It also contains the following, for tuning the behavior:
+
+Data:
+
+knownfiles -- list of files to parse
+inited -- flag set when init() has been called
+suffix_map -- dictionary mapping suffixes to suffixes
+encodings_map -- dictionary mapping suffixes to encodings
+types_map -- dictionary mapping suffixes to types
+
+Functions:
+
+init([files]) -- parse a list of files, default knownfiles (on Windows, the
+  default values are taken from the registry)
+read_mime_types(file) -- parse one file, return a dictionary or None
+"""
+
+import os
+import sys
+import posixpath
+import urllib.parse
+try:
+    import winreg as _winreg
+except ImportError:
+    _winreg = None
+
+__all__ = [
+    "knownfiles", "inited", "MimeTypes",
+    "guess_type", "guess_all_extensions", "guess_extension",
+    "add_type", "init", "read_mime_types",
+    "suffix_map", "encodings_map", "types_map", "common_types"
+]
+
+knownfiles = [
+    "/etc/mime.types",
+    "/etc/httpd/mime.types",                    # Mac OS X
+    "/etc/httpd/conf/mime.types",               # Apache
+    "/etc/apache/mime.types",                   # Apache 1
+    "/etc/apache2/mime.types",                  # Apache 2
+    "/usr/local/etc/httpd/conf/mime.types",
+    "/usr/local/lib/netscape/mime.types",
+    "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
+    "/usr/local/etc/mime.types",                # Apache 1.3
+    ]
+
+inited = False
+_db = None
+
+
+class MimeTypes:
+    """MIME-types datastore.
+
+    This datastore can handle information from mime.types-style files
+    and supports basic determination of MIME type from a filename or
+    URL, and can guess a reasonable extension given a MIME type.
+    """
+
+    def __init__(self, filenames=(), strict=True):
+        if not inited:
+            init()
+        self.encodings_map = encodings_map.copy()
+        self.suffix_map = suffix_map.copy()
+        self.types_map = ({}, {}) # dict for (non-strict, strict)
+        self.types_map_inv = ({}, {})
+        for (ext, type) in types_map.items():
+            self.add_type(type, ext, True)
+        for (ext, type) in common_types.items():
+            self.add_type(type, ext, False)
+        for name in filenames:
+            self.read(name, strict)
+
+    def add_type(self, type, ext, strict=True):
+        """Add a mapping between a type and an extension.
+
+        When the extension is already known, the new
+        type will replace the old one. When the type
+        is already known the extension will be added
+        to the list of known extensions.
+
+        If strict is true, information will be added to
+        list of standard types, else to the list of non-standard
+        types.
+        """
+        self.types_map[strict][ext] = type
+        exts = self.types_map_inv[strict].setdefault(type, [])
+        if ext not in exts:
+            exts.append(ext)
+
+    def guess_type(self, url, strict=True):
+        """Guess the type of a file based on its URL.
+
+        Return value is a tuple (type, encoding) where type is None if
+        the type can't be guessed (no or unknown suffix) or a string
+        of the form type/subtype, usable for a MIME Content-type
+        header; and encoding is None for no encoding or the name of
+        the program used to encode (e.g. compress or gzip).  The
+        mappings are table driven.  Encoding suffixes are case
+        sensitive; type suffixes are first tried case sensitive, then
+        case insensitive.
+
+        The suffixes .tgz, .taz and .tz (case sensitive!) are all
+        mapped to '.tar.gz'.  (This is table-driven too, using the
+        dictionary suffix_map.)
+
+        Optional `strict' argument when False adds a bunch of commonly found,
+        but non-standard types.
+        """
+        scheme, url = urllib.parse.splittype(url)
+        if scheme == 'data':
+            # syntax of data URLs:
+            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
+            # mediatype := [ type "/" subtype ] *( ";" parameter )
+            # data      := *urlchar
+            # parameter := attribute "=" value
+            # type/subtype defaults to "text/plain"
+            comma = url.find(',')
+            if comma < 0:
+                # bad data URL
+                return None, None
+            semi = url.find(';', 0, comma)
+            if semi >= 0:
+                type = url[:semi]
+            else:
+                type = url[:comma]
+            if '=' in type or '/' not in type:
+                type = 'text/plain'
+            return type, None           # never compressed, so encoding is None
+        base, ext = posixpath.splitext(url)
+        while ext in self.suffix_map:
+            base, ext = posixpath.splitext(base + self.suffix_map[ext])
+        if ext in self.encodings_map:
+            encoding = self.encodings_map[ext]
+            base, ext = posixpath.splitext(base)
+        else:
+            encoding = None
+        types_map = self.types_map[True]
+        if ext in types_map:
+            return types_map[ext], encoding
+        elif ext.lower() in types_map:
+            return types_map[ext.lower()], encoding
+        elif strict:
+            return None, encoding
+        types_map = self.types_map[False]
+        if ext in types_map:
+            return types_map[ext], encoding
+        elif ext.lower() in types_map:
+            return types_map[ext.lower()], encoding
+        else:
+            return None, encoding
+
+    def guess_all_extensions(self, type, strict=True):
+        """Guess the extensions for a file based on its MIME type.
+
+        Return value is a list of strings giving the possible filename
+        extensions, including the leading dot ('.').  The extension is not
+        guaranteed to have been associated with any particular data stream,
+        but would be mapped to the MIME type `type' by guess_type().
+
+        Optional `strict' argument when false adds a bunch of commonly found,
+        but non-standard types.
+        """
+        type = type.lower()
+        extensions = self.types_map_inv[True].get(type, [])
+        if not strict:
+            for ext in self.types_map_inv[False].get(type, []):
+                if ext not in extensions:
+                    extensions.append(ext)
+        return extensions
+
+    def guess_extension(self, type, strict=True):
+        """Guess the extension for a file based on its MIME type.
+
+        Return value is a string giving a filename extension,
+        including the leading dot ('.').  The extension is not
+        guaranteed to have been associated with any particular data
+        stream, but would be mapped to the MIME type `type' by
+        guess_type().  If no extension can be guessed for `type', None
+        is returned.
+
+        Optional `strict' argument when false adds a bunch of commonly found,
+        but non-standard types.
+        """
+        extensions = self.guess_all_extensions(type, strict)
+        if not extensions:
+            return None
+        return extensions[0]
+
+    def read(self, filename, strict=True):
+        """
+        Read a single mime.types-format file, specified by pathname.
+
+        If strict is true, information will be added to
+        list of standard types, else to the list of non-standard
+        types.
+        """
+        with open(filename, encoding='utf-8') as fp:
+            self.readfp(fp, strict)
+
+    def readfp(self, fp, strict=True):
+        """
+        Read a single mime.types-format file.
+
+        If strict is true, information will be added to
+        list of standard types, else to the list of non-standard
+        types.
+        """
+        while 1:
+            line = fp.readline()
+            if not line:
+                break
+            words = line.split()
+            for i in range(len(words)):
+                if words[i][0] == '#':
+                    del words[i:]
+                    break
+            if not words:
+                continue
+            type, suffixes = words[0], words[1:]
+            for suff in suffixes:
+                self.add_type(type, '.' + suff, strict)
+
+    def read_windows_registry(self, strict=True):
+        """
+        Load the MIME types database from Windows registry.
+
+        If strict is true, information will be added to
+        list of standard types, else to the list of non-standard
+        types.
+        """
+
+        # Windows only
+        if not _winreg:
+            return
+
+        def enum_types(mimedb):
+            i = 0
+            while True:
+                try:
+                    ctype = _winreg.EnumKey(mimedb, i)
+                except EnvironmentError:
+                    break
+                else:
+                    if '\0' not in ctype:
+                        yield ctype
+                i += 1
+
+        with _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT, '') as hkcr:
+            for subkeyname in enum_types(hkcr):
+                try:
+                    with _winreg.OpenKey(hkcr, subkeyname) as subkey:
+                        # Only check file extensions
+                        if not subkeyname.startswith("."):
+                            continue
+                        # raises EnvironmentError if no 'Content Type' value
+                        mimetype, datatype = _winreg.QueryValueEx(
+                            subkey, 'Content Type')
+                        if datatype != _winreg.REG_SZ:
+                            continue
+                        self.add_type(mimetype, subkeyname, strict)
+                except EnvironmentError:
+                    continue
+
+def guess_type(url, strict=True):
+    """Guess the type of a file based on its URL.
+
+    Return value is a tuple (type, encoding) where type is None if the
+    type can't be guessed (no or unknown suffix) or a string of the
+    form type/subtype, usable for a MIME Content-type header; and
+    encoding is None for no encoding or the name of the program used
+    to encode (e.g. compress or gzip).  The mappings are table
+    driven.  Encoding suffixes are case sensitive; type suffixes are
+    first tried case sensitive, then case insensitive.
+
+    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
+    to ".tar.gz".  (This is table-driven too, using the dictionary
+    suffix_map).
+
+    Optional `strict' argument when false adds a bunch of commonly found, but
+    non-standard types.
+    """
+    if _db is None:
+        init()
+    return _db.guess_type(url, strict)
+
+
+def guess_all_extensions(type, strict=True):
+    """Guess the extensions for a file based on its MIME type.
+
+    Return value is a list of strings giving the possible filename
+    extensions, including the leading dot ('.').  The extension is not
+    guaranteed to have been associated with any particular data
+    stream, but would be mapped to the MIME type `type' by
+    guess_type().  If no extension can be guessed for `type', None
+    is returned.
+
+    Optional `strict' argument when false adds a bunch of commonly found,
+    but non-standard types.
+    """
+    if _db is None:
+        init()
+    return _db.guess_all_extensions(type, strict)
+
+def guess_extension(type, strict=True):
+    """Guess the extension for a file based on its MIME type.
+
+    Return value is a string giving a filename extension, including the
+    leading dot ('.').  The extension is not guaranteed to have been
+    associated with any particular data stream, but would be mapped to the
+    MIME type `type' by guess_type().  If no extension can be guessed for
+    `type', None is returned.
+
+    Optional `strict' argument when false adds a bunch of commonly found,
+    but non-standard types.
+    """
+    if _db is None:
+        init()
+    return _db.guess_extension(type, strict)
+
+def add_type(type, ext, strict=True):
+    """Add a mapping between a type and an extension.
+
+    When the extension is already known, the new
+    type will replace the old one. When the type
+    is already known the extension will be added
+    to the list of known extensions.
+
+    If strict is true, information will be added to
+    list of standard types, else to the list of non-standard
+    types.
+    """
+    if _db is None:
+        init()
+    return _db.add_type(type, ext, strict)
+
+
+def init(files=None):
+    global suffix_map, types_map, encodings_map, common_types
+    global inited, _db
+    inited = True    # so that MimeTypes.__init__() doesn't call us again
+    db = MimeTypes()
+    if files is None:
+        if _winreg:
+            db.read_windows_registry()
+        files = knownfiles
+    for file in files:
+        if os.path.isfile(file):
+            db.read(file)
+    encodings_map = db.encodings_map
+    suffix_map = db.suffix_map
+    types_map = db.types_map[True]
+    common_types = db.types_map[False]
+    # Make the DB a global variable now that it is fully initialized
+    _db = db
+
+
+def read_mime_types(file):
+    try:
+        f = open(file)
+    except OSError:
+        return None
+    with f:
+        db = MimeTypes()
+        db.readfp(f, True)
+        return db.types_map[True]
+
+
+def _default_mime_types():
+    global suffix_map
+    global encodings_map
+    global types_map
+    global common_types
+
+    suffix_map = {
+        '.svgz': '.svg.gz',
+        '.tgz': '.tar.gz',
+        '.taz': '.tar.gz',
+        '.tz': '.tar.gz',
+        '.tbz2': '.tar.bz2',
+        '.txz': '.tar.xz',
+        }
+
+    encodings_map = {
+        '.gz': 'gzip',
+        '.Z': 'compress',
+        '.bz2': 'bzip2',
+        '.xz': 'xz',
+        }
+
+    # Before adding new types, make sure they are either registered with IANA,
+    # at http://www.iana.org/assignments/media-types
+    # or extensions, i.e. using the x- prefix
+
+    # If you add to these, please keep them sorted!
+    types_map = {
+        '.a'      : 'application/octet-stream',
+        '.ai'     : 'application/postscript',
+        '.aif'    : 'audio/x-aiff',
+        '.aifc'   : 'audio/x-aiff',
+        '.aiff'   : 'audio/x-aiff',
+        '.au'     : 'audio/basic',
+        '.avi'    : 'video/x-msvideo',
+        '.bat'    : 'text/plain',
+        '.bcpio'  : 'application/x-bcpio',
+        '.bin'    : 'application/octet-stream',
+        '.bmp'    : 'image/x-ms-bmp',
+        '.c'      : 'text/plain',
+        # Duplicates :(
+        '.cdf'    : 'application/x-cdf',
+        '.cdf'    : 'application/x-netcdf',
+        '.cpio'   : 'application/x-cpio',
+        '.csh'    : 'application/x-csh',
+        '.css'    : 'text/css',
+        '.csv'    : 'text/csv',
+        '.dll'    : 'application/octet-stream',
+        '.doc'    : 'application/msword',
+        '.dot'    : 'application/msword',
+        '.dvi'    : 'application/x-dvi',
+        '.eml'    : 'message/rfc822',
+        '.eps'    : 'application/postscript',
+        '.etx'    : 'text/x-setext',
+        '.exe'    : 'application/octet-stream',
+        '.gif'    : 'image/gif',
+        '.gtar'   : 'application/x-gtar',
+        '.h'      : 'text/plain',
+        '.hdf'    : 'application/x-hdf',
+        '.htm'    : 'text/html',
+        '.html'   : 'text/html',
+        '.ico'    : 'image/vnd.microsoft.icon',
+        '.ief'    : 'image/ief',
+        '.jpe'    : 'image/jpeg',
+        '.jpeg'   : 'image/jpeg',
+        '.jpg'    : 'image/jpeg',
+        '.js'     : 'application/javascript',
+        '.json'   : 'application/json',
+        '.ksh'    : 'text/plain',
+        '.latex'  : 'application/x-latex',
+        '.m1v'    : 'video/mpeg',
+        '.m3u'    : 'application/vnd.apple.mpegurl',
+        '.m3u8'   : 'application/vnd.apple.mpegurl',
+        '.man'    : 'application/x-troff-man',
+        '.me'     : 'application/x-troff-me',
+        '.mht'    : 'message/rfc822',
+        '.mhtml'  : 'message/rfc822',
+        '.mif'    : 'application/x-mif',
+        '.mov'    : 'video/quicktime',
+        '.movie'  : 'video/x-sgi-movie',
+        '.mp2'    : 'audio/mpeg',
+        '.mp3'    : 'audio/mpeg',
+        '.mp4'    : 'video/mp4',
+        '.mpa'    : 'video/mpeg',
+        '.mpe'    : 'video/mpeg',
+        '.mpeg'   : 'video/mpeg',
+        '.mpg'    : 'video/mpeg',
+        '.ms'     : 'application/x-troff-ms',
+        '.nc'     : 'application/x-netcdf',
+        '.nws'    : 'message/rfc822',
+        '.o'      : 'application/octet-stream',
+        '.obj'    : 'application/octet-stream',
+        '.oda'    : 'application/oda',
+        '.p12'    : 'application/x-pkcs12',
+        '.p7c'    : 'application/pkcs7-mime',
+        '.pbm'    : 'image/x-portable-bitmap',
+        '.pdf'    : 'application/pdf',
+        '.pfx'    : 'application/x-pkcs12',
+        '.pgm'    : 'image/x-portable-graymap',
+        '.pl'     : 'text/plain',
+        '.png'    : 'image/png',
+        '.pnm'    : 'image/x-portable-anymap',
+        '.pot'    : 'application/vnd.ms-powerpoint',
+        '.ppa'    : 'application/vnd.ms-powerpoint',
+        '.ppm'    : 'image/x-portable-pixmap',
+        '.pps'    : 'application/vnd.ms-powerpoint',
+        '.ppt'    : 'application/vnd.ms-powerpoint',
+        '.ps'     : 'application/postscript',
+        '.pwz'    : 'application/vnd.ms-powerpoint',
+        '.py'     : 'text/x-python',
+        '.pyc'    : 'application/x-python-code',
+        '.pyo'    : 'application/x-python-code',
+        '.qt'     : 'video/quicktime',
+        '.ra'     : 'audio/x-pn-realaudio',
+        '.ram'    : 'application/x-pn-realaudio',
+        '.ras'    : 'image/x-cmu-raster',
+        '.rdf'    : 'application/xml',
+        '.rgb'    : 'image/x-rgb',
+        '.roff'   : 'application/x-troff',
+        '.rtx'    : 'text/richtext',
+        '.sgm'    : 'text/x-sgml',
+        '.sgml'   : 'text/x-sgml',
+        '.sh'     : 'application/x-sh',
+        '.shar'   : 'application/x-shar',
+        '.snd'    : 'audio/basic',
+        '.so'     : 'application/octet-stream',
+        '.src'    : 'application/x-wais-source',
+        '.sv4cpio': 'application/x-sv4cpio',
+        '.sv4crc' : 'application/x-sv4crc',
+        '.svg'    : 'image/svg+xml',
+        '.swf'    : 'application/x-shockwave-flash',
+        '.t'      : 'application/x-troff',
+        '.tar'    : 'application/x-tar',
+        '.tcl'    : 'application/x-tcl',
+        '.tex'    : 'application/x-tex',
+        '.texi'   : 'application/x-texinfo',
+        '.texinfo': 'application/x-texinfo',
+        '.tif'    : 'image/tiff',
+        '.tiff'   : 'image/tiff',
+        '.tr'     : 'application/x-troff',
+        '.tsv'    : 'text/tab-separated-values',
+        '.txt'    : 'text/plain',
+        '.ustar'  : 'application/x-ustar',
+        '.vcf'    : 'text/x-vcard',
+        '.wav'    : 'audio/x-wav',
+        '.webm'   : 'video/webm',
+        '.wiz'    : 'application/msword',
+        '.wsdl'   : 'application/xml',
+        '.xbm'    : 'image/x-xbitmap',
+        '.xlb'    : 'application/vnd.ms-excel',
+        # Duplicates :(
+        '.xls'    : 'application/excel',
+        '.xls'    : 'application/vnd.ms-excel',
+        '.xml'    : 'text/xml',
+        '.xpdl'   : 'application/xml',
+        '.xpm'    : 'image/x-xpixmap',
+        '.xsl'    : 'application/xml',
+        '.xwd'    : 'image/x-xwindowdump',
+        '.zip'    : 'application/zip',
+        }
+
+    # These are non-standard types, commonly found in the wild.  They will
+    # only match if strict=0 flag is given to the API methods.
+
+    # Please sort these too
+    common_types = {
+        '.jpg' : 'image/jpg',
+        '.mid' : 'audio/midi',
+        '.midi': 'audio/midi',
+        '.pct' : 'image/pict',
+        '.pic' : 'image/pict',
+        '.pict': 'image/pict',
+        '.rtf' : 'application/rtf',
+        '.xul' : 'text/xul'
+        }
+
+
+_default_mime_types()