mime types

author: Stefan Israelsson Tampe <stefan.itampe@gmail.com> 2018-09-10 18:02:06 +0200
committer: Stefan Israelsson Tampe <stefan.itampe@gmail.com> 2018-09-10 18:02:06 +0200
commit: b13d359c27a03c19ac113ced417d00155942ef11 (patch)
tree: 6baa74737c8de1924352be9164a64c4c5c7399ee
parent: 964eac0a27214f12b11cd867aabe07d3de868ea3 (diff)
8 files changed, 675 insertions, 0 deletions
diff --git a/modules/language/python/module/email/mime/audio.py b/modules/language/python/module/email/mime/audio.py
new file mode 100644
index 0000000..ccd08fe
--- /dev/null
+++ b/modules/language/python/module/email/mime/audio.py
@@ -0,0 +1,76 @@
+module(email,mime,audio)
+
+# Copyright (C) 2001-2007 Python Software Foundation
+# Author: Anthony Baxter
+# Contact: email-sig@python.org
+
+"""Class representing audio/* type MIME documents."""
+
+__all__ = ['MIMEAudio']
+
+import sndhdr
+
+from io import BytesIO
+import email.encoders as encoders
+from email.mime.nonmultipart import MIMENonMultipart
+
+
+
+_sndhdr_MIMEmap = {'au'  : 'basic',
+                   'wav' :'x-wav',
+                   'aiff':'x-aiff',
+                   'aifc':'x-aiff',
+                   }
+
+# There are others in sndhdr that don't have MIME types. :(
+# Additional ones to be added to sndhdr? midi, mp3, realaudio, wma??
+def _whatsnd(data):
+    """Guess the audio MIME subtype of raw sound data.
+
+    Only the first 512 bytes of data are examined.  The checks listed in
+    sndhdr.tests are run directly on them, since sndhdr.what() itself only
+    accepts a filename.  Returns the mapped subtype string, or None.
+    """
+    header = data[:512]
+    stream = BytesIO(header)
+    for probe in sndhdr.tests:
+        found = probe(header, stream)
+        if found is not None:
+            return _sndhdr_MIMEmap.get(found[0])
+    return None
+
+
+
+class MIMEAudio(MIMENonMultipart):
+    """Class for generating audio/* MIME documents."""
+
+    def __init__(self, _audiodata, _subtype=None,
+                 _encoder=encoders.encode_base64, *, policy=None, **_params):
+        """Create an audio/* type MIME document.
+
+        _audiodata is a bytes object containing the raw audio data.  If this
+        data can be decoded by the standard Python `sndhdr' module, then the
+        subtype will be automatically included in the Content-Type header.
+        Otherwise, you can specify the specific audio subtype via the
+        _subtype parameter.  If _subtype is not given, and no subtype can be
+        guessed, a TypeError is raised.
+
+        _encoder is a function which will perform the actual encoding for
+        transport of the audio data.  It takes one argument, which is this
+        MIMEAudio instance.  It should use get_payload() and set_payload() to
+        change the payload to the encoded form.  It should also add any
+        Content-Transfer-Encoding or other headers to the message as
+        necessary.  The default encoding is Base64.
+
+        Any additional keyword arguments are passed to the base class
+        constructor, which turns them into parameters on the Content-Type
+        header.
+        """
+        if _subtype is None:
+            _subtype = _whatsnd(_audiodata)
+        if _subtype is None:
+            raise TypeError('Could not find audio MIME subtype')
+        MIMENonMultipart.__init__(self, 'audio', _subtype, policy=policy,
+                                  **_params)
+        self.set_payload(_audiodata)
+        _encoder(self)
diff --git a/modules/language/python/module/email/mime/base.py b/modules/language/python/module/email/mime/base.py
new file mode 100644
index 0000000..3e269bd
--- /dev/null
+++ b/modules/language/python/module/email/mime/base.py
@@ -0,0 +1,30 @@
+module(email,mime,base)
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Base class for MIME specializations."""
+
+__all__ = ['MIMEBase']
+
+import email.policy
+import email.message as message
+
+
+
+class MIMEBase(message.Message):
+    """Base class for MIME specializations."""
+
+    def __init__(self, _maintype, _subtype, *, policy=None, **_params):
+        """This constructor adds a Content-Type: and a MIME-Version: header.
+
+        The Content-Type: header is taken from the _maintype and _subtype
+        arguments.  Additional parameters for this header are taken from the
+        keyword arguments.
+        """
+        if policy is None:
+            policy = email.policy.compat32
+        message.Message.__init__(self, policy=policy)
+        ctype = '%s/%s' % (_maintype, _subtype)
+        self.add_header('Content-Type', ctype, **_params)
+        self['MIME-Version'] = '1.0'
diff --git a/modules/language/python/module/email/mime/image.py b/modules/language/python/module/email/mime/image.py
new file mode 100644
index 0000000..5346dac
--- /dev/null
+++ b/modules/language/python/module/email/mime/image.py
@@ -0,0 +1,49 @@
+module(email,mime,image)
+
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Class representing image/* type MIME documents."""
+
+__all__ = ['MIMEImage']
+
+import imghdr
+
+import email.encoders as encoders
+from email.mime.nonmultipart import MIMENonMultipart
+
+
+
+class MIMEImage(MIMENonMultipart):
+    """Class for generating image/* type MIME documents."""
+
+    def __init__(self, _imagedata, _subtype=None,
+                 _encoder=encoders.encode_base64, *, policy=None, **_params):
+        """Create an image/* type MIME document.
+
+        _imagedata is a bytes object containing the raw image data.  If this
+        data can be decoded by the standard Python `imghdr' module, then the
+        subtype will be automatically included in the Content-Type header.
+        Otherwise, specify the image subtype via the _subtype parameter.  If
+        _subtype is not given and none can be guessed, TypeError is raised.
+
+        _encoder is a function which will perform the actual encoding for
+        transport of the image data.  It takes one argument, which is this
+        MIMEImage instance.  It should use get_payload() and set_payload() to
+        change the payload to the encoded form.  It should also add any
+        Content-Transfer-Encoding or other headers to the message as
+        necessary.  The default encoding is Base64.
+
+        Any additional keyword arguments are passed to the base class
+        constructor, which turns them into parameters on the Content-Type
+        header.
+        """
+        if _subtype is None:
+            _subtype = imghdr.what(None, _imagedata)
+        if _subtype is None:
+            raise TypeError('Could not guess image MIME subtype')
+        MIMENonMultipart.__init__(self, 'image', _subtype, policy=policy,
+                                  **_params)
+        self.set_payload(_imagedata)
+        _encoder(self)
diff --git a/modules/language/python/module/email/mime/message.py b/modules/language/python/module/email/mime/message.py
new file mode 100644
index 0000000..e215d3e
--- /dev/null
+++ b/modules/language/python/module/email/mime/message.py
@@ -0,0 +1,36 @@
+module(email,mime,message)
+
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Class representing message/* MIME documents."""
+
+__all__ = ['MIMEMessage']
+
+import email.message as message
+from email.mime.nonmultipart import MIMENonMultipart
+
+
+
+class MIMEMessage(MIMENonMultipart):
+    """Class representing message/* MIME documents."""
+
+    def __init__(self, _msg, _subtype='rfc822', *, policy=None):
+        """Create a message/* type MIME document.
+
+        _msg is a message object and must be an instance of Message, or a
+        derived class of Message, otherwise a TypeError is raised.
+
+        Optional _subtype defines the subtype of the contained message.  The
+        default is "rfc822" (this is defined by the MIME standard, even though
+        the term "rfc822" is technically outdated by RFC 2822).
+        """
+        MIMENonMultipart.__init__(self, 'message', _subtype, policy=policy)
+        if not isinstance(_msg, message.Message):
+            raise TypeError('Argument is not an instance of Message')
+        # It's convenient to use this base class method.  We need to do it
+        # this way or we'll get an exception
+        message.Message.attach(self, _msg)
+        # And be sure our default type is set correctly
+        self.set_default_type('message/rfc822')
diff --git a/modules/language/python/module/email/mime/nonmultipart.py b/modules/language/python/module/email/mime/nonmultipart.py
new file mode 100644
index 0000000..aa5d06a
--- /dev/null
+++ b/modules/language/python/module/email/mime/nonmultipart.py
@@ -0,0 +1,24 @@
+module(email,mime,nonmultipart)
+
+# Copyright (C) 2002-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Base class for MIME type messages that are not multipart."""
+
+__all__ = ['MIMENonMultipart']
+
+import email.errors as errors
+from email.mime.base import MIMEBase
+
+
+
+class MIMENonMultipart(MIMEBase):
+    """Base class for MIME non-multipart type messages."""
+
+    def attach(self, payload):
+        # The public API prohibits attaching multiple subparts to MIMEBase
+        # derived subtypes since none of them are, by definition, of content
+        # type multipart/*
+        raise errors.MultipartConversionError(
+            'Cannot attach additional subparts to non-multipart/*')
diff --git a/modules/language/python/module/email/mime/text.py b/modules/language/python/module/email/mime/text.py
new file mode 100644
index 0000000..755b140
--- /dev/null
+++ b/modules/language/python/module/email/mime/text.py
@@ -0,0 +1,43 @@
+module(email,mime,text)
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Class representing text/* type MIME documents."""
+
+__all__ = ['MIMEText']
+
+from email.charset import Charset
+from email.mime.nonmultipart import MIMENonMultipart
+
+
+
+class MIMEText(MIMENonMultipart):
+    """Class for generating text/* type MIME documents."""
+
+    def __init__(self, _text, _subtype='plain', _charset=None, *, policy=None):
+        """Create a text/* type MIME document.
+
+        _text is the string for this message object.
+
+        _subtype is the MIME sub content type, defaulting to "plain".
+
+        _charset is the character set parameter added to the Content-Type
+        header.  If omitted, "us-ascii" is used for pure-ASCII text and
+        "utf-8" otherwise.  Content-Transfer-Encoding is set as a side-effect.
+        """
+
+        # If no _charset was specified, check to see if there are non-ascii
+        # characters present. If not, use 'us-ascii', otherwise use utf-8.
+        # XXX: This can be removed once #7304 is fixed.
+        if _charset is None:
+            try:
+                _text.encode('us-ascii')
+                _charset = 'us-ascii'
+            except UnicodeEncodeError:
+                _charset = 'utf-8'
+
+        MIMENonMultipart.__init__(self, 'text', _subtype, policy=policy,
+                                  **{'charset': str(_charset)})
+
+        self.set_payload(_text, _charset)
diff --git a/modules/language/python/module/imghdr.py b/modules/language/python/module/imghdr.py
new file mode 100644
index 0000000..8820a9d
--- /dev/null
+++ b/modules/language/python/module/imghdr.py
@@ -0,0 +1,170 @@
+module(imghdr)
+
+"""Recognize image file formats based on their first few bytes."""
+
+from os import PathLike
+
+__all__ = ["what"]
+
+#-------------------------#
+# Recognize image headers #
+#-------------------------#
+
+def what(file, h=None):
+    """Return the image type for file (or for header bytes h), else None."""
+    opened = None
+    try:
+        if h is None:
+            if isinstance(file, (str, PathLike)):
+                opened = open(file, 'rb')
+                h = opened.read(32)
+            else:
+                start = file.tell()
+                h = file.read(32)
+                file.seek(start)
+        for check in tests:
+            kind = check(h, opened)
+            if kind:
+                return kind
+    finally:
+        if opened: opened.close()
+
+
+#---------------------------------#
+# Subroutines per image file type #
+#---------------------------------#
+
+tests = []
+
+def test_jpeg(h, f):
+    """JPEG data in JFIF or Exif format"""
+    if h[6:10] in (b'JFIF', b'Exif'):
+        return 'jpeg'
+
+tests.append(test_jpeg)
+
+def test_png(h, f):
+    if h.startswith(b'\211PNG\r\n\032\n'):
+        return 'png'
+
+tests.append(test_png)
+
+def test_gif(h, f):
+    """GIF ('87 and '89 variants)"""
+    if h[:6] in (b'GIF87a', b'GIF89a'):
+        return 'gif'
+
+tests.append(test_gif)
+
+def test_tiff(h, f):
+    """TIFF (can be in Motorola or Intel byte order)"""
+    if h[:2] in (b'MM', b'II'):
+        return 'tiff'
+
+tests.append(test_tiff)
+
+def test_rgb(h, f):
+    """SGI image library"""
+    if h.startswith(b'\001\332'):
+        return 'rgb'
+
+tests.append(test_rgb)
+
+def test_pbm(h, f):
+    """PBM (portable bitmap)"""
+    if len(h) >= 3 and \
+        h[0] == ord(b'P') and h[1] in b'14' and h[2] in b' \t\n\r':
+        return 'pbm'
+
+tests.append(test_pbm)
+
+def test_pgm(h, f):
+    """PGM (portable graymap)"""
+    if len(h) >= 3 and \
+        h[0] == ord(b'P') and h[1] in b'25' and h[2] in b' \t\n\r':
+        return 'pgm'
+
+tests.append(test_pgm)
+
+def test_ppm(h, f):
+    """PPM (portable pixmap)"""
+    if len(h) >= 3 and \
+        h[0] == ord(b'P') and h[1] in b'36' and h[2] in b' \t\n\r':
+        return 'ppm'
+
+tests.append(test_ppm)
+
+def test_rast(h, f):
+    """Sun raster file"""
+    if h.startswith(b'\x59\xA6\x6A\x95'):
+        return 'rast'
+
+tests.append(test_rast)
+
+def test_xbm(h, f):
+    """X bitmap (X10 or X11)"""
+    if h.startswith(b'#define '):
+        return 'xbm'
+
+tests.append(test_xbm)
+
+def test_bmp(h, f):
+    if h.startswith(b'BM'):
+        return 'bmp'
+
+tests.append(test_bmp)
+
+def test_webp(h, f):
+    if h.startswith(b'RIFF') and h[8:12] == b'WEBP':
+        return 'webp'
+
+tests.append(test_webp)
+
+def test_exr(h, f):
+    if h.startswith(b'\x76\x2f\x31\x01'):
+        return 'exr'
+
+tests.append(test_exr)
+
+#--------------------#
+# Small test program #
+#--------------------#
+
+def test():
+    import sys
+    recursive = 0
+    if sys.argv[1:] and sys.argv[1] == '-r':
+        del sys.argv[1:2]
+        recursive = 1
+    try:
+        if sys.argv[1:]:
+            testall(sys.argv[1:], recursive, 1)
+        else:
+            testall(['.'], recursive, 1)
+    except KeyboardInterrupt:
+        sys.stderr.write('\n[Interrupted]\n')
+        sys.exit(1)
+
+def testall(list, recursive, toplevel):
+    import sys
+    import os
+    for filename in list:
+        if os.path.isdir(filename):
+            print(filename + '/:', end=' ')
+            if recursive or toplevel:
+                print('recursing down:')
+                import glob
+                names = glob.glob(os.path.join(filename, '*'))
+                testall(names, recursive, 0)
+            else:
+                print('*** directory (use -r) ***')
+        else:
+            print(filename + ':', end=' ')
+            sys.stdout.flush()
+            try:
+                print(what(filename))
+            except OSError:
+                print('*** not found ***')
+
+if __name__ == '__main__':
+    test()
diff --git a/modules/language/python/module/sndhdr.py b/modules/language/python/module/sndhdr.py
new file mode 100644
index 0000000..42b3d1a
--- /dev/null
+++ b/modules/language/python/module/sndhdr.py
@@ -0,0 +1,247 @@
+module(sndhdr)
+
+"""Routines to help recognizing sound files.
+
+Function whathdr() recognizes various types of sound file headers.
+It understands almost all headers that SOX can decode.
+
+The return tuple contains the following items, in this order:
+- file type (as SOX understands it)
+- sampling rate (0 if unknown or hard to decode)
+- number of channels (0 if unknown or hard to decode)
+- number of frames in the file (-1 if unknown or hard to decode)
+- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW
+
+If the file doesn't have a recognizable type, it returns None.
+If the file can't be opened, OSError is raised.
+
+To compute the total time, divide the number of frames by the
+sampling rate (a frame contains a sample for each channel).
+
+Function what() calls whathdr().  (It used to also use some
+heuristics for raw data, but this doesn't work very well.)
+
+Finally, the function test() is a simple main program that calls
+what() for all files mentioned on the argument list.  For directory
+arguments it calls what() for all files in that directory.  Default
+argument is "." (testing all files in the current directory).  The
+option -r tells it to recurse down directories found inside
+explicitly given directories.
+"""
+
+# The file structure is top-down except that the test program and its
+# subroutine come last.
+
+__all__ = ['what', 'whathdr']
+
+from collections import namedtuple
+
+SndHeaders = namedtuple('SndHeaders',
+                        'filetype framerate nchannels nframes sampwidth')
+
+def what(filename):
+    """Guess the type of a sound file."""
+    res = whathdr(filename)
+    return res
+
+
+def whathdr(filename):
+    """Recognize sound headers."""
+    with open(filename, 'rb') as f:
+        h = f.read(512)
+        for tf in tests:
+            res = tf(h, f)
+            if res:
+                return SndHeaders(*res)
+        return None
+
+
+#-----------------------------------#
+# Subroutines per sound header type #
+#-----------------------------------#
+
+tests = []
+
+def test_aifc(h, f):
+    if not h.startswith(b'FORM'):
+        return None
+    if h[8:12] == b'AIFC':
+        fmt = 'aifc'
+    elif h[8:12] == b'AIFF':
+        fmt = 'aiff'
+    else:
+        return None
+    import aifc  # deferred: only needed once the magic bytes match
+    f.seek(0)
+    try:
+        a = aifc.open(f, 'r')
+    except (EOFError, aifc.Error):
+        return None
+    return (fmt, a.getframerate(), a.getnchannels(),
+            a.getnframes(), 8 * a.getsampwidth())
+
+tests.append(test_aifc)
+
+
+def test_au(h, f):
+    if h.startswith(b'.snd'):
+        func = get_long_be
+    elif h[:4] in (b'\0ds.', b'dns.'):
+        func = get_long_le
+    else:
+        return None
+    if len(h) < 24:  # truncated header: the field reads below need 24 bytes
+        return None
+    data_size = func(h[8:12])
+    encoding = func(h[12:16])
+    rate = func(h[16:20])
+    nchannels = func(h[20:24])
+    sample_size = 1 # default
+    if encoding == 1:
+        sample_bits = 'U'
+    elif encoding == 2:
+        sample_bits = 8
+    elif encoding == 3:
+        sample_bits = 16
+        sample_size = 2
+    else:
+        sample_bits = '?'
+    frame_size = sample_size * nchannels
+    if frame_size:
+        nframe = data_size // frame_size  # whole frames; '/' gave a float
+    else:
+        nframe = -1
+    return 'au', rate, nchannels, nframe, sample_bits
+
+tests.append(test_au)
+
+
+def test_hcom(h, f):
+    if h[65:69] != b'FSSD' or h[128:132] != b'HCOM' or len(h) < 148:
+        return None
+    divisor = get_long_be(h[144:148])
+    if divisor:
+        rate = 22050 // divisor  # integer rate; '/' produced a float
+    else:
+        rate = 0
+    return 'hcom', rate, 1, -1, 8
+
+tests.append(test_hcom)
+
+
+def test_voc(h, f):
+    if not h.startswith(b'Creative Voice File\032') or len(h) < 22:
+        return None
+    sbseek = get_short_le(h[20:22])
+    rate = 0
+    if 0 <= sbseek < 500 and sbseek + 4 < len(h) and h[sbseek] == 1:
+        ratecode = 256 - h[sbseek+4]
+        if ratecode:
+            rate = int(1000000.0 / ratecode)
+    return 'voc', rate, 1, -1, 8
+
+tests.append(test_voc)
+
+
+def test_wav(h, f):
+    # 'RIFF' <len> 'WAVE' 'fmt ' <len>
+    if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ':
+        return None
+    import wave  # deferred: only needed once the magic bytes match
+    f.seek(0)
+    try:
+        w = wave.open(f, 'r')  # openfp() was a deprecated alias, since removed
+    except (EOFError, wave.Error):
+        return None
+    return ('wav', w.getframerate(), w.getnchannels(),
+                   w.getnframes(), 8*w.getsampwidth())
+
+tests.append(test_wav)
+
+
+def test_8svx(h, f):
+    if not h.startswith(b'FORM') or h[8:12] != b'8SVX':
+        return None
+    # Should decode it to get #channels -- assume always 1
+    return '8svx', 0, 1, 0, 8
+
+tests.append(test_8svx)
+
+
+def test_sndt(h, f):
+    if h.startswith(b'SOUND') and len(h) >= 22:  # need bytes 8..21 below
+        nsamples = get_long_le(h[8:12])
+        rate = get_short_le(h[20:22])
+        return 'sndt', rate, 1, nsamples, 8
+
+tests.append(test_sndt)
+
+
+def test_sndr(h, f):
+    if h.startswith(b'\0\0') and len(h) >= 4:  # rate field is bytes 2..3
+        rate = get_short_le(h[2:4])
+        if 4000 <= rate <= 25000:
+            return 'sndr', rate, 1, -1, 8
+
+tests.append(test_sndr)
+
+
+#-------------------------------------------#
+# Subroutines to extract numbers from bytes #
+#-------------------------------------------#
+
+def get_long_be(b):
+    return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]
+
+def get_long_le(b):
+    return (b[3] << 24) | (b[2] << 16) | (b[1] << 8) | b[0]
+
+def get_short_be(b):
+    return (b[0] << 8) | b[1]
+
+def get_short_le(b):
+    return (b[1] << 8) | b[0]
+
+
+#--------------------#
+# Small test program #
+#--------------------#
+
+def test():
+    import sys
+    recursive = 0
+    if sys.argv[1:] and sys.argv[1] == '-r':
+        del sys.argv[1:2]
+        recursive = 1
+    try:
+        if sys.argv[1:]:
+            testall(sys.argv[1:], recursive, 1)
+        else:
+            testall(['.'], recursive, 1)
+    except KeyboardInterrupt:
+        sys.stderr.write('\n[Interrupted]\n')
+        sys.exit(1)
+
+def testall(list, recursive, toplevel):
+    import sys
+    import os
+    for filename in list:
+        if os.path.isdir(filename):
+            print(filename + '/:', end=' ')
+            if recursive or toplevel:
+                print('recursing down:')
+                import glob
+                names = glob.glob(os.path.join(filename, '*'))
+                testall(names, recursive, 0)
+            else:
+                print('*** directory (use -r) ***')
+        else:
+            print(filename + ':', end=' ')
+            sys.stdout.flush()
+            try:
+                print(what(filename))
+            except OSError:
+                print('*** not found ***')
+
+if __name__ == '__main__':
+    test()
author	Stefan Israelsson Tampe <stefan.itampe@gmail.com>	2018-09-10 18:02:06 +0200
committer	Stefan Israelsson Tampe <stefan.itampe@gmail.com>	2018-09-10 18:02:06 +0200
commit	b13d359c27a03c19ac113ced417d00155942ef11 (patch)
tree	6baa74737c8de1924352be9164a64c4c5c7399ee
parent	964eac0a27214f12b11cd867aabe07d3de868ea3 (diff)