summary refs log tree commit diff
diff options
context:
space:
mode:
author Stefan Israelsson Tampe <stefan.itampe@gmail.com> 2018-09-10 18:02:06 +0200
committer Stefan Israelsson Tampe <stefan.itampe@gmail.com> 2018-09-10 18:02:06 +0200
commit b13d359c27a03c19ac113ced417d00155942ef11 (patch)
tree 6baa74737c8de1924352be9164a64c4c5c7399ee
parent 964eac0a27214f12b11cd867aabe07d3de868ea3 (diff)
mime types
-rw-r--r-- modules/language/python/module/email/mime/audio.py 76
-rw-r--r-- modules/language/python/module/email/mime/base.py 30
-rw-r--r-- modules/language/python/module/email/mime/image.py 49
-rw-r--r-- modules/language/python/module/email/mime/message.py 36
-rw-r--r-- modules/language/python/module/email/mime/nonmultipart.py 24
-rw-r--r-- modules/language/python/module/email/mime/text.py 43
-rw-r--r-- modules/language/python/module/imghdr.py 170
-rw-r--r-- modules/language/python/module/sndhdr.py 247
8 files changed, 675 insertions, 0 deletions
diff --git a/modules/language/python/module/email/mime/audio.py b/modules/language/python/module/email/mime/audio.py
new file mode 100644
index 0000000..ccd08fe
--- /dev/null
+++ b/modules/language/python/module/email/mime/audio.py
@@ -0,0 +1,76 @@
+module(email,mime,audio)
+
+# Copyright (C) 2001-2007 Python Software Foundation
+# Author: Anthony Baxter
+# Contact: email-sig@python.org
+
+"""Class representing audio/* type MIME documents."""
+
+__all__ = ['MIMEAudio']
+
+import sndhdr
+
+from io import BytesIO
+import email.encoders as encoders
+from email.mime.nonmultipart import MIMENonMultipart
+
+
+
+# Maps the file type names reported by the sndhdr probes to audio/* subtypes.
+_sndhdr_MIMEmap = {
+    'au': 'basic',
+    'wav': 'x-wav',
+    'aiff': 'x-aiff',
+    'aifc': 'x-aiff',
+}
+
+# There are others in sndhdr that don't have MIME types. :(
+# Additional ones to be added to sndhdr? midi, mp3, realaudio, wma??
+def _whatsnd(data):
+    """Guess the audio/* MIME subtype of raw sound data.
+
+    sndhdr.what() only accepts a file name, so the probes in sndhdr.tests
+    are driven directly here.  Only the first 512 bytes of data are looked
+    at; each probe receives that header plus an in-memory file wrapping it.
+
+    The first probe that does not return None decides the outcome: its file
+    type (first item of the result) is looked up in _sndhdr_MIMEmap.
+    Returns the subtype string, or None when no probe matches or the
+    detected type has no entry in the map.
+    """
+    header = data[:512]
+    stream = BytesIO(header)
+    for probe in sndhdr.tests:
+        outcome = probe(header, stream)
+        if outcome is None:
+            continue
+        return _sndhdr_MIMEmap.get(outcome[0])
+    return None
+
+
+
+class MIMEAudio(MIMENonMultipart):
+    """An audio/* MIME part built from raw sound data."""
+
+    def __init__(self, _audiodata, _subtype=None,
+                 _encoder=encoders.encode_base64, *, policy=None, **_params):
+        """Build an audio/* MIME document around _audiodata.
+
+        _audiodata holds the raw audio data.  When _subtype is None the
+        subtype is guessed from the data with _whatsnd(); if that guess
+        also yields None a TypeError is raised.  Pass _subtype explicitly
+        to skip the guessing.
+
+        _encoder is called with this MIMEAudio instance once the payload
+        has been set.  It is expected to use get_payload() and
+        set_payload() to replace the payload with its encoded form and to
+        add Content-Transfer-Encoding or other headers as needed.  The
+        default is encoders.encode_base64.
+
+        policy and any additional keyword arguments are handed on to the
+        base class constructor; the keyword arguments become parameters of
+        the Content-Type header.
+        """
+        subtype = _subtype
+        if subtype is None:
+            subtype = _whatsnd(_audiodata)
+            if subtype is None:
+                raise TypeError('Could not find audio MIME subtype')
+        MIMENonMultipart.__init__(self, 'audio', subtype, policy=policy,
+                                  **_params)
+        self.set_payload(_audiodata)
+        _encoder(self)
diff --git a/modules/language/python/module/email/mime/base.py b/modules/language/python/module/email/mime/base.py
new file mode 100644
index 0000000..3e269bd
--- /dev/null
+++ b/modules/language/python/module/email/mime/base.py
@@ -0,0 +1,30 @@
+module(email,mime,base)
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Base class for MIME specializations."""
+
+__all__ = ['MIMEBase']
+
+import email.policy
+import email.message as message
+
+
+
+class MIMEBase(message.Message):
+    """Common ancestor of the MIME-specific message classes."""
+
+    def __init__(self, _maintype, _subtype, *, policy=None, **_params):
+        """Initialise the message with Content-Type and MIME-Version headers.
+
+        The Content-Type value is "_maintype/_subtype"; every extra keyword
+        argument is added to that header as a parameter.  MIME-Version is
+        always set to "1.0".  When policy is None, email.policy.compat32 is
+        used.
+        """
+        chosen_policy = email.policy.compat32 if policy is None else policy
+        message.Message.__init__(self, policy=chosen_policy)
+        content_type = '%s/%s' % (_maintype, _subtype)
+        self.add_header('Content-Type', content_type, **_params)
+        self['MIME-Version'] = '1.0'
diff --git a/modules/language/python/module/email/mime/image.py b/modules/language/python/module/email/mime/image.py
new file mode 100644
index 0000000..5346dac
--- /dev/null
+++ b/modules/language/python/module/email/mime/image.py
@@ -0,0 +1,49 @@
+module(email,mime,image)
+
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Class representing image/* type MIME documents."""
+
+__all__ = ['MIMEImage']
+
+import imghdr
+
+import email.encoders as encoders
+from email.mime.nonmultipart import MIMENonMultipart
+
+
+
+class MIMEImage(MIMENonMultipart):
+    """An image/* MIME part built from raw image data."""
+
+    def __init__(self, _imagedata, _subtype=None,
+                 _encoder=encoders.encode_base64, *, policy=None, **_params):
+        """Build an image/* MIME document around _imagedata.
+
+        _imagedata holds the raw image data.  When _subtype is None the
+        subtype is guessed with imghdr.what(None, _imagedata); if that
+        also yields None a TypeError is raised.  Pass _subtype explicitly
+        to skip the guessing.
+
+        _encoder is called with this MIMEImage instance once the payload
+        has been set.  It is expected to use get_payload() and
+        set_payload() to replace the payload with its encoded form and to
+        add Content-Transfer-Encoding or other headers as needed.  The
+        default is encoders.encode_base64.
+
+        policy and any additional keyword arguments are handed on to the
+        base class constructor; the keyword arguments become parameters of
+        the Content-Type header.
+        """
+        subtype = _subtype
+        if subtype is None:
+            subtype = imghdr.what(None, _imagedata)
+            if subtype is None:
+                raise TypeError('Could not guess image MIME subtype')
+        MIMENonMultipart.__init__(self, 'image', subtype, policy=policy,
+                                  **_params)
+        self.set_payload(_imagedata)
+        _encoder(self)
diff --git a/modules/language/python/module/email/mime/message.py b/modules/language/python/module/email/mime/message.py
new file mode 100644
index 0000000..e215d3e
--- /dev/null
+++ b/modules/language/python/module/email/mime/message.py
@@ -0,0 +1,36 @@
+module(email,mime,message)
+
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Class representing message/* MIME documents."""
+
+__all__ = ['MIMEMessage']
+
+import email.message as message
+from email.mime.nonmultipart import MIMENonMultipart
+
+
+
+class MIMEMessage(MIMENonMultipart):
+    """A message/* MIME part wrapping another message object."""
+
+    def __init__(self, _msg, _subtype='rfc822', *, policy=None):
+        """Build a message/* MIME document containing _msg.
+
+        _msg must be an instance of Message (or of a subclass); anything
+        else raises TypeError.
+
+        _subtype names the subtype of the contained message.  It defaults
+        to "rfc822", the name the MIME standard defines even though RFC
+        2822 has technically outdated the term.
+        """
+        MIMENonMultipart.__init__(self, 'message', _subtype, policy=policy)
+        if isinstance(_msg, message.Message):
+            # Go through the Message base class: the attach() inherited
+            # from MIMENonMultipart would raise an exception.
+            message.Message.attach(self, _msg)
+            # Make sure the default type is set correctly as well.
+            self.set_default_type('message/rfc822')
+            return
+        raise TypeError('Argument is not an instance of Message')
diff --git a/modules/language/python/module/email/mime/nonmultipart.py b/modules/language/python/module/email/mime/nonmultipart.py
new file mode 100644
index 0000000..aa5d06a
--- /dev/null
+++ b/modules/language/python/module/email/mime/nonmultipart.py
@@ -0,0 +1,24 @@
+module(email,mime,nonmultipart)
+
+# Copyright (C) 2002-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Base class for MIME type messages that are not multipart."""
+
+__all__ = ['MIMENonMultipart']
+
+import email.errors as errors
+from email.mime.base import MIMEBase
+
+
+
+class MIMENonMultipart(MIMEBase):
+    """Common ancestor of the MIME classes that cannot hold subparts."""
+
+    def attach(self, payload):
+        """Refuse the attachment; always raises MultipartConversionError.
+
+        Classes derived from this one are by definition not of content
+        type multipart/*, so the public API forbids attaching subparts to
+        them.  payload is ignored.
+        """
+        reason = 'Cannot attach additional subparts to non-multipart/*'
+        raise errors.MultipartConversionError(reason)
diff --git a/modules/language/python/module/email/mime/text.py b/modules/language/python/module/email/mime/text.py
new file mode 100644
index 0000000..755b140
--- /dev/null
+++ b/modules/language/python/module/email/mime/text.py
@@ -0,0 +1,43 @@
+module(email,mime,text)
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Class representing text/* type MIME documents."""
+
+__all__ = ['MIMEText']
+
+from email.charset import Charset
+from email.mime.nonmultipart import MIMENonMultipart
+
+
+
+class MIMEText(MIMENonMultipart):
+    """A text/* MIME part built from a string."""
+
+    def __init__(self, _text, _subtype='plain', _charset=None, *, policy=None):
+        """Build a text/* MIME document around _text.
+
+        _text is the string that becomes the payload.
+
+        _subtype is the MIME sub content type, "plain" unless given.
+
+        _charset becomes the charset parameter of the Content-Type header
+        and is also passed to set_payload().  When it is None, "us-ascii"
+        is chosen if _text can be encoded as us-ascii and "utf-8"
+        otherwise.
+        """
+
+        # Pick a charset from the text itself when none was requested.
+        # XXX: This can be removed once #7304 is fixed.
+        if _charset is None:
+            try:
+                _text.encode('us-ascii')
+            except UnicodeEncodeError:
+                _charset = 'utf-8'
+            else:
+                _charset = 'us-ascii'
+
+        MIMENonMultipart.__init__(self, 'text', _subtype, policy=policy,
+                                  charset=str(_charset))
+
+        self.set_payload(_text, _charset)
diff --git a/modules/language/python/module/imghdr.py b/modules/language/python/module/imghdr.py
new file mode 100644
index 0000000..8820a9d
--- /dev/null
+++ b/modules/language/python/module/imghdr.py
@@ -0,0 +1,170 @@
+module(imghdr)
+
+"""Recognize image file formats based on their first few bytes."""
+
+from os import PathLike
+
+__all__ = ["what"]
+
+#-------------------------#
+# Recognize image headers #
+#-------------------------#
+
+def what(file, h=None):
+    """Return the image type found in file or in the header bytes h.
+
+    When h is None the first 32 bytes are read from file.  A str or
+    PathLike is opened in binary mode and closed again before returning;
+    any other object is treated as an open file whose position is restored
+    after the read.  When h is given, file is not touched.
+
+    Every function in tests is called with the header and the file opened
+    here (None if this function did not open one).  The first truthy
+    result is returned; None is returned when no test matches.
+    """
+    opened = None
+    try:
+        if h is None:
+            if isinstance(file, (str, PathLike)):
+                opened = open(file, 'rb')
+                h = opened.read(32)
+            else:
+                position = file.tell()
+                h = file.read(32)
+                file.seek(position)
+        for probe in tests:
+            kind = probe(h, opened)
+            if not kind:
+                continue
+            return kind
+    finally:
+        if opened:
+            opened.close()
+    return None
+
+
+#---------------------------------#
+# Subroutines per image file type #
+#---------------------------------#
+
+tests = []
+
+def test_jpeg(h, f):
+    """Recognise JPEG data whose bytes 6-9 spell JFIF or Exif."""
+    marker = h[6:10]
+    if marker == b'JFIF' or marker == b'Exif':
+        return 'jpeg'
+    return None
+
+tests.append(test_jpeg)
+
+def test_png(h, f):
+    """Recognise the eight-byte PNG signature at the start of h."""
+    if not h.startswith(b'\211PNG\r\n\032\n'):
+        return None
+    return 'png'
+
+tests.append(test_png)
+
+def test_gif(h, f):
+    """Recognise GIF data (both the '87 and the '89 variant)."""
+    version = h[:6]
+    if version not in (b'GIF87a', b'GIF89a'):
+        return None
+    return 'gif'
+
+tests.append(test_gif)
+
+def test_tiff(h, f):
+    """Recognise TIFF data (Motorola 'MM' or Intel 'II' byte order)."""
+    byte_order = h[:2]
+    if byte_order not in (b'MM', b'II'):
+        return None
+    return 'tiff'
+
+tests.append(test_tiff)
+
+def test_rgb(h, f):
+    """Recognise SGI image library files by their two-byte magic number."""
+    if not h.startswith(b'\001\332'):
+        return None
+    return 'rgb'
+
+tests.append(test_rgb)
+
+def test_pbm(h, f):
+    """Recognise PBM (portable bitmap): 'P1' or 'P4' followed by whitespace."""
+    if len(h) < 3:
+        return None
+    if h[0] == ord(b'P') and h[1] in b'14' and h[2] in b' \t\n\r':
+        return 'pbm'
+    return None
+
+tests.append(test_pbm)
+
+def test_pgm(h, f):
+    """Recognise PGM (portable graymap): 'P2' or 'P5' followed by whitespace."""
+    if len(h) < 3:
+        return None
+    if h[0] == ord(b'P') and h[1] in b'25' and h[2] in b' \t\n\r':
+        return 'pgm'
+    return None
+
+tests.append(test_pgm)
+
+def test_ppm(h, f):
+    """Recognise PPM (portable pixmap): 'P3' or 'P6' followed by whitespace."""
+    if len(h) < 3:
+        return None
+    if h[0] == ord(b'P') and h[1] in b'36' and h[2] in b' \t\n\r':
+        return 'ppm'
+    return None
+
+tests.append(test_ppm)
+
+def test_rast(h, f):
+    """Recognise Sun raster files by their four-byte magic number."""
+    if not h.startswith(b'\x59\xA6\x6A\x95'):
+        return None
+    return 'rast'
+
+tests.append(test_rast)
+
+def test_xbm(h, f):
+    """Recognise X bitmaps (X10 or X11), which begin with '#define '."""
+    if not h.startswith(b'#define '):
+        return None
+    return 'xbm'
+
+tests.append(test_xbm)
+
+def test_bmp(h, f):
+    """Recognise BMP data, which begins with the bytes 'BM'."""
+    if not h.startswith(b'BM'):
+        return None
+    return 'bmp'
+
+tests.append(test_bmp)
+
+def test_webp(h, f):
+    """Recognise WebP data: 'RIFF' at the start and 'WEBP' at bytes 8-11."""
+    if not h.startswith(b'RIFF'):
+        return None
+    if h[8:12] != b'WEBP':
+        return None
+    return 'webp'
+
+tests.append(test_webp)
+
+def test_exr(h, f):
+    """Recognise OpenEXR data by its four-byte magic number."""
+    if not h.startswith(b'\x76\x2f\x31\x01'):
+        return None
+    return 'exr'
+
+tests.append(test_exr)
+
+#--------------------#
+# Small test program #
+#--------------------#
+
+def test():
+    """Command-line driver: run testall() over the names in sys.argv.
+
+    A leading '-r' argument is removed from sys.argv and turns on
+    recursion.  Without any remaining arguments the current directory '.'
+    is tested.  A KeyboardInterrupt writes a notice to stderr and exits
+    with status 1.
+    """
+    import sys
+    argv = sys.argv
+    recursive = 0
+    if len(argv) > 1 and argv[1] == '-r':
+        del argv[1:2]
+        recursive = 1
+    targets = argv[1:] or ['.']
+    try:
+        testall(targets, recursive, 1)
+    except KeyboardInterrupt:
+        sys.stderr.write('\n[Interrupted]\n')
+        sys.exit(1)
+
+def testall(list, recursive, toplevel):
+    """Print the result of what() for every name in list.
+
+    A directory is expanded with glob and tested in turn when recursive or
+    toplevel is true; otherwise a hint to use -r is printed for it.  For
+    any other name the value returned by what() is printed, or
+    '*** not found ***' when what() raises OSError.
+    """
+    import sys
+    import os
+    for filename in list:
+        if not os.path.isdir(filename):
+            print(filename + ':', end=' ')
+            sys.stdout.flush()
+            try:
+                print(what(filename))
+            except OSError:
+                print('*** not found ***')
+            continue
+        print(filename + '/:', end=' ')
+        if not (recursive or toplevel):
+            print('*** directory (use -r) ***')
+            continue
+        print('recursing down:')
+        import glob
+        names = glob.glob(os.path.join(filename, '*'))
+        testall(names, recursive, 0)
+
+if __name__ == '__main__':
+ test()
diff --git a/modules/language/python/module/sndhdr.py b/modules/language/python/module/sndhdr.py
new file mode 100644
index 0000000..42b3d1a
--- /dev/null
+++ b/modules/language/python/module/sndhdr.py
@@ -0,0 +1,247 @@
+module(sndhdr)
+
+"""Routines to help recognizing sound files.
+
+Function whathdr() recognizes various types of sound file headers.
+It understands almost all headers that SOX can decode.
+
+The return tuple contains the following items, in this order:
+- file type (as SOX understands it)
+- sampling rate (0 if unknown or hard to decode)
+- number of channels (0 if unknown or hard to decode)
+- number of frames in the file (-1 if unknown or hard to decode)
+- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW
+
+If the file doesn't have a recognizable type, it returns None.
+If the file can't be opened, OSError is raised.
+
+To compute the total time, divide the number of frames by the
+sampling rate (a frame contains a sample for each channel).
+
+Function what() calls whathdr(). (It used to also use some
+heuristics for raw data, but this doesn't work very well.)
+
+Finally, the function test() is a simple main program that calls
+what() for all files mentioned on the argument list. For directory
+arguments it calls what() for all files in that directory. Default
+argument is "." (testing all files in the current directory). The
+option -r tells it to recurse down directories found inside
+explicitly given directories.
+"""
+
+# The file structure is top-down except that the test program and its
+# subroutine come last.
+
+__all__ = ['what', 'whathdr']
+
+from collections import namedtuple
+
+# Result record built by whathdr(); the field order follows the description
+# of the return tuple in the module docstring.
+SndHeaders = namedtuple(
+    'SndHeaders',
+    ['filetype', 'framerate', 'nchannels', 'nframes', 'sampwidth'])
+
+def what(filename):
+    """Guess the type of a sound file; simply delegates to whathdr()."""
+    return whathdr(filename)
+
+
+def whathdr(filename):
+    """Recognize the sound header at the start of the file filename.
+
+    The file is opened in binary mode and its first 512 bytes are handed,
+    together with the open file, to each function in tests.  The first
+    truthy result is returned as a SndHeaders tuple; None is returned when
+    no test matches.
+    """
+    with open(filename, 'rb') as stream:
+        header = stream.read(512)
+        for probe in tests:
+            fields = probe(header, stream)
+            if not fields:
+                continue
+            return SndHeaders(*fields)
+        return None
+
+
+#-----------------------------------#
+# Subroutines per sound header type #
+#-----------------------------------#
+
+tests = []
+
+def test_aifc(h, f):
+    """Recognise AIFF and AIFF-C files and read their parameters with aifc.
+
+    h must start with 'FORM' and carry 'AIFC' or 'AIFF' at bytes 8-11,
+    otherwise None is returned.  The file f is rewound and opened with
+    aifc.open(); EOFError or aifc.Error from that call also yields None.
+
+    Returns (filetype, framerate, nchannels, nframes, bits per sample).
+    """
+    import aifc
+    if not h.startswith(b'FORM'):
+        return None
+    form_type = h[8:12]
+    if form_type != b'AIFC' and form_type != b'AIFF':
+        return None
+    fmt = 'aifc' if form_type == b'AIFC' else 'aiff'
+    f.seek(0)
+    try:
+        reader = aifc.open(f, 'r')
+    except (EOFError, aifc.Error):
+        return None
+    framerate = reader.getframerate()
+    nchannels = reader.getnchannels()
+    nframes = reader.getnframes()
+    return (fmt, framerate, nchannels, nframes, 8 * reader.getsampwidth())
+
+tests.append(test_aifc)
+
+
+def test_au(h, f):
+    """Recognise Sun/NeXT .au headers and decode their basic parameters.
+
+    The magic '.snd' selects big-endian decoding; the magics b'\\0ds.' and
+    b'dns.' select little-endian decoding.  Anything else returns None, as
+    does a header shorter than the 24 bytes that hold the fields read here.
+
+    Returns (filetype, rate, nchannels, nframes, sample_bits).  sample_bits
+    is 'U' for encoding 1, 8 for encoding 2, 16 for encoding 3 and '?' for
+    any other encoding.  nframes is -1 when it cannot be computed: when the
+    frame size is zero or when the data size field holds 0xFFFFFFFF, the
+    value the format uses for "unknown size".
+    """
+    if h.startswith(b'.snd'):
+        func = get_long_be
+    elif h[:4] in (b'\0ds.', b'dns.'):
+        func = get_long_le
+    else:
+        return None
+    # The fields below end at byte 24; a shorter header would make the
+    # decoding helpers index past the end of an empty or partial slice.
+    if len(h) < 24:
+        return None
+    filetype = 'au'
+    data_size = func(h[8:12])
+    encoding = func(h[12:16])
+    rate = func(h[16:20])
+    nchannels = func(h[20:24])
+    sample_size = 1 # default
+    if encoding == 1:
+        sample_bits = 'U'
+    elif encoding == 2:
+        sample_bits = 8
+    elif encoding == 3:
+        sample_bits = 16
+        sample_size = 2
+    else:
+        sample_bits = '?'
+    frame_size = sample_size * nchannels
+    if frame_size and data_size != 0xFFFFFFFF:
+        # Floor division keeps the frame count an integer.
+        nframe = data_size // frame_size
+    else:
+        nframe = -1
+    return filetype, rate, nchannels, nframe, sample_bits
+
+tests.append(test_au)
+
+
+def test_hcom(h, f):
+    """Recognise HCOM headers ('FSSD' at bytes 65-68, 'HCOM' at 128-131).
+
+    Returns ('hcom', rate, 1, -1, 8), or None when the markers are absent.
+    rate is 22050 divided (rounding down) by the big-endian divisor stored
+    at bytes 144-147; it is 0 when that divisor is zero or when the header
+    is too short to contain it.
+    """
+    if h[65:69] != b'FSSD' or h[128:132] != b'HCOM':
+        return None
+    rate = 0
+    # Only decode the divisor when all four of its bytes are present.
+    if len(h) >= 148:
+        divisor = get_long_be(h[144:148])
+        if divisor:
+            # Floor division keeps the sampling rate an integer.
+            rate = 22050 // divisor
+    return 'hcom', rate, 1, -1, 8
+
+tests.append(test_hcom)
+
+
+def test_voc(h, f):
+    """Recognise Creative Voice File headers.
+
+    Returns ('voc', rate, 1, -1, 8), or None when the 20-byte signature is
+    missing.  The little-endian short at bytes 20-21 gives the offset of
+    the first block; when that block is of type 1 the rate is derived from
+    the rate code four bytes further on.  The rate stays 0 when the offset
+    is 500 or more, when the block has another type, or when the header is
+    too short to hold the bytes involved.
+    """
+    if not h.startswith(b'Creative Voice File\032'):
+        return None
+    rate = 0
+    # The block offset occupies bytes 20-21; skip decoding if truncated.
+    if len(h) >= 22:
+        sbseek = get_short_le(h[20:22])
+        # Both h[sbseek] and h[sbseek+4] must lie inside the header.
+        if 0 <= sbseek < 500 and sbseek + 4 < len(h) and h[sbseek] == 1:
+            ratecode = 256 - h[sbseek+4]
+            if ratecode:
+                rate = int(1000000.0 / ratecode)
+    return 'voc', rate, 1, -1, 8
+
+tests.append(test_voc)
+
+
+def test_wav(h, f):
+    """Recognise RIFF/WAVE files and read their parameters with wave.
+
+    h must look like 'RIFF' <len> 'WAVE' 'fmt ', otherwise None is
+    returned.  The file f is rewound and opened with wave.open(); EOFError
+    or wave.Error from that call also yields None.
+
+    Returns ('wav', framerate, nchannels, nframes, bits per sample).
+    """
+    import wave
+    # 'RIFF' <len> 'WAVE' 'fmt ' <len>
+    if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ':
+        return None
+    f.seek(0)
+    try:
+        # wave.openfp() was only an alias of wave.open() and no longer
+        # exists in current Python versions.
+        w = wave.open(f, 'r')
+    except (EOFError, wave.Error):
+        return None
+    return ('wav', w.getframerate(), w.getnchannels(),
+            w.getnframes(), 8*w.getsampwidth())
+
+tests.append(test_wav)
+
+
+def test_8svx(h, f):
+    """Recognise 8SVX files: 'FORM' at the start and '8SVX' at bytes 8-11.
+
+    The header is not decoded any further; the result is always the fixed
+    tuple ('8svx', 0, 1, 0, 8), i.e. one channel is assumed.
+    """
+    if h.startswith(b'FORM') and h[8:12] == b'8SVX':
+        # Should decode it to get #channels -- assume always 1
+        return '8svx', 0, 1, 0, 8
+    return None
+
+tests.append(test_8svx)
+
+
+def test_sndt(h, f):
+    """Recognise headers that start with 'SOUND'.
+
+    Returns ('sndt', rate, 1, nsamples, 8), where nsamples is the
+    little-endian long at bytes 8-11 and rate the little-endian short at
+    bytes 20-21.  Returns None when the magic is missing or when the header
+    is shorter than the 22 bytes that hold those fields.
+    """
+    if not h.startswith(b'SOUND'):
+        return None
+    # A truncated header cannot supply the fields decoded below.
+    if len(h) < 22:
+        return None
+    nsamples = get_long_le(h[8:12])
+    rate = get_short_le(h[20:22])
+    return 'sndt', rate, 1, nsamples, 8
+
+tests.append(test_sndt)
+
+
+def test_sndr(h, f):
+    """Recognise headers that start with two zero bytes and a plausible rate.
+
+    The little-endian short at bytes 2-3 is taken as the sampling rate; the
+    header is accepted only when that rate lies between 4000 and 25000
+    inclusive.  Returns ('sndr', rate, 1, -1, 8), or None otherwise,
+    including when the header is shorter than four bytes.
+    """
+    # The rate occupies bytes 2-3, so at least four bytes are needed.
+    if len(h) < 4 or not h.startswith(b'\0\0'):
+        return None
+    rate = get_short_le(h[2:4])
+    if 4000 <= rate <= 25000:
+        return 'sndr', rate, 1, -1, 8
+    return None
+
+tests.append(test_sndr)
+
+
+#-------------------------------------------#
+# Subroutines to extract numbers from bytes #
+#-------------------------------------------#
+
+def get_long_be(b):
+    """Combine b[0]..b[3] into one integer, most significant byte first."""
+    value = 0
+    for index in (0, 1, 2, 3):
+        value = (value << 8) | b[index]
+    return value
+
+def get_long_le(b):
+    """Combine b[0]..b[3] into one integer, least significant byte first."""
+    value = 0
+    for index in (3, 2, 1, 0):
+        value = (value << 8) | b[index]
+    return value
+
+def get_short_be(b):
+    """Combine b[0] and b[1] into one integer, most significant byte first."""
+    high = b[0]
+    low = b[1]
+    return (high << 8) | low
+
+def get_short_le(b):
+    """Combine b[0] and b[1] into one integer, least significant byte first."""
+    high = b[1]
+    low = b[0]
+    return (high << 8) | low
+
+
+#--------------------#
+# Small test program #
+#--------------------#
+
+def test():
+    """Command-line driver: run testall() over the names in sys.argv.
+
+    A leading '-r' argument is removed from sys.argv and turns on
+    recursion.  Without any remaining arguments the current directory '.'
+    is tested.  A KeyboardInterrupt writes a notice to stderr and exits
+    with status 1.
+    """
+    import sys
+    argv = sys.argv
+    recursive = 0
+    if len(argv) > 1 and argv[1] == '-r':
+        del argv[1:2]
+        recursive = 1
+    targets = argv[1:] or ['.']
+    try:
+        testall(targets, recursive, 1)
+    except KeyboardInterrupt:
+        sys.stderr.write('\n[Interrupted]\n')
+        sys.exit(1)
+
+def testall(list, recursive, toplevel):
+    """Print the result of what() for every name in list.
+
+    A directory is expanded with glob and tested in turn when recursive or
+    toplevel is true; otherwise a hint to use -r is printed for it.  For
+    any other name the value returned by what() is printed, or
+    '*** not found ***' when what() raises OSError.
+    """
+    import sys
+    import os
+    for filename in list:
+        if not os.path.isdir(filename):
+            print(filename + ':', end=' ')
+            sys.stdout.flush()
+            try:
+                print(what(filename))
+            except OSError:
+                print('*** not found ***')
+            continue
+        print(filename + '/:', end=' ')
+        if not (recursive or toplevel):
+            print('*** directory (use -r) ***')
+            continue
+        print('recursing down:')
+        import glob
+        names = glob.glob(os.path.join(filename, '*'))
+        testall(names, recursive, 0)
+
+if __name__ == '__main__':
+ test()