diff options
author | Stefan Israelsson Tampe <stefan.itampe@gmail.com> | 2018-09-10 18:02:06 +0200 |
---|---|---|
committer | Stefan Israelsson Tampe <stefan.itampe@gmail.com> | 2018-09-10 18:02:06 +0200 |
commit | b13d359c27a03c19ac113ced417d00155942ef11 (patch) | |
tree | 6baa74737c8de1924352be9164a64c4c5c7399ee | |
parent | 964eac0a27214f12b11cd867aabe07d3de868ea3 (diff) |
mime types
-rw-r--r-- | modules/language/python/module/email/mime/audio.py | 76 | ||||
-rw-r--r-- | modules/language/python/module/email/mime/base.py | 30 | ||||
-rw-r--r-- | modules/language/python/module/email/mime/image.py | 49 | ||||
-rw-r--r-- | modules/language/python/module/email/mime/message.py | 36 | ||||
-rw-r--r-- | modules/language/python/module/email/mime/nonmultipart.py | 24 | ||||
-rw-r--r-- | modules/language/python/module/email/mime/text.py | 43 | ||||
-rw-r--r-- | modules/language/python/module/imghdr.py | 170 | ||||
-rw-r--r-- | modules/language/python/module/sndhdr.py | 247 |
8 files changed, 675 insertions, 0 deletions
# ---- new file: modules/language/python/module/email/mime/audio.py (mode 100644, blob ccd08fe) ----
module(email,mime,audio)

# Copyright (C) 2001-2007 Python Software Foundation
# Author: Anthony Baxter
# Contact: email-sig@python.org

"""Class representing audio/* type MIME documents."""

__all__ = ['MIMEAudio']

import sndhdr

from io import BytesIO
import email.encoders as encoders
from email.mime.nonmultipart import MIMENonMultipart



_sndhdr_MIMEmap = {'au'  : 'basic',
                   'wav' :'x-wav',
                   'aiff':'x-aiff',
                   'aifc':'x-aiff',
                   }

# There are others in sndhdr that don't have MIME types. :(
# Additional ones to be added to sndhdr?  midi, mp3, realaudio, wma??
def _whatsnd(data):
    """Try to identify a sound file type.

    sndhdr.what() has a pretty cruddy interface, unfortunately.  This is why
    we re-do it here.  It would be easier to reverse engineer the Unix 'file'
    command and use the standard 'magic' file, as shipped with a modern Unix.

    Returns the audio MIME subtype mapped from the first sndhdr test that
    matches, or None when nothing matches or the matched type has no entry
    in _sndhdr_MIMEmap.
    """
    # Only the first 512 bytes are inspected, mirroring sndhdr.whathdr().
    hdr = data[:512]
    fakefile = BytesIO(hdr)
    for testfn in sndhdr.tests:
        res = testfn(hdr, fakefile)
        if res is not None:
            return _sndhdr_MIMEmap.get(res[0])
    return None



class MIMEAudio(MIMENonMultipart):
    """Class for generating audio/* MIME documents."""

    def __init__(self, _audiodata, _subtype=None,
                 _encoder=encoders.encode_base64, *, policy=None, **_params):
        """Create an audio/* type MIME document.

        _audiodata contains the raw audio data.  When the subtype has to be
        guessed it must be bytes-like, because its first 512 bytes are
        wrapped in a BytesIO and handed to the `sndhdr' tests.  If the data
        is recognized, the subtype will be automatically included in the
        Content-Type header.  Otherwise, you can specify the specific audio
        subtype via the _subtype parameter.  If _subtype is not given, and
        no subtype can be guessed, a TypeError is raised.

        _encoder is a function which will perform the actual encoding for
        transport of the audio data.  It takes one argument, which is this
        MIMEAudio instance.  It should use get_payload() and set_payload() to
        change the payload to the encoded form.  It should also add any
        Content-Transfer-Encoding or other headers to the message as
        necessary.  The default encoding is Base64.

        Any additional keyword arguments are passed to the base class
        constructor, which turns them into parameters on the Content-Type
        header.
        """
        if _subtype is None:
            _subtype = _whatsnd(_audiodata)
        if _subtype is None:
            raise TypeError('Could not find audio MIME subtype')
        MIMENonMultipart.__init__(self, 'audio', _subtype, policy=policy,
                                  **_params)
        self.set_payload(_audiodata)
        _encoder(self)

# ---- new file: modules/language/python/module/email/mime/base.py (mode 100644, blob 3e269bd) ----
module(email,mime,base)
# Copyright (C) 2001-2006 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org

"""Base class for MIME specializations."""

__all__ = ['MIMEBase']

import email.policy
import email.message as message



class MIMEBase(message.Message):
    """Base class for MIME specializations."""

    def __init__(self, _maintype, _subtype, *, policy=None, **_params):
        """This constructor adds a Content-Type: and a MIME-Version: header.

        The Content-Type: header is taken from the _maintype and _subtype
        arguments.  Additional parameters for this header are taken from the
        keyword arguments.  When policy is None, email.policy.compat32 is
        used.
        """
        if policy is None:
            policy = email.policy.compat32
        message.Message.__init__(self, policy=policy)
        ctype = '%s/%s' % (_maintype, _subtype)
        self.add_header('Content-Type', ctype, **_params)
        self['MIME-Version'] = '1.0'

# ---- new file: modules/language/python/module/email/mime/image.py (mode 100644, blob 5346dac) ----
module(email,mime,image)

# Copyright (C) 2001-2006 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org

"""Class representing image/* type MIME documents."""

__all__ = ['MIMEImage']

import imghdr

import email.encoders as encoders
from email.mime.nonmultipart import MIMENonMultipart



class MIMEImage(MIMENonMultipart):
    """Class for generating image/* type MIME documents."""

    def __init__(self, _imagedata, _subtype=None,
                 _encoder=encoders.encode_base64, *, policy=None, **_params):
        """Create an image/* type MIME document.

        _imagedata contains the raw image data.  If this data can be decoded
        by the standard Python `imghdr' module, then the subtype will be
        automatically included in the Content-Type header.  Otherwise, you
        can specify the specific image subtype via the _subtype parameter.
        If _subtype is not given, and no subtype can be guessed, a TypeError
        is raised.

        _encoder is a function which will perform the actual encoding for
        transport of the image data.  It takes one argument, which is this
        MIMEImage instance.  It should use get_payload() and set_payload() to
        change the payload to the encoded form.  It should also add any
        Content-Transfer-Encoding or other headers to the message as
        necessary.  The default encoding is Base64.

        Any additional keyword arguments are passed to the base class
        constructor, which turns them into parameters on the Content-Type
        header.
        """
        if _subtype is None:
            _subtype = imghdr.what(None, _imagedata)
        if _subtype is None:
            raise TypeError('Could not guess image MIME subtype')
        MIMENonMultipart.__init__(self, 'image', _subtype, policy=policy,
                                  **_params)
        self.set_payload(_imagedata)
        _encoder(self)

# ---- new file: modules/language/python/module/email/mime/message.py (mode 100644, blob e215d3e) ----
module(email,mime,message)

# Copyright (C) 2001-2006 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org

"""Class representing message/* MIME documents."""

__all__ = ['MIMEMessage']

import email.message as message
from email.mime.nonmultipart import MIMENonMultipart



class MIMEMessage(MIMENonMultipart):
    """Class representing message/* MIME documents."""

    def __init__(self, _msg, _subtype='rfc822', *, policy=None):
        """Create a message/* type MIME document.

        _msg is a message object and must be an instance of Message, or a
        derived class of Message, otherwise a TypeError is raised.

        Optional _subtype defines the subtype of the contained message.  The
        default is "rfc822" (this is defined by the MIME standard, even though
        the term "rfc822" is technically outdated by RFC 2822).
        """
        MIMENonMultipart.__init__(self, 'message', _subtype, policy=policy)
        if not isinstance(_msg, message.Message):
            raise TypeError('Argument is not an instance of Message')
        # It's convenient to use this base class method.  We need to do it
        # this way or we'll get an exception
        message.Message.attach(self, _msg)
        # And be sure our default type is set correctly
        self.set_default_type('message/rfc822')

# ---- new file: modules/language/python/module/email/mime/nonmultipart.py (mode 100644, blob aa5d06a) ----
module(email,mime,nonmultipart)

# Copyright (C) 2002-2006 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org

"""Base class for MIME type messages that are not multipart."""

__all__ = ['MIMENonMultipart']

import email.errors as errors
from email.mime.base import MIMEBase



class MIMENonMultipart(MIMEBase):
    """Base class for MIME non-multipart type messages."""

    def attach(self, payload):
        """Always raise MultipartConversionError; subparts are not allowed."""
        # The public API prohibits attaching multiple subparts to MIMEBase
        # derived subtypes since none of them are, by definition, of content
        # type multipart/*
        raise errors.MultipartConversionError(
            'Cannot attach additional subparts to non-multipart/*')

# ---- new file: modules/language/python/module/email/mime/text.py (mode 100644, blob 755b140) ----
module(email,mime,text)
# Copyright (C) 2001-2006 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org

"""Class representing text/* type MIME documents."""

__all__ = ['MIMEText']

from email.charset import Charset
from email.mime.nonmultipart import MIMENonMultipart



class MIMEText(MIMENonMultipart):
    """Class for generating text/* type MIME documents."""

    def __init__(self, _text, _subtype='plain', _charset=None, *, policy=None):
        """Create a text/* type MIME document.

        _text is the string for this message object.

        _subtype is the MIME sub content type, defaulting to "plain".

        _charset is the character set parameter added to the Content-Type
        header.  When it is not given, "us-ascii" is used if _text encodes
        as ASCII and "utf-8" otherwise.  Note that as a side-effect, the
        Content-Transfer-Encoding header will also be set.
        """

        # If no _charset was specified, check to see if there are non-ascii
        # characters present. If not, use 'us-ascii', otherwise use utf-8.
        # XXX: This can be removed once #7304 is fixed.
        if _charset is None:
            try:
                _text.encode('us-ascii')
                _charset = 'us-ascii'
            except UnicodeEncodeError:
                _charset = 'utf-8'

        MIMENonMultipart.__init__(self, 'text', _subtype, policy=policy,
                                  **{'charset': str(_charset)})

        self.set_payload(_text, _charset)

# ---- new file: modules/language/python/module/imghdr.py (mode 100644, blob 8820a9d) ----
module(imghdr)

"""Recognize image file formats based on their first few bytes."""

from os import PathLike

__all__ = ["what"]

#-------------------------#
# Recognize image headers #
#-------------------------#

def what(file, h=None):
    """Return the image type name detected for a file or header, or None.

    If h is given it is used as the header bytes and file is not read.
    Otherwise the first 32 bytes are read from file: a str or PathLike is
    opened in binary mode (and closed again before returning), anything
    else is treated as a seekable file object whose position is restored
    after the read.

    The header is passed to each function in the module-level ``tests``
    list in order; the first truthy result is returned.
    """
    f = None
    try:
        if h is None:
            if isinstance(file, (str, PathLike)):
                f = open(file, 'rb')
                h = f.read(32)
            else:
                location = file.tell()
                h = file.read(32)
                file.seek(location)
        for tf in tests:
            res = tf(h, f)
            if res:
                return res
    finally:
        # Only close what this function opened itself.
        if f: f.close()
    return None


#---------------------------------#
# Subroutines per image file type #
#---------------------------------#

tests = []

def test_jpeg(h, f):
    """JPEG data with JFIF or Exif markers; and raw JPEG"""
    if h[6:10] in (b'JFIF', b'Exif'):
        return 'jpeg'
    # Raw JPEG stream: SOI marker followed directly by a quantization
    # table segment, with no JFIF/Exif application segment in between.
    elif h[:4] == b'\xff\xd8\xff\xdb':
        return 'jpeg'

tests.append(test_jpeg)

def test_png(h, f):
    """PNG (8-byte file signature)"""
    if h.startswith(b'\211PNG\r\n\032\n'):
        return 'png'

tests.append(test_png)

def test_gif(h, f):
    """GIF ('87 and '89 variants)"""
    if h[:6] in (b'GIF87a', b'GIF89a'):
        return 'gif'

tests.append(test_gif)


def test_tiff(h, f):
    """Report 'tiff' when the header opens with the 'MM' or 'II' byte-order mark."""
    byte_order = h[:2]
    if byte_order in (b'MM', b'II'):
        return 'tiff'
    return None

tests.append(test_tiff)


def test_rgb(h, f):
    """Report 'rgb' for the SGI image library magic number."""
    if not h.startswith(b'\001\332'):
        return None
    return 'rgb'

tests.append(test_rgb)


def test_pbm(h, f):
    """Report 'pbm' for a portable bitmap ('P1' or 'P4' plus whitespace)."""
    if len(h) < 3:
        return None
    magic, variant, separator = h[0], h[1], h[2]
    if magic == ord(b'P') and variant in b'14' and separator in b' \t\n\r':
        return 'pbm'
    return None

tests.append(test_pbm)


def test_pgm(h, f):
    """Report 'pgm' for a portable graymap ('P2' or 'P5' plus whitespace)."""
    if len(h) < 3:
        return None
    magic, variant, separator = h[0], h[1], h[2]
    if magic == ord(b'P') and variant in b'25' and separator in b' \t\n\r':
        return 'pgm'
    return None

tests.append(test_pgm)


def test_ppm(h, f):
    """Report 'ppm' for a portable pixmap ('P3' or 'P6' plus whitespace)."""
    if len(h) < 3:
        return None
    magic, variant, separator = h[0], h[1], h[2]
    if magic == ord(b'P') and variant in b'36' and separator in b' \t\n\r':
        return 'ppm'
    return None

tests.append(test_ppm)


def test_rast(h, f):
    """Report 'rast' for the Sun raster file magic number."""
    if not h.startswith(b'\x59\xA6\x6A\x95'):
        return None
    return 'rast'

tests.append(test_rast)


def test_xbm(h, f):
    """Report 'xbm' for an X bitmap, which begins with a C '#define '."""
    if not h.startswith(b'#define '):
        return None
    return 'xbm'

tests.append(test_xbm)


def test_bmp(h, f):
    """Report 'bmp' when the header starts with 'BM'."""
    if not h.startswith(b'BM'):
        return None
    return 'bmp'

tests.append(test_bmp)


def test_webp(h, f):
    """Report 'webp' for a RIFF container whose form type is 'WEBP'."""
    if not h.startswith(b'RIFF'):
        return None
    if h[8:12] != b'WEBP':
        return None
    return 'webp'

tests.append(test_webp)


def test_exr(h, f):
    """Report 'exr' for the 4-byte magic number checked here."""
    if not h.startswith(b'\x76\x2f\x31\x01'):
        return None
    return 'exr'

tests.append(test_exr)

#--------------------#
# Small test program #
#--------------------#

def test():
    """Command-line driver: run what() on the paths given in sys.argv.

    A leading '-r' enables recursion into nested directories and is removed
    from sys.argv.  Without path arguments the current directory is used.
    Ctrl-C prints a notice on stderr and exits with status 1.
    """
    import sys
    recursive = 0
    cli_args = sys.argv[1:]
    if cli_args and cli_args[0] == '-r':
        del sys.argv[1:2]
        recursive = 1
    try:
        # Re-read sys.argv: the '-r' flag may just have been removed.
        targets = sys.argv[1:] or ['.']
        testall(targets, recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)

def testall(list, recursive, toplevel):
    """Print the detected image type for every path in *list*.

    Directories are expanded with glob when *recursive* or *toplevel* is
    true; otherwise a hint to use -r is printed.  Nested calls pass
    toplevel=0.
    """
    import sys
    import os
    for filename in list:
        if not os.path.isdir(filename):
            print(filename + ':', end=' ')
            sys.stdout.flush()
            try:
                print(what(filename))
            except OSError:
                print('*** not found ***')
            continue
        print(filename + '/:', end=' ')
        if not (recursive or toplevel):
            print('*** directory (use -r) ***')
            continue
        print('recursing down:')
        import glob
        names = glob.glob(os.path.join(filename, '*'))
        testall(names, recursive, 0)

if __name__ == '__main__':
    test()

# ---- new file: modules/language/python/module/sndhdr.py (mode 100644, blob 42b3d1a) ----
module(sndhdr)

"""Routines to help recognizing sound files.

Function whathdr() recognizes various types of sound file headers.
It understands almost all headers that SOX can decode.

The return tuple contains the following items, in this order:
- file type (as SOX understands it)
- sampling rate (0 if unknown or hard to decode)
- number of channels (0 if unknown or hard to decode)
- number of frames in the file (-1 if unknown or hard to decode)
- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW

If the file doesn't have a recognizable type, it returns None.
If the file can't be opened, OSError is raised.

To compute the total time, divide the number of frames by the
sampling rate (a frame contains a sample for each channel).

Function what() calls whathdr().  (It used to also use some
heuristics for raw data, but this doesn't work very well.)

Finally, the function test() is a simple main program that calls
what() for all files mentioned on the argument list.  For directory
arguments it calls what() for all files in that directory.  Default
argument is "." (testing all files in the current directory).  The
option -r tells it to recurse down directories found inside
explicitly given directories.
"""

# The file structure is top-down except that the test program and its
# subroutine come last.

__all__ = ['what', 'whathdr']

from collections import namedtuple

SndHeaders = namedtuple('SndHeaders',
                        'filetype framerate nchannels nframes sampwidth')

def what(filename):
    """Guess the type of a sound file (thin wrapper around whathdr())."""
    return whathdr(filename)


def whathdr(filename):
    """Recognize sound headers.

    Reads the first 512 bytes of *filename* and passes them, together with
    the open file object, to each function in ``tests`` in order.  Returns
    a SndHeaders tuple built from the first truthy result, or None when no
    test matches.  Errors from open() propagate to the caller.
    """
    with open(filename, 'rb') as f:
        h = f.read(512)
        for tf in tests:
            res = tf(h, f)
            if res:
                return SndHeaders(*res)
        return None


#-----------------------------------#
# Subroutines per sound header type #
#-----------------------------------#

tests = []

def test_aifc(h, f):
    """AIFF / AIFF-C: 'FORM' <len> 'AIFF'|'AIFC', decoded via the aifc module."""
    if not h.startswith(b'FORM'):
        return None
    if h[8:12] == b'AIFC':
        fmt = 'aifc'
    elif h[8:12] == b'AIFF':
        fmt = 'aiff'
    else:
        return None
    # Imported only once the magic matched, so probing other formats does
    # not depend on the aifc module being importable.
    import aifc
    f.seek(0)
    try:
        a = aifc.open(f, 'r')
    except (EOFError, aifc.Error):
        return None
    return (fmt, a.getframerate(), a.getnchannels(),
            a.getnframes(), 8 * a.getsampwidth())

tests.append(test_aifc)


def test_au(h, f):
    """Sun/NeXT .au header, in big-endian or byte-swapped form."""
    if h.startswith(b'.snd'):
        func = get_long_be
    elif h[:4] in (b'\0ds.', b'dns.'):
        func = get_long_le
    else:
        return None
    # The fixed fields read below end at byte 24; a shorter header cannot
    # be decoded, so report "not recognized" instead of raising IndexError.
    if len(h) < 24:
        return None
    filetype = 'au'
    # Bytes 4:8 hold the header size; it is not needed here.
    data_size = func(h[8:12])
    encoding = func(h[12:16])
    rate = func(h[16:20])
    nchannels = func(h[20:24])
    sample_size = 1 # default
    if encoding == 1:
        sample_bits = 'U'
    elif encoding == 2:
        sample_bits = 8
    elif encoding == 3:
        sample_bits = 16
        sample_size = 2
    else:
        sample_bits = '?'
    frame_size = sample_size * nchannels
    if frame_size:
        # Floor division keeps the frame count an integer.
        nframe = data_size // frame_size
    else:
        nframe = -1
    return filetype, rate, nchannels, nframe, sample_bits

tests.append(test_au)


def test_hcom(h, f):
    """HCOM: 'FSSD' at offset 65 and 'HCOM' at offset 128."""
    if h[65:69] != b'FSSD' or h[128:132] != b'HCOM':
        return None
    # The divisor lives in bytes 144:148; bail out on a truncated header.
    if len(h) < 148:
        return None
    divisor = get_long_be(h[144:148])
    if divisor:
        # Floor division keeps the sampling rate an integer.
        rate = 22050 // divisor
    else:
        rate = 0
    return 'hcom', rate, 1, -1, 8

tests.append(test_hcom)


def test_voc(h, f):
    """Creative Voice File; the rate stays 0 when it cannot be decoded."""
    if not h.startswith(b'Creative Voice File\032'):
        return None
    rate = 0
    if len(h) >= 22:
        sbseek = get_short_le(h[20:22])
        # Make sure both bytes read below are actually inside the header.
        if 0 <= sbseek < 500 and sbseek + 4 < len(h) and h[sbseek] == 1:
            ratecode = 256 - h[sbseek+4]
            if ratecode:
                rate = int(1000000.0 / ratecode)
    return 'voc', rate, 1, -1, 8

tests.append(test_voc)


def test_wav(h, f):
    """RIFF/WAVE, decoded via the wave module."""
    # 'RIFF' <len> 'WAVE' 'fmt ' <len>
    if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ':
        return None
    import wave
    f.seek(0)
    try:
        # wave.open() accepts an open file object; the wave.openfp() alias
        # used previously no longer exists in current CPython.
        w = wave.open(f, 'r')
    except (EOFError, wave.Error):
        return None
    return ('wav', w.getframerate(), w.getnchannels(),
            w.getnframes(), 8*w.getsampwidth())

tests.append(test_wav)


def test_8svx(h, f):
    """IFF 8SVX: 'FORM' <len> '8SVX'."""
    if not h.startswith(b'FORM') or h[8:12] != b'8SVX':
        return None
    # Should decode it to get #channels -- assume always 1
    return '8svx', 0, 1, 0, 8

tests.append(test_8svx)


def test_sndt(h, f):
    """Header starting with 'SOUND' (reported as 'sndt')."""
    # The sample count and rate fields end at byte 22.
    if h.startswith(b'SOUND') and len(h) >= 22:
        nsamples = get_long_le(h[8:12])
        rate = get_short_le(h[20:22])
        return 'sndt', rate, 1, nsamples, 8

tests.append(test_sndt)


def test_sndr(h, f):
    """Header starting with two NUL bytes and a plausible rate ('sndr')."""
    # The rate field occupies bytes 2:4.
    if h.startswith(b'\0\0') and len(h) >= 4:
        rate = get_short_le(h[2:4])
        if 4000 <= rate <= 25000:
            return 'sndr', rate, 1, -1, 8

tests.append(test_sndr)


#-------------------------------------------#
# Subroutines to extract numbers from bytes #
#-------------------------------------------#

def get_long_be(b):
    """Decode the first four bytes of *b* as a big-endian unsigned integer."""
    return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]

def get_long_le(b):
    """Decode the first four bytes of *b* as a little-endian unsigned integer."""
    return (b[3] << 24) | (b[2] << 16) | (b[1] << 8) | b[0]

def get_short_be(b):
    """Decode the first two bytes of *b* as a big-endian unsigned integer."""
    high, low = b[0], b[1]
    return (high << 8) | low

def get_short_le(b):
    """Decode the first two bytes of *b* as a little-endian unsigned integer."""
    high = b[1]
    low = b[0]
    return (high << 8) | low


#--------------------#
# Small test program #
#--------------------#

def test():
    """Command-line driver: run what() on the paths given in sys.argv.

    A leading '-r' enables recursion into nested directories and is removed
    from sys.argv.  Without path arguments the current directory is used.
    Ctrl-C prints a notice on stderr and exits with status 1.
    """
    import sys
    recursive = 0
    cli_args = sys.argv[1:]
    if cli_args and cli_args[0] == '-r':
        del sys.argv[1:2]
        recursive = 1
    try:
        # Re-read sys.argv: the '-r' flag may just have been removed.
        targets = sys.argv[1:] or ['.']
        testall(targets, recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)

def testall(list, recursive, toplevel):
    """Print the detected sound header for every path in *list*.

    Directories are expanded with glob when *recursive* or *toplevel* is
    true; otherwise a hint to use -r is printed.  Nested calls pass
    toplevel=0.
    """
    import sys
    import os
    for filename in list:
        if not os.path.isdir(filename):
            print(filename + ':', end=' ')
            sys.stdout.flush()
            try:
                print(what(filename))
            except OSError:
                print('*** not found ***')
            continue
        print(filename + '/:', end=' ')
        if not (recursive or toplevel):
            print('*** directory (use -r) ***')
            continue
        print('recursing down:')
        import glob
        names = glob.glob(os.path.join(filename, '*'))
        testall(names, recursive, 0)

if __name__ == '__main__':
    test()