"""Routines to help recognize sound files.

Function whathdr() recognizes various types of sound file headers.
It understands almost all headers that SOX can decode.

The return tuple contains the following items, in this order:
- file type (as SOX understands it)
- sampling rate (0 if unknown or hard to decode)
- number of channels (0 if unknown or hard to decode)
- number of frames in the file (-1 if unknown or hard to decode)
- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW

If the file doesn't have a recognizable type, it returns None.
If the file can't be opened, OSError is raised.

To compute the total time, divide the number of frames by the
sampling rate (a frame contains a sample for each channel).

Function what() calls whathdr().  (It used to also use some
heuristics for raw data, but this doesn't work very well.)

Finally, the function test() is a simple main program that calls
what() for all files mentioned on the argument list.  For directory
arguments it calls what() for all files in that directory.  Default
argument is "." (testing all files in the current directory).  The
option -r tells it to recurse down directories found inside
explicitly given directories.
"""
# The file structure is top-down except that the test program and its
# subroutine come last.
# Public API of the module: only the two recognizer entry points.
__all__ = ['what', 'whathdr']
from collections import namedtuple

# Result record for a recognized sound file.  Fields, in order: file type,
# sampling rate, number of channels, number of frames, and bits per sample
# (or 'U' / 'A' for U-LAW / A-LAW).
SndHeaders = namedtuple('SndHeaders',
                        'filetype framerate nchannels nframes sampwidth')
def what(filename):
    """Guess the type of a sound file.

    Forwards *filename* to whathdr() and returns its result unchanged.
    """
    res = whathdr(filename)
    return res
def whathdr(filename):
    """Recognize sound headers.

    Opens *filename* in binary mode, reads its leading bytes and offers
    them (together with the open file object) to each registered probe in
    ``tests`` in turn.  The first truthy probe result is wrapped in a
    SndHeaders tuple and returned; None is returned when no probe matches.
    Errors from open() propagate to the caller.
    """
    with open(filename, 'rb') as f:
        # 512 bytes is enough for every header probe in this module.
        h = f.read(512)
        for tf in tests:
            res = tf(h, f)
            if res:
                return SndHeaders(*res)
        return None
#-----------------------------------#
# Subroutines per sound header type #
#-----------------------------------#
67 if not h
.startswith(b
'FORM'):
69 if h
[8:12] == b
'AIFC':
71 elif h
[8:12] == b
'AIFF':
78 except (EOFError, aifc
.Error
):
80 return (fmt
, a
.getframerate(), a
.getnchannels(),
81 a
.getnframes(), 8 * a
.getsampwidth())
83 tests
.append(test_aifc
)
87 if h
.startswith(b
'.snd'):
89 elif h
[:4] in (b
'\0ds.', b
'dns.'):
94 hdr_size
= func(h
[4:8])
95 data_size
= func(h
[8:12])
96 encoding
= func(h
[12:16])
98 nchannels
= func(h
[20:24])
99 sample_size
= 1 # default
109 frame_size
= sample_size
* nchannels
111 nframe
= data_size
/ frame_size
114 return filetype
, rate
, nchannels
, nframe
, sample_bits
tests.append(test_au)
120 if h
[65:69] != b
'FSSD' or h
[128:132] != b
'HCOM':
122 divisor
= get_long_be(h
[144:148])
124 rate
= 22050 / divisor
127 return 'hcom', rate
, 1, -1, 8
tests.append(test_hcom)
133 if not h
.startswith(b
'Creative Voice File\032'):
135 sbseek
= get_short_le(h
[20:22])
137 if 0 <= sbseek
< 500 and h
[sbseek
] == 1:
138 ratecode
= 256 - h
[sbseek
+4]
140 rate
= int(1000000.0 / ratecode
)
141 return 'voc', rate
, 1, -1, 8
tests.append(test_voc)
148 # 'RIFF' <len> 'WAVE' 'fmt ' <len>
149 if not h
.startswith(b
'RIFF') or h
[8:12] != b
'WAVE' or h
[12:16] != b
'fmt ':
153 w
= wave
.openfp(f
, 'r')
154 except (EOFError, wave
.Error
):
156 return ('wav', w
.getframerate(), w
.getnchannels(),
157 w
.getnframes(), 8*w
.getsampwidth())
tests.append(test_wav)
163 if not h
.startswith(b
'FORM') or h
[8:12] != b
'8SVX':
165 # Should decode it to get #channels -- assume always 1
166 return '8svx', 0, 1, 0, 8
tests.append(test_8svx)
172 if h
.startswith(b
'SOUND'):
173 nsamples
= get_long_le(h
[8:12])
174 rate
= get_short_le(h
[20:22])
175 return 'sndt', rate
, 1, nsamples
, 8
tests.append(test_sndt)
181 if h
.startswith(b
'\0\0'):
182 rate
= get_short_le(h
[2:4])
183 if 4000 <= rate
<= 25000:
184 return 'sndr', rate
, 1, -1, 8
tests.append(test_sndr)
#-------------------------------------------#
# Subroutines to extract numbers from bytes #
#-------------------------------------------#
def get_long_be(b):
    """Return the first four items of *b* combined as a big-endian integer.

    *b* must yield integers when indexed (e.g. a bytes object); indexing
    raises IndexError if it holds fewer than four items.
    """
    return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]
def get_long_le(b):
    """Return the first four items of *b* combined as a little-endian integer.

    *b* must yield integers when indexed (e.g. a bytes object); indexing
    raises IndexError if it holds fewer than four items.
    """
    return (b[3] << 24) | (b[2] << 16) | (b[1] << 8) | b[0]
def get_short_be(b):
    """Return the first two items of *b* combined as a big-endian integer.

    *b* must yield integers when indexed (e.g. a bytes object); indexing
    raises IndexError if it holds fewer than two items.
    """
    return (b[0] << 8) | b[1]
def get_short_le(b):
    """Return the first two items of *b* combined as a little-endian integer.

    *b* must yield integers when indexed (e.g. a bytes object); indexing
    raises IndexError if it holds fewer than two items.
    """
    return (b[1] << 8) | b[0]
#--------------------#
# Small test program #
#--------------------#
def test():
    """Command-line driver: report the sound type of each argument.

    A leading '-r' argument is removed from sys.argv and turns on
    recursion into sub-directories.  The remaining arguments (or '.' when
    there are none) are passed to testall().  On KeyboardInterrupt a
    notice is written to stderr and the process exits with status 1.
    """
    import sys
    recursive = 0
    if sys.argv[1:] and sys.argv[1] == '-r':
        del sys.argv[1:2]
        recursive = 1
    try:
        if sys.argv[1:]:
            testall(sys.argv[1:], recursive, 1)
        else:
            testall(['.'], recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)
def testall(list, recursive, toplevel):
    """Print the result of what() for every path in *list*.

    list      -- iterable of file or directory names
    recursive -- true to descend into directories found below the top level
    toplevel  -- true for the explicitly given arguments, whose
                 directories are always expanded

    Directories are expanded with a '*' glob and processed recursively
    when *recursive* or *toplevel* is true; otherwise a hint to use -r is
    printed.  For files, the name is printed followed by what()'s result,
    or a "not found" marker if what() raises OSError.
    """
    import glob
    import os
    import sys
    for filename in list:
        if os.path.isdir(filename):
            print(filename + '/:', end=' ')
            if recursive or toplevel:
                print('recursing down:')
                names = glob.glob(os.path.join(filename, '*'))
                testall(names, recursive, 0)
            else:
                print('*** directory (use -r) ***')
        else:
            print(filename + ':', end=' ')
            # Make the name visible before the file is inspected.
            sys.stdout.flush()
            try:
                print(what(filename))
            except OSError:
                print('*** not found ***')
# Run the small test program when the module is executed as a script.
if __name__ == '__main__':
    test()