altEngine
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
stb_vorbis.h
Go to the documentation of this file.
1 // Ogg Vorbis audio decoder - v1.09 - public domain
2 // http://nothings.org/stb_vorbis/
3 //
4 // Original version written by Sean Barrett in 2007.
5 //
6 // Originally sponsored by RAD Game Tools. Seeking sponsored
7 // by Phillip Bennefall, Marc Andersen, Aaron Baker, Elias Software,
8 // Aras Pranckevicius, and Sean Barrett.
9 //
10 // LICENSE
11 //
12 // This software is dual-licensed to the public domain and under the following
13 // license: you are granted a perpetual, irrevocable license to copy, modify,
14 // publish, and distribute this file as you see fit.
15 //
16 // No warranty for any purpose is expressed or implied by the author (nor
17 // by RAD Game Tools). Report bugs and send enhancements to the author.
18 //
19 // Limitations:
20 //
21 // - floor 0 not supported (used in old ogg vorbis files pre-2004)
22 // - lossless sample-truncation at beginning ignored
23 // - cannot concatenate multiple vorbis streams
24 // - sample positions are 32-bit, limiting seekable 192Khz
25 // files to around 6 hours (Ogg supports 64-bit)
26 //
27 // Feature contributors:
28 // Dougall Johnson (sample-exact seeking)
29 //
30 // Bugfix/warning contributors:
31 // Terje Mathisen Niklas Frykholm Andy Hill
32 // Casey Muratori John Bolton Gargaj
33 // Laurent Gomila Marc LeBlanc Ronny Chevalier
34 // Bernhard Wodo Evan Balster alxprd@github
35 // Tom Beaumont Ingo Leitgeb Nicolas Guillemot
36 // Phillip Bennefall Rohit Thiago Goulart
37 // manxorist@github saga musix
38 //
39 // Partial history:
40 // 1.09 - 2016/04/04 - back out 'truncation of last frame' fix from previous version
41 // 1.08 - 2016/04/02 - warnings; setup memory leaks; truncation of last frame
42 // 1.07 - 2015/01/16 - fixes for crashes on invalid files; warning fixes; const
43 // 1.06 - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
44 // some crash fixes when out of memory or with corrupt files
45 // fix some inappropriately signed shifts
46 // 1.05 - 2015/04/19 - don't define __forceinline if it's redundant
47 // 1.04 - 2014/08/27 - fix missing const-correct case in API
48 // 1.03 - 2014/08/07 - warning fixes
49 // 1.02 - 2014/07/09 - declare qsort comparison as explicitly _cdecl in Windows
50 // 1.01 - 2014/06/18 - fix stb_vorbis_get_samples_float (interleaved was correct)
51 // 1.0 - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
52 // (API change) report sample rate for decode-full-file funcs
53 //
54 // See end of file for full version history.
55 
56 
58 //
59 // HEADER BEGINS HERE
60 //
61 
62 #ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H
63 #define STB_VORBIS_INCLUDE_STB_VORBIS_H
64 
65 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
66 #define STB_VORBIS_NO_STDIO 1
67 #endif
68 
69 #ifndef STB_VORBIS_NO_STDIO
70 #include <stdio.h>
71 #endif
72 
73 #ifdef __cplusplus
74 extern "C" {
75 #endif
76 
78 
79 // Individual stb_vorbis* handles are not thread-safe; you cannot decode from
80 // them from multiple threads at the same time. However, you can have multiple
81 // stb_vorbis* handles and decode from them independently in multiple threads.
82 
83 
85 
86 // normally stb_vorbis uses malloc() to allocate memory at startup,
87 // and alloca() to allocate temporary memory during a frame on the
88 // stack. (Memory consumption will depend on the amount of setup
89 // data in the file and how you set the compile flags for speed
90 // vs. size. In my test files the maximal-size usage is ~150KB.)
91 //
92 // You can modify the wrapper functions in the source (setup_malloc,
93 // setup_temp_malloc, temp_malloc) to change this behavior, or you
94 // can use a simpler allocation model: you pass in a buffer from
95 // which stb_vorbis will allocate _all_ its memory (including the
96 // temp memory). "open" may fail with a VORBIS_outofmem if you
97 // do not pass in enough data; there is no way to determine how
98 // much you do need except to succeed (at which point you can
99 // query get_info to find the exact amount required. yes I know
100 // this is lame).
101 //
102 // If you pass in a non-NULL buffer of the type below, allocation
103 // will occur from it as described above. Otherwise just pass NULL
104 // to use malloc()/alloca()
105 
106 typedef struct
107 {
111 
112 
114 
115 typedef struct stb_vorbis stb_vorbis;
116 
117 typedef struct
118 {
119  unsigned int sample_rate;
120  int channels;
121 
122  unsigned int setup_memory_required;
124  unsigned int temp_memory_required;
125 
128 
129 // get general information about the file
131 
132 // get the last error detected (clears it, too)
133 extern int stb_vorbis_get_error(stb_vorbis *f);
134 
135 // close an ogg vorbis file and free all memory in use
136 extern void stb_vorbis_close(stb_vorbis *f);
137 
138 // this function returns the offset (in samples) from the beginning of the
139 // file that will be returned by the next decode, if it is known, or -1
140 // otherwise. after a flush_pushdata() call, this may take a while before
141 // it becomes valid again.
142 // NOT WORKING YET after a seek with PULLDATA API
144 
145 // returns the current seek point within the file, or offset from the beginning
146 // of the memory buffer. In pushdata mode it returns 0.
147 extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f);
148 
150 
151 #ifndef STB_VORBIS_NO_PUSHDATA_API
152 
153 // this API allows you to get blocks of data from any source and hand
154 // them to stb_vorbis. you have to buffer them; stb_vorbis will tell
155 // you how much it used, and you have to give it the rest next time;
156 // and stb_vorbis may not have enough data to work with and you will
157 // need to give it the same data again PLUS more. Note that the Vorbis
158 // specification does not bound the size of an individual frame.
159 
161  const unsigned char * datablock, int datablock_length_in_bytes,
162  int *datablock_memory_consumed_in_bytes,
163  int *error,
164  const stb_vorbis_alloc *alloc_buffer);
165 // create a vorbis decoder by passing in the initial data block containing
166 // the ogg&vorbis headers (you don't need to parse them, just provide
167 // the first N bytes of the file--you're told if it's not enough, see below)
168 // on success, returns an stb_vorbis *, does not set error, returns the amount of
169 // data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
170 // on failure, returns NULL on error and sets *error, does not change *datablock_memory_consumed
171 // if returns NULL and *error is VORBIS_need_more_data, then the input block was
172 // incomplete and you need to pass in a larger block from the start of the file
173 
175  stb_vorbis *f,
176  const unsigned char *datablock, int datablock_length_in_bytes,
177  int *channels, // place to write number of float * buffers
178  float ***output, // place to write float ** array of float * buffers
179  int *samples // place to write number of output samples
180  );
181 // decode a frame of audio sample data if possible from the passed-in data block
182 //
183 // return value: number of bytes we used from datablock
184 //
185 // possible cases:
186 // 0 bytes used, 0 samples output (need more data)
187 // N bytes used, 0 samples output (resynching the stream, keep going)
188 // N bytes used, M samples output (one frame of data)
189 // note that after opening a file, you will ALWAYS get one N-bytes,0-sample
190 // frame, because Vorbis always "discards" the first frame.
191 //
192 // Note that on resynch, stb_vorbis will rarely consume all of the buffer,
193 // instead only datablock_length_in_bytes-3 or less. This is because it wants
194 // to avoid missing parts of a page header if they cross a datablock boundary,
195 // without writing state-machiney code to record a partial detection.
196 //
197 // The number of channels returned are stored in *channels (which can be
198 // NULL--it is always the same as the number of channels reported by
199 // get_info). *output will contain an array of float* buffers, one per
200 // channel. In other words, (*output)[0][0] contains the first sample from
201 // the first channel, and (*output)[1][0] contains the first sample from
202 // the second channel.
203 
205 // inform stb_vorbis that your next datablock will not be contiguous with
206 // previous ones (e.g. you've seeked in the data); future attempts to decode
207 // frames will cause stb_vorbis to resynchronize (as noted above), and
208 // once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
209 // will begin decoding the _next_ frame.
210 //
211 // if you want to seek using pushdata, you need to seek in your file, then
212 // call stb_vorbis_flush_pushdata(), then start calling decoding, then once
213 // decoding is returning you data, call stb_vorbis_get_sample_offset, and
214 // if you don't like the result, seek your file again and repeat.
215 #endif
216 
217 
219 
220 #ifndef STB_VORBIS_NO_PULLDATA_API
221 // This API assumes stb_vorbis is allowed to pull data from a source--
222 // either a block of memory containing the _entire_ vorbis stream, or a
223 // FILE * that you or it create, or possibly some other reading mechanism
224 // if you go modify the source to replace the FILE * case with some kind
225 // of callback to your code. (But if you don't support seeking, you may
226 // just want to go ahead and use pushdata.)
227 
228 #if !defined(STB_VORBIS_NO_STDIO) && !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
229 extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output);
230 #endif
231 #if !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
232 extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output);
233 #endif
234 // decode an entire file and output the data interleaved into a malloc()ed
235 // buffer stored in *output. The return value is the number of samples
236 // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
237 // When you're done with it, just free() the pointer returned in *output.
238 
239 extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
240  int *error, const stb_vorbis_alloc *alloc_buffer);
241 // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
242 // this must be the entire stream!). on failure, returns NULL and sets *error
243 
244 #ifndef STB_VORBIS_NO_STDIO
245 extern stb_vorbis * stb_vorbis_open_filename(const char *filename,
246  int *error, const stb_vorbis_alloc *alloc_buffer);
247 // create an ogg vorbis decoder from a filename via fopen(). on failure,
248 // returns NULL and sets *error (possibly to VORBIS_file_open_failure).
249 
250 extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
251  int *error, const stb_vorbis_alloc *alloc_buffer);
252 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
253 // the _current_ seek point (ftell). on failure, returns NULL and sets *error.
254 // note that stb_vorbis must "own" this stream; if you seek it in between
255 // calls to stb_vorbis, it will become confused. Moreover, if you attempt to
256 // perform stb_vorbis_seek_*() operations on this file, it will assume it
257 // owns the _entire_ rest of the file after the start point. Use the next
258 // function, stb_vorbis_open_file_section(), to limit it.
259 
260 extern stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_close,
261  int *error, const stb_vorbis_alloc *alloc_buffer, unsigned int len);
262 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
263 // the _current_ seek point (ftell); the stream will be of length 'len' bytes.
264 // on failure, returns NULL and sets *error. note that stb_vorbis must "own"
265 // this stream; if you seek it in between calls to stb_vorbis, it will become
266 // confused.
267 #endif
268 
269 extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
270 extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number);
271 // these functions seek in the Vorbis file to (approximately) 'sample_number'.
272 // after calling seek_frame(), the next call to get_frame_*() will include
273 // the specified sample. after calling stb_vorbis_seek(), the next call to
274 // stb_vorbis_get_samples_* will start with the specified sample. If you
275 // do not need to seek to EXACTLY the target sample when using get_samples_*,
276 // you can also use seek_frame().
277 
278 extern void stb_vorbis_seek_start(stb_vorbis *f);
279 // this function is equivalent to stb_vorbis_seek(f,0)
280 
281 extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f);
283 // these functions return the total length of the vorbis stream
284 
285 extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
286 // decode the next frame and return the number of samples. the number of
287 // channels returned are stored in *channels (which can be NULL--it is always
288 // the same as the number of channels reported by get_info). *output will
289 // contain an array of float* buffers, one per channel. These outputs will
290 // be overwritten on the next call to stb_vorbis_get_frame_*.
291 //
292 // You generally should not intermix calls to stb_vorbis_get_frame_*()
293 // and stb_vorbis_get_samples_*(), since the latter calls the former.
294 
295 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
296 extern int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts);
297 extern int stb_vorbis_get_frame_short (stb_vorbis *f, int num_c, short **buffer, int num_samples);
298 #endif
299 // decode the next frame and return the number of *samples* per channel.
300 // Note that for interleaved data, you pass in the number of shorts (the
301 // size of your array), but the return value is the number of samples per
302 // channel, not the total number of samples.
303 //
304 // The data is coerced to the number of channels you request according to the
305 // channel coercion rules (see below). You must pass in the size of your
306 // buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
307 // The maximum buffer size needed can be gotten from get_info(); however,
308 // the Vorbis I specification implies an absolute maximum of 4096 samples
309 // per channel.
310 
311 // Channel coercion rules:
312 // Let M be the number of channels requested, and N the number of channels present,
313 // and Cn be the nth channel; let stereo L be the sum of all L and center channels,
314 // and stereo R be the sum of all R and center channels (channel assignment from the
315 // vorbis spec).
316 // M N output
317 // 1 k sum(Ck) for all k
318 // 2 * stereo L, stereo R
319 // k l k > l, the first l channels, then 0s
320 // k l k <= l, the first k channels
321 // Note that this is not _good_ surround etc. mixing at all! It's just so
322 // you get something useful.
323 
324 extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
325 extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
326 // gets num_samples samples, not necessarily on a frame boundary--this requires
327 // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
328 // Returns the number of samples stored per channel; it may be less than requested
329 // at the end of the file. If there are no more samples in the file, returns 0.
330 
331 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
332 extern int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts);
333 extern int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int num_samples);
334 #endif
335 // gets num_samples samples, not necessarily on a frame boundary--this requires
336 // buffering so you have to supply the buffers. Applies the coercion rules above
337 // to produce 'channels' channels. Returns the number of samples stored per channel;
338 // it may be less than requested at the end of the file. If there are no more
339 // samples in the file, returns 0.
340 
341 #endif
342 
344 
346 {
348 
349  VORBIS_need_more_data=1, // not a real error
350 
351  VORBIS_invalid_api_mixing, // can't mix API modes
352  VORBIS_outofmem, // not enough memory
354  VORBIS_too_many_channels, // STB_VORBIS_MAX_CHANNELS is too small
355  VORBIS_file_open_failure, // fopen() failed
356  VORBIS_seek_without_length, // can't seek in unknown-length file
357 
358  VORBIS_unexpected_eof=10, // file is truncated?
359  VORBIS_seek_invalid, // seek past EOF
360 
361  // decoding errors (corrupt/invalid stream) -- you probably
362  // don't care about the exact details of these
363 
364  // vorbis errors:
367 
368  // ogg errors:
377 };
378 
379 
380 #ifdef __cplusplus
381 }
382 #endif
383 
384 #endif // STB_VORBIS_INCLUDE_STB_VORBIS_H
385 //
386 // HEADER ENDS HERE
387 //
389 
390 #ifndef STB_VORBIS_HEADER_ONLY
391 
392 // global configuration settings (e.g. set these in the project/makefile),
393 // or just set them in this file at the top (although ideally the first few
394 // should be visible when the header file is compiled too, although it's not
395 // crucial)
396 
397 // STB_VORBIS_NO_PUSHDATA_API
398 // does not compile the code for the various stb_vorbis_*_pushdata()
399 // functions
400 // #define STB_VORBIS_NO_PUSHDATA_API
401 
402 // STB_VORBIS_NO_PULLDATA_API
403 // does not compile the code for the non-pushdata APIs
404 // #define STB_VORBIS_NO_PULLDATA_API
405 
406 // STB_VORBIS_NO_STDIO
407 // does not compile the code for the APIs that use FILE *s internally
408 // or externally (implied by STB_VORBIS_NO_PULLDATA_API)
409 // #define STB_VORBIS_NO_STDIO
410 
411 // STB_VORBIS_NO_INTEGER_CONVERSION
412 // does not compile the code for converting audio sample data from
413 // float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
414 // #define STB_VORBIS_NO_INTEGER_CONVERSION
415 
416 // STB_VORBIS_NO_FAST_SCALED_FLOAT
417 // does not use a fast float-to-int trick to accelerate float-to-int on
418 // most platforms which requires endianness be defined correctly.
419 //#define STB_VORBIS_NO_FAST_SCALED_FLOAT
420 
421 
422 // STB_VORBIS_MAX_CHANNELS [number]
423 // globally define this to the maximum number of channels you need.
424 // The spec does not put a restriction on channels except that
425 // the count is stored in a byte, so 255 is the hard limit.
426 // Reducing this saves about 16 bytes per value, so using 16 saves
427 // (255-16)*16 or around 4KB. Plus any other memory usage
428 // I forgot to account for. Can probably go as low as 8 (7.1 audio),
429 // 6 (5.1 audio), or 2 (stereo only).
430 #ifndef STB_VORBIS_MAX_CHANNELS
431 #define STB_VORBIS_MAX_CHANNELS 16 // enough for anyone?
432 #endif
433 
434 // STB_VORBIS_PUSHDATA_CRC_COUNT [number]
435 // after a flush_pushdata(), stb_vorbis begins scanning for the
436 // next valid page, without backtracking. when it finds something
437 // that looks like a page, it streams through it and verifies its
438 // CRC32. Should that validation fail, it keeps scanning. But it's
439 // possible that _while_ streaming through to check the CRC32 of
440 // one candidate page, it sees another candidate page. This #define
441 // determines how many "overlapping" candidate pages it can search
442 // at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
443 // garbage pages could be as big as 64KB, but probably average ~16KB.
444 // So don't hose ourselves by scanning an apparent 64KB page and
445 // missing a ton of real ones in the interim; so minimum of 2
446 #ifndef STB_VORBIS_PUSHDATA_CRC_COUNT
447 #define STB_VORBIS_PUSHDATA_CRC_COUNT 4
448 #endif
449 
450 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
451 // sets the log size of the huffman-acceleration table. Maximum
452 // supported value is 24. with larger numbers, more decodings are O(1),
453 // but the table size is larger so worse cache missing, so you'll have
454 // to probe (and try multiple ogg vorbis files) to find the sweet spot.
455 #ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH
456 #define STB_VORBIS_FAST_HUFFMAN_LENGTH 10
457 #endif
458 
459 // STB_VORBIS_FAST_BINARY_LENGTH [number]
460 // sets the log size of the binary-search acceleration table. this
461 // is used in similar fashion to the fast-huffman size to set initial
462 // parameters for the binary search
463 
464 // STB_VORBIS_FAST_HUFFMAN_INT
465 // The fast huffman tables are much more efficient if they can be
466 // stored as 16-bit results instead of 32-bit results. This restricts
467 // the codebooks to having only 65535 possible outcomes, though.
468 // (At least, accelerated by the huffman table.)
469 #ifndef STB_VORBIS_FAST_HUFFMAN_INT
470 #define STB_VORBIS_FAST_HUFFMAN_SHORT
471 #endif
472 
473 // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
474 // If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
475 // back on binary searching for the correct one. This requires storing
476 // extra tables with the huffman codes in sorted order. Defining this
477 // symbol trades off space for speed by forcing a linear search in the
478 // non-fast case, except for "sparse" codebooks.
479 // #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
480 
481 // STB_VORBIS_DIVIDES_IN_RESIDUE
482 // stb_vorbis precomputes the result of the scalar residue decoding
483 // that would otherwise require a divide per chunk. you can trade off
484 // space for time by defining this symbol.
485 // #define STB_VORBIS_DIVIDES_IN_RESIDUE
486 
487 // STB_VORBIS_DIVIDES_IN_CODEBOOK
488 // vorbis VQ codebooks can be encoded two ways: with every case explicitly
489 // stored, or with all elements being chosen from a small range of values,
490 // and all values possible in all elements. By default, stb_vorbis expands
491 // this latter kind out to look like the former kind for ease of decoding,
492 // because otherwise an integer divide-per-vector-element is required to
493 // unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
494 // trade off storage for speed.
495 //#define STB_VORBIS_DIVIDES_IN_CODEBOOK
496 
497 #ifdef STB_VORBIS_CODEBOOK_SHORTS
498 #error "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats"
499 #endif
500 
501 // STB_VORBIS_DIVIDE_TABLE
502 // this replaces small integer divides in the floor decode loop with
503 // table lookups. made less than 1% difference, so disabled by default.
504 
505 // STB_VORBIS_NO_INLINE_DECODE
506 // disables the inlining of the scalar codebook fast-huffman decode.
507 // might save a little codespace; useful for debugging
508 // #define STB_VORBIS_NO_INLINE_DECODE
509 
510 // STB_VORBIS_NO_DEFER_FLOOR
511 // Normally we only decode the floor without synthesizing the actual
512 // full curve. We can instead synthesize the curve immediately. This
513 // requires more memory and is very likely slower, so I don't think
514 // you'd ever want to do it except for debugging.
515 // #define STB_VORBIS_NO_DEFER_FLOOR
516 
517 
518 
519 
521 
522 #ifdef STB_VORBIS_NO_PULLDATA_API
523  #define STB_VORBIS_NO_INTEGER_CONVERSION
524  #define STB_VORBIS_NO_STDIO
525 #endif
526 
527 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
528  #define STB_VORBIS_NO_STDIO 1
529 #endif
530 
531 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
532 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
533 
534  // only need endianness for fast-float-to-int, which we don't
535  // use for pushdata
536 
537  #ifndef STB_VORBIS_BIG_ENDIAN
538  #define STB_VORBIS_ENDIAN 0
539  #else
540  #define STB_VORBIS_ENDIAN 1
541  #endif
542 
543 #endif
544 #endif
545 
546 
547 #ifndef STB_VORBIS_NO_STDIO
548 #include <stdio.h>
549 #endif
550 
551 #ifndef STB_VORBIS_NO_CRT
552  #include <stdlib.h>
553  #include <string.h>
554  #include <assert.h>
555  #include <math.h>
556 
557  // find definition of alloca if it's not in stdlib.h:
558  #ifdef _MSC_VER
559  #include <malloc.h>
560  #endif
561  #if defined(__linux__) || defined(__linux) || defined(__EMSCRIPTEN__)
562  #include <alloca.h>
563  #endif
564 #else // STB_VORBIS_NO_CRT
565  #define NULL 0
566  #define malloc(s) 0
567  #define free(s) ((void) 0)
568  #define realloc(s) 0
569 #endif // STB_VORBIS_NO_CRT
570 
571 #include <limits.h>
572 
573 #ifdef __MINGW32__
574  // eff you mingw:
575  // "fixed":
576  // http://sourceforge.net/p/mingw-w64/mailman/message/32882927/
577  // "no that broke the build, reverted, who cares about C":
578  // http://sourceforge.net/p/mingw-w64/mailman/message/32890381/
579  #ifdef __forceinline
580  #undef __forceinline
581  #endif
582  #define __forceinline
583 #elif !defined(_MSC_VER)
584  #if __GNUC__
585  #define __forceinline inline
586  #else
587  #define __forceinline
588  #endif
589 #endif
590 
591 #if STB_VORBIS_MAX_CHANNELS > 256
592 #error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range"
593 #endif
594 
595 #if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24
596 #error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range"
597 #endif
598 
599 
600 #if 0
601 #include <crtdbg.h>
602 #define CHECK(f) _CrtIsValidHeapPointer(f->channel_buffers[1])
603 #else
604 #define CHECK(f) ((void) 0)
605 #endif
606 
607 #define MAX_BLOCKSIZE_LOG 13 // from specification
608 #define MAX_BLOCKSIZE (1 << MAX_BLOCKSIZE_LOG)
609 
610 
611 typedef unsigned char uint8;
612 typedef signed char int8;
613 typedef unsigned short uint16;
614 typedef signed short int16;
615 typedef unsigned int uint32;
616 typedef signed int int32;
617 
618 #ifndef TRUE
619 #define TRUE 1
620 #define FALSE 0
621 #endif
622 
623 typedef float codetype;
624 
625 // @NOTE
626 //
627 // Some arrays below are tagged "//varies", which means it's actually
628 // a variable-sized piece of data, but rather than malloc I assume it's
629 // small enough it's better to just allocate it all together with the
630 // main thing
631 //
632 // Most of the variables are specified with the smallest size I could pack
633 // them into. It might give better performance to make them all full-sized
634 // integers. It should be safe to freely rearrange the structures or change
635 // the sizes larger--nothing relies on silently truncating etc., nor the
636 // order of variables.
637 
638 #define FAST_HUFFMAN_TABLE_SIZE (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH)
639 #define FAST_HUFFMAN_TABLE_MASK (FAST_HUFFMAN_TABLE_SIZE - 1)
640 
641 typedef struct
642 {
643  int dimensions, entries;
646  float delta_value;
654  #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
656  #else
657  int32 fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
658  #endif
662 } Codebook;
663 
664 typedef struct
665 {
672  uint8 book_list[16]; // varies
673 } Floor0;
674 
675 typedef struct
676 {
678  uint8 partition_class_list[32]; // varies
679  uint8 class_dimensions[16]; // varies
680  uint8 class_subclasses[16]; // varies
681  uint8 class_masterbooks[16]; // varies
682  int16 subclass_books[16][8]; // varies
683  uint16 Xlist[31*8+2]; // varies
684  uint8 sorted_order[31*8+2];
685  uint8 neighbors[31*8+2][2];
688  int values;
689 } Floor1;
690 
691 typedef union
692 {
695 } Floor;
696 
697 typedef struct
698 {
699  uint32 begin, end;
704  int16 (*residue_books)[8];
705 } Residue;
706 
707 typedef struct
708 {
713 
714 typedef struct
715 {
719  uint8 submap_floor[15]; // varies
720  uint8 submap_residue[15]; // varies
721 } Mapping;
722 
723 typedef struct
724 {
729 } Mode;
730 
// Bookkeeping for one CRC check in progress: the CRC value that must be
// matched, the running CRC, and byte counters for the data still to be
// processed.
// NOTE(review): presumably one of these exists per candidate page while
// resynchronizing in pushdata mode (see STB_VORBIS_PUSHDATA_CRC_COUNT) --
// confirm against the code that uses it.
731 typedef struct
732 {
733  uint32 goal_crc; // expected crc if match
734  int bytes_left; // bytes left in packet
735  uint32 crc_so_far; // running crc
736  int bytes_done; // bytes processed in _current_ chunk
737  uint32 sample_loc; // granule pos encoded in page
738 } CRCscan;
739 
740 typedef struct
741 {
742  uint32 page_start, page_end;
744 } ProbedPage;
745 
747 {
748  // user-accessible info
749  unsigned int sample_rate;
750  int channels;
751 
752  unsigned int setup_memory_required;
753  unsigned int temp_memory_required;
755 
756  // input config
757 #ifndef STB_VORBIS_NO_STDIO
758  FILE *f;
761 #endif
762 
766 
768 
770 
772 
774 
775  // memory management
779 
780  // run-time results
781  int eof;
783 
784  // user-useful data
785 
786  // header info
787  int blocksize[2];
792  uint16 floor_types[64]; // varies
795  uint16 residue_types[64]; // varies
800  Mode mode_config[64]; // varies
801 
803 
804  // decode buffer
807 
810 
811  #ifndef STB_VORBIS_NO_DEFER_FLOOR
813  #else
814  float *floor_buffers[STB_VORBIS_MAX_CHANNELS];
815  #endif
816 
817  uint32 current_loc; // sample location of next frame to decode
819 
820  // per-blocksize precomputed data
821 
822  // twiddle factors
823  float *A[2],*B[2],*C[2];
824  float *window[2];
826 
827  // current page/packet/segment streaming info
828  uint32 serial; // stream serial number for verification
835  int next_seg;
836  int last_seg; // flag that we're on the last segment
837  int last_seg_which; // what was the segment number of the last seg?
845 
846  // push mode scanning
847  int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
848 #ifndef STB_VORBIS_NO_PUSHDATA_API
850 #endif
851 
852  // sample-access
855 };
856 
857 #if defined(STB_VORBIS_NO_PUSHDATA_API)
858  #define IS_PUSH_MODE(f) FALSE
859 #elif defined(STB_VORBIS_NO_PULLDATA_API)
860  #define IS_PUSH_MODE(f) TRUE
861 #else
862  #define IS_PUSH_MODE(f) ((f)->push_mode)
863 #endif
864 
865 typedef struct stb_vorbis vorb;
866 
867 static int error(vorb *f, enum STBVorbisError e)
868 {
869  f->error = e;
870  if (!f->eof && e != VORBIS_need_more_data) {
871  f->error=e; // breakpoint for debugging
872  }
873  return 0;
874 }
875 
876 
877 // these functions are used for allocating temporary memory
878 // while decoding. if you can afford the stack space, use
879 // alloca(); otherwise, provide a temp buffer and it will
880 // allocate out of those.
881 
// Number of bytes needed for a block array built by make_block_array():
// 'count' pointers followed by 'count' subblocks of 'size' bytes each.
// Both arguments are parenthesized so expressions such as 'n+1' expand
// correctly.
#define array_size_required(count,size)  ((count)*(sizeof(void *)+(size)))

// Allocate 'size' bytes of temporary memory: from the user-supplied buffer
// when there is one, otherwise from the stack via alloca().
#define temp_alloc(f,size)              ((f)->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size))

// Release memory obtained from temp_alloc(). Only does anything when the
// platform provides a 'dealloca'; it is handed the pointer being released.
// NOTE(review): 'dealloca' is non-standard; this assumes it takes the pointer
// previously returned by alloca() -- confirm on any platform that defines it.
#ifdef dealloca
#define temp_free(f,p)                  ((f)->alloc.alloc_buffer ? 0 : dealloca(p))
#else
#define temp_free(f,p)                  0
#endif

// Save / restore the temp-allocation offset of the user-supplied buffer.
#define temp_alloc_save(f)              ((f)->temp_offset)
#define temp_alloc_restore(f,p)         ((f)->temp_offset = (p))

// Allocate temporary memory for, and build, an array of 'count' pointers to
// 'size'-byte subblocks.
#define temp_block_array(f,count,size)  make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
894 
895 // given a sufficiently large block of memory, make an array of pointers to subblocks of it
// Lay out a pointer table at the start of 'mem': the first 'count' slots are
// pointers, and slot i is set to point at the i-th 'size'-byte subblock of
// the storage that immediately follows the table. Returns 'mem' (as the
// table). The caller must supply enough memory for the table plus all
// subblocks.
static void *make_block_array(void *mem, int count, int size)
{
   void **table = (void **) mem;
   char *cursor = (char *) (table + count);   // first byte after the pointer table
   int idx = 0;

   while (idx < count) {
      table[idx++] = cursor;
      cursor += size;
   }
   return table;
}
907 
908 static void *setup_malloc(vorb *f, int sz)
909 {
910  sz = (sz+3) & ~3;
911  f->setup_memory_required += sz;
912  if (f->alloc.alloc_buffer) {
913  void *p = (char *) f->alloc.alloc_buffer + f->setup_offset;
914  if (f->setup_offset + sz > f->temp_offset) return NULL;
915  f->setup_offset += sz;
916  return p;
917  }
918  return sz ? malloc(sz) : NULL;
919 }
920 
921 static void setup_free(vorb *f, void *p)
922 {
923  if (f->alloc.alloc_buffer) return; // do nothing; setup mem is a stack
924  free(p);
925 }
926 
927 static void *setup_temp_malloc(vorb *f, int sz)
928 {
929  sz = (sz+3) & ~3;
930  if (f->alloc.alloc_buffer) {
931  if (f->temp_offset - sz < f->setup_offset) return NULL;
932  f->temp_offset -= sz;
933  return (char *) f->alloc.alloc_buffer + f->temp_offset;
934  }
935  return malloc(sz);
936 }
937 
938 static void setup_temp_free(vorb *f, void *p, int sz)
939 {
940  if (f->alloc.alloc_buffer) {
941  f->temp_offset += (sz+3)&~3;
942  return;
943  }
944  free(p);
945 }
946 
947 #define CRC32_POLY 0x04c11db7 // from spec
948 
949 static uint32 crc_table[256];
950 static void crc32_init(void)
951 {
952  int i,j;
953  uint32 s;
954  for(i=0; i < 256; i++) {
955  for (s=(uint32) i << 24, j=0; j < 8; ++j)
956  s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0);
957  crc_table[i] = s;
958  }
959 }
960 
// NOTE(review): the line carrying this function's name and parameter list
// is absent from this listing (numbering jumps from 959 to 962); the body
// reads two values named 'crc' and 'byte'.
// Returns crc shifted left by 8 bits, XORed with the crc_table entry
// selected by byte ^ (crc >> 24).
962 {
963  return (crc << 8) ^ crc_table[byte ^ (crc >> 24)];
964 }
965 
966 
// used in setup, and for huffman that doesn't go fast path
// Reverses the order of the 32 bits of n (bit 0 <-> bit 31, and so on).
static unsigned int bit_reverse(unsigned int n)
{
   // exchange progressively smaller groups: halves, bytes, nibbles, pairs, single bits
   n = (n >> 16) | (n << 16);
   n = ((n >> 8) & 0x00FF00FF) | ((n & 0x00FF00FF) << 8);
   n = ((n >> 4) & 0x0F0F0F0F) | ((n & 0x0F0F0F0F) << 4);
   n = ((n >> 2) & 0x33333333) | ((n & 0x33333333) << 2);
   n = ((n >> 1) & 0x55555555) | ((n & 0x55555555) << 1);
   return n;
}
976 
// Returns x multiplied by itself.
static float square(float x)
{
   float result = x;
   result *= x;
   return result;
}
981 
982 // this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
983 // as required by the specification. fast(?) implementation from stb.h
984 // @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
985 static int ilog(int32 n)
986 {
987  static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
988 
989  // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
990  if (n < (1 << 14))
991  if (n < (1 << 4)) return 0 + log2_4[n ];
992  else if (n < (1 << 9)) return 5 + log2_4[n >> 5];
993  else return 10 + log2_4[n >> 10];
994  else if (n < (1 << 24))
995  if (n < (1 << 19)) return 15 + log2_4[n >> 15];
996  else return 20 + log2_4[n >> 20];
997  else if (n < (1 << 29)) return 25 + log2_4[n >> 25];
998  else if (n < (1 << 31)) return 30 + log2_4[n >> 30];
999  else return 0; // signed n returns 0
1000 }
1001 
// Fallback value of pi, used only when no M_PI macro is already defined.
// The literal carries an 'f' suffix, so this fallback has type float.
1002 #ifndef M_PI
1003  #define M_PI 3.14159265358979323846264f // from CRC
1004 #endif
1005 
1006 // code length assigned to a value with no huffman encoding
1007 #define NO_CODE 255
1008 
1010 //
1011 // these functions are only called at setup, and only a few times
1012 // per file
1013 
1014 static float float32_unpack(uint32 x)
1015 {
1016  // from the specification
1017  uint32 mantissa = x & 0x1fffff;
1018  uint32 sign = x & 0x80000000;
1019  uint32 exp = (x & 0x7fe00000) >> 21;
1020  double res = sign ? -(double)mantissa : (double)mantissa;
1021  return (float) ldexp((float)res, exp-788);
1022 }
1023 
1024 
1025 // zlib & jpeg huffman tables assume that the output symbols
1026 // can either be arbitrarily arranged, or have monotonically
1027 // increasing frequencies--they rely on the lengths being sorted;
1028 // this makes for a very simple generation algorithm.
1029 // vorbis allows a huffman table with non-sorted lengths. This
1030 // requires a more sophisticated construction, since symbols in
1031 // order do not map to huffman codes "in order".
1032 static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
1033 {
1034  if (!c->sparse) {
1035  c->codewords [symbol] = huff_code;
1036  } else {
1037  c->codewords [count] = huff_code;
1038  c->codeword_lengths[count] = len;
1039  values [count] = symbol;
1040  }
1041 }
1042 
1043 static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
1044 {
1045  int i,k,m=0;
1046  uint32 available[32];
1047 
1048  memset(available, 0, sizeof(available));
1049  // find the first entry
1050  for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
1051  if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
1052  // add to the list
1053  add_entry(c, 0, k, m++, len[k], values);
1054  // add all available leaves
1055  for (i=1; i <= len[k]; ++i)
1056  available[i] = 1U << (32-i);
1057  // note that the above code treats the first case specially,
1058  // but it's really the same as the following code, so they
1059  // could probably be combined (except the initial code is 0,
1060  // and I use 0 in available[] to mean 'empty')
1061  for (i=k+1; i < n; ++i) {
1062  uint32 res;
1063  int z = len[i], y;
1064  if (z == NO_CODE) continue;
1065  // find lowest available leaf (should always be earliest,
1066  // which is what the specification calls for)
1067  // note that this property, and the fact we can never have
1068  // more than one free leaf at a given level, isn't totally
1069  // trivial to prove, but it seems true and the assert never
1070  // fires, so!
1071  while (z > 0 && !available[z]) --z;
1072  if (z == 0) { return FALSE; }
1073  res = available[z];
1074  assert(z >= 0 && z < 32);
1075  available[z] = 0;
1076  add_entry(c, bit_reverse(res), i, m++, len[i], values);
1077  // propogate availability up the tree
1078  if (z != len[i]) {
1079  assert(len[i] >= 0 && len[i] < 32);
1080  for (y=len[i]; y > z; --y) {
1081  assert(available[y] == 0);
1082  available[y] = res + (1 << (32-y));
1083  }
1084  }
1085  }
1086  return TRUE;
1087 }
1088 
1089 // accelerated huffman table allows fast O(1) match of all symbols
1090 // of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
// NOTE(review): this listing is missing lines 1091, 1098 and 1102 -- the
// function signature, the line opening the conditional closed by the
// "#endif" at 1100, and the line opening the block closed at 1109. The
// body operates on a codebook referred to as 'c'.
1092 {
1093  int i, len;
 // start with every slot marked as "no fast match"
1094  for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
1095  c->fast_huffman[i] = -1;
1096 
 // sparse codebooks are indexed by sorted entry, others by entry
1097  len = c->sparse ? c->sorted_entries : c->entries;
1099  if (len > 32767) len = 32767; // largest possible value we can encode!
1100  #endif
1101  for (i=0; i < len; ++i) {
1103  uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i];
1104  // set table entries for all bit combinations in the higher bits
1105  while (z < FAST_HUFFMAN_TABLE_SIZE) {
1106  c->fast_huffman[z] = i;
1107  z += 1 << c->codeword_lengths[i];
1108  }
1109  }
1110  }
1111 }
1112 
// Calling-convention marker placed on the comparison callbacks below:
// expands to __cdecl when _MSC_VER is defined and to nothing otherwise.
1113 #ifdef _MSC_VER
1114 #define STBV_CDECL __cdecl
1115 #else
1116 #define STBV_CDECL
1117 #endif
1118 
1119 static int STBV_CDECL uint32_compare(const void *p, const void *q)
1120 {
1121  uint32 x = * (uint32 *) p;
1122  uint32 y = * (uint32 *) q;
1123  return x < y ? -1 : x > y;
1124 }
1125 
1126 static int include_in_sort(Codebook *c, uint8 len)
1127 {
1128  if (c->sparse) { assert(len != NO_CODE); return TRUE; }
1129  if (len == NO_CODE) return FALSE;
1130  if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
1131  return FALSE;
1132 }
1133 
1134 // if the fast table above doesn't work, we want to binary
1135 // search them... need to reverse the bits
// Fills c->sorted_codewords with the bit-reversed codewords of every entry
// accepted by include_in_sort(), sorts them ascending with qsort, and
// appends a 0xffffffff sentinel. It then locates each accepted entry in
// the sorted array and records the mapping in c->sorted_values (and, for
// sparse codebooks, the length in c->codeword_lengths).
// 'lengths' holds per-symbol code lengths; 'values' is read only for
// sparse codebooks, where values[i] is the symbol stored at slot i.
1136 static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
1137 {
1138  int i, len;
1139  // build a list of all the entries
1140  // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
1141  // this is kind of a frivolous optimization--I don't see any performance improvement,
1142  // but it's like 4 extra lines of code, so.
1143  if (!c->sparse) {
1144  int k = 0;
1145  for (i=0; i < c->entries; ++i)
1146  if (include_in_sort(c, lengths[i]))
1147  c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
1148  assert(k == c->sorted_entries);
1149  } else {
1150  for (i=0; i < c->sorted_entries; ++i)
1151  c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
1152  }
1153 
1154  qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare);
 // sentinel one past the last sorted entry
1155  c->sorted_codewords[c->sorted_entries] = 0xffffffff;
1156 
1157  len = c->sparse ? c->sorted_entries : c->entries;
1158  // now we need to indicate how they correspond; we could either
1159  // #1: sort a different data structure that says who they correspond to
1160  // #2: for each sorted entry, search the original list to find who corresponds
1161  // #3: for each original entry, find the sorted entry
1162  // #1 requires extra storage, #2 is slow, #3 can use binary search!
1163  for (i=0; i < len; ++i) {
1164  int huff_len = c->sparse ? lengths[values[i]] : lengths[i];
1165  if (include_in_sort(c,huff_len)) {
1166  uint32 code = bit_reverse(c->codewords[i]);
1167  int x=0, n=c->sorted_entries;
 // binary search: [x, x+n) is the candidate range, halved each step
1168  while (n > 1) {
1169  // invariant: sc[x] <= code < sc[x+n]
1170  int m = x + (n >> 1);
1171  if (c->sorted_codewords[m] <= code) {
1172  x = m;
1173  n -= (n>>1);
1174  } else {
1175  n >>= 1;
1176  }
1177  }
1178  assert(c->sorted_codewords[x] == code);
1179  if (c->sparse) {
1180  c->sorted_values[x] = values[i];
1181  c->codeword_lengths[x] = huff_len;
1182  } else {
1183  c->sorted_values[x] = i;
1184  }
1185  }
1186  }
1187 }
1188 
1189 // only run while parsing the header (3 times)
1190 static int vorbis_validate(uint8 *data)
1191 {
1192  static uint8 vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' };
1193  return memcmp(data, vorbis, 6) == 0;
1194 }
1195 
1196 // called from setup only, once per code book
1197 // (formula implied by specification)
1198 static int lookup1_values(int entries, int dim)
1199 {
1200  int r = (int) floor(exp((float) log((float) entries) / dim));
1201  if ((int) floor(pow((float) r+1, dim)) <= entries) // (int) cast for MinGW warning;
1202  ++r; // floor() to avoid _ftol() when non-CRT
1203  assert(pow((float) r+1, dim) > entries);
1204  assert((int) floor(pow((float) r, dim)) <= entries); // (int),floor() as above
1205  return r;
1206 }
1207 
1208 // called twice per file
1209 static void compute_twiddle_factors(int n, float *A, float *B, float *C)
1210 {
1211  int n4 = n >> 2, n8 = n >> 3;
1212  int k,k2;
1213 
1214  for (k=k2=0; k < n4; ++k,k2+=2) {
1215  A[k2 ] = (float) cos(4*k*M_PI/n);
1216  A[k2+1] = (float) -sin(4*k*M_PI/n);
1217  B[k2 ] = (float) cos((k2+1)*M_PI/n/2) * 0.5f;
1218  B[k2+1] = (float) sin((k2+1)*M_PI/n/2) * 0.5f;
1219  }
1220  for (k=k2=0; k < n8; ++k,k2+=2) {
1221  C[k2 ] = (float) cos(2*(k2+1)*M_PI/n);
1222  C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
1223  }
1224 }
1225 
1226 static void compute_window(int n, float *window)
1227 {
1228  int n2 = n >> 1, i;
1229  for (i=0; i < n2; ++i)
1230  window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
1231 }
1232 
1233 static void compute_bitreverse(int n, uint16 *rev)
1234 {
1235  int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
1236  int i, n8 = n >> 3;
1237  for (i=0; i < n8; ++i)
1238  rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2;
1239 }
1240 
1241 static int init_blocksize(vorb *f, int b, int n)
1242 {
1243  int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
1244  f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1245  f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1246  f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
1247  if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem);
1248  compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
1249  f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1250  if (!f->window[b]) return error(f, VORBIS_outofmem);
1251  compute_window(n, f->window[b]);
1252  f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8);
1253  if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
1254  compute_bitreverse(n, f->bit_reverse[b]);
1255  return TRUE;
1256 }
1257 
1258 static void neighbors(uint16 *x, int n, int *plow, int *phigh)
1259 {
1260  int low = -1;
1261  int high = 65536;
1262  int i;
1263  for (i=0; i < n; ++i) {
1264  if (x[i] > low && x[i] < x[n]) { *plow = i; low = x[i]; }
1265  if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
1266  }
1267 }
1268 
1269 // this has been repurposed: the y field now holds the original index rather than a y value
// NOTE(review): the member declaration (listing line 1272) is absent from
// this listing; point_compare() below reads a field named 'x'.
1270 typedef struct
1271 {
1273 } Point;
1274 
1275 static int STBV_CDECL point_compare(const void *p, const void *q)
1276 {
1277  Point *a = (Point *) p;
1278  Point *b = (Point *) q;
1279  return a->x < b->x ? -1 : a->x > b->x;
1280 }
1281 
1282 //
1284 
1285 
// USE_MEMORY(z) selects the in-memory input path in the readers below:
// always true when STB_VORBIS_NO_STDIO is defined, otherwise true exactly
// when z->stream is non-NULL.
1286 #if defined(STB_VORBIS_NO_STDIO)
1287  #define USE_MEMORY(z) TRUE
1288 #else
1289  #define USE_MEMORY(z) ((z)->stream)
1290 #endif
1291 
1292 static uint8 get8(vorb *z)
1293 {
1294  if (USE_MEMORY(z)) {
1295  if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; }
1296  return *z->stream++;
1297  }
1298 
1299  #ifndef STB_VORBIS_NO_STDIO
1300  {
1301  int c = fgetc(z->f);
1302  if (c == EOF) { z->eof = TRUE; return 0; }
1303  return c;
1304  }
1305  #endif
1306 }
1307 
1308 static uint32 get32(vorb *f)
1309 {
1310  uint32 x;
1311  x = get8(f);
1312  x += get8(f) << 8;
1313  x += get8(f) << 16;
1314  x += (uint32) get8(f) << 24;
1315  return x;
1316 }
1317 
1318 static int getn(vorb *z, uint8 *data, int n)
1319 {
1320  if (USE_MEMORY(z)) {
1321  if (z->stream+n > z->stream_end) { z->eof = 1; return 0; }
1322  memcpy(data, z->stream, n);
1323  z->stream += n;
1324  return 1;
1325  }
1326 
1327  #ifndef STB_VORBIS_NO_STDIO
1328  if (fread(data, n, 1, z->f) == 1)
1329  return 1;
1330  else {
1331  z->eof = 1;
1332  return 0;
1333  }
1334  #endif
1335 }
1336 
1337 static void skip(vorb *z, int n)
1338 {
1339  if (USE_MEMORY(z)) {
1340  z->stream += n;
1341  if (z->stream >= z->stream_end) z->eof = 1;
1342  return;
1343  }
1344  #ifndef STB_VORBIS_NO_STDIO
1345  {
1346  long x = ftell(z->f);
1347  fseek(z->f, x+n, SEEK_SET);
1348  }
1349  #endif
1350 }
1351 
// Move the input position to byte offset 'loc' (relative to the start of
// the stream / f->f_start). Returns 1 on success and 0 on failure; f->eof
// is cleared first and set again on failure. Always returns 0 in push mode.
1352 static int set_file_offset(stb_vorbis *f, unsigned int loc)
1353 {
1354  #ifndef STB_VORBIS_NO_PUSHDATA_API
1355  if (f->push_mode) return 0;
1356  #endif
1357  f->eof = 0;
1358  if (USE_MEMORY(f)) {
 // reject targets at or past the end, or that wrapped around the pointer
1359  if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
1360  f->stream = f->stream_end;
1361  f->eof = 1;
1362  return 0;
1363  } else {
1364  f->stream = f->stream_start + loc;
1365  return 1;
1366  }
1367  }
1368  #ifndef STB_VORBIS_NO_STDIO
 // offsets that overflow when f_start is added, or that are >= 2^31, are
 // replaced by 0x7fffffff and flagged as eof before seeking
1369  if (loc + f->f_start < loc || loc >= 0x80000000) {
1370  loc = 0x7fffffff;
1371  f->eof = 1;
1372  } else {
1373  loc += f->f_start;
1374  }
1375  if (!fseek(f->f, loc, SEEK_SET))
1376  return 1;
 // seek failed: flag eof and reposition relative to the end of the file
1377  f->eof = 1;
1378  fseek(f->f, f->f_start, SEEK_END);
1379  return 0;
1380  #endif
1381 }
1382 
1383 
1384 static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };
1385 
1386 static int capture_pattern(vorb *f)
1387 {
1388  if (0x4f != get8(f)) return FALSE;
1389  if (0x67 != get8(f)) return FALSE;
1390  if (0x67 != get8(f)) return FALSE;
1391  if (0x53 != get8(f)) return FALSE;
1392  return TRUE;
1393 }
1394 
// bit masks for the page header flag byte (stored in f->page_flag)
1395 #define PAGEFLAG_continued_packet 1
1396 #define PAGEFLAG_first_page 2
1397 #define PAGEFLAG_last_page 4
1398 
// NOTE(review): the signature line (listing line 1399) and line 1443 are
// absent from this listing; callers below invoke this body as
// start_page_no_capturepattern(f).
// Parses the rest of a page header once the capture pattern has been
// consumed: version, flags, granule position, serial number, sequence
// number, CRC and the segment table. Returns TRUE on success, or the
// result of error() on a bad version or a short segment table.
1400 {
1401  uint32 loc0,loc1,n;
1402  // stream structure version
1403  if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
1404  // header flag
1405  f->page_flag = get8(f);
1406  // absolute granule position
1407  loc0 = get32(f);
1408  loc1 = get32(f);
1409  // @TODO: validate loc0,loc1 as valid positions?
1410  // stream serial number -- vorbis doesn't interleave, so discard
1411  get32(f);
1412  //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
1413  // page sequence number
1414  n = get32(f);
1415  f->last_page = n;
1416  // CRC32
1417  get32(f);
1418  // page_segments
1419  f->segment_count = get8(f);
1420  if (!getn(f, f->segments, f->segment_count))
1421  return error(f, VORBIS_unexpected_eof);
1422  // assume we _don't_ know the sample position of any segments
1423  f->end_seg_with_known_loc = -2;
 // a granule position with all 64 bits set means "no position"
1424  if (loc0 != ~0U || loc1 != ~0U) {
1425  int i;
1426  // determine which packet is the last one that will complete
1427  for (i=f->segment_count-1; i >= 0; --i)
1428  if (f->segments[i] < 255)
1429  break;
1430  // 'i' is now the index of the _last_ segment of a packet that ends
1431  if (i >= 0) {
1432  f->end_seg_with_known_loc = i;
1433  f->known_loc_for_packet = loc0;
1434  }
1435  }
1436  if (f->first_decode) {
1437  int i,len;
1438  ProbedPage p;
 // total page length: sum of segment sizes, plus 27 and one byte per segment
1439  len = 0;
1440  for (i=0; i < f->segment_count; ++i)
1441  len += f->segments[i];
1442  len += 27 + f->segment_count;
1444  p.page_end = p.page_start + len;
1445  p.last_decoded_sample = loc0;
1446  f->p_first = p;
1447  }
1448  f->next_seg = 0;
1449  return TRUE;
1450 }
1451 
// Begin reading a new page; the visible part delegates header parsing to
// start_page_no_capturepattern() and returns its result.
// NOTE(review): listing line 1454 is absent from this listing.
1452 static int start_page(vorb *f)
1453 {
1455  return start_page_no_capturepattern(f);
1456 }
1457 
// Prepare to read a new packet: load pages while no segment is pending
// (f->next_seg == -1), then reset the per-packet read state.
// Returns FALSE if start_page() fails, TRUE otherwise.
// NOTE(review): listing lines 1462-1463 are absent from this listing.
1458 static int start_packet(vorb *f)
1459 {
1460  while (f->next_seg == -1) {
1461  if (!start_page(f)) return FALSE;
1464  }
1465  f->last_seg = FALSE;
1466  f->valid_bits = 0;
1467  f->packet_bytes = 0;
1468  f->bytes_in_seg = 0;
1469  // f->next_seg is now valid
1470  return TRUE;
1471 }
1472 
// NOTE(review): the signature line (listing line 1473) and lines 1483 and
// 1488 are absent from this listing, which is why the braces below look
// unbalanced.
// When no segment is pending, reads and checks the 4-byte capture pattern
// itself so that end-of-input exactly at a page boundary returns FALSE
// without raising an error, then parses the page header. Finishes by
// calling start_packet().
1474 {
1475  if (f->next_seg == -1) {
1476  int x = get8(f);
1477  if (f->eof) return FALSE; // EOF at page boundary is not an error!
1478  if (0x4f != x ) return error(f, VORBIS_missing_capture_pattern);
1479  if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1480  if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1481  if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1482  if (!start_page_no_capturepattern(f)) return FALSE;
1484  // set up enough state that we can read this packet if we want,
1485  // e.g. during recovery
1486  f->last_seg = FALSE;
1487  f->bytes_in_seg = 0;
1489  }
1490  }
1491  return start_packet(f);
1492 }
1493 
// Advance to the next segment of the current packet and return its length
// in bytes (also stored in f->bytes_in_seg). Returns 0 when the packet's
// last segment was already consumed or when a needed new page cannot be
// started. A segment shorter than 255 bytes marks the end of the packet.
// NOTE(review): listing line 1501 is absent from this listing, which is
// why the brace at 1502 looks unmatched.
1494 static int next_segment(vorb *f)
1495 {
1496  int len;
1497  if (f->last_seg) return 0;
1498  if (f->next_seg == -1) {
1499  f->last_seg_which = f->segment_count-1; // in case start_page fails
1500  if (!start_page(f)) { f->last_seg = 1; return 0; }
1502  }
1503  len = f->segments[f->next_seg++];
1504  if (len < 255) {
1505  f->last_seg = TRUE;
1506  f->last_seg_which = f->next_seg-1;
1507  }
 // -1 signals that the next segment lives on a new page
1508  if (f->next_seg >= f->segment_count)
1509  f->next_seg = -1;
1510  assert(f->bytes_in_seg == 0);
1511  f->bytes_in_seg = len;
1512  return len;
1513 }
1514 
// EOP: value returned by get8_packet_raw() at end of packet
// INVALID_BITS: value stored in f->valid_bits once a read ran past the packet
1515 #define EOP (-1)
1516 #define INVALID_BITS (-1)
1517 
1518 static int get8_packet_raw(vorb *f)
1519 {
1520  if (!f->bytes_in_seg) { // CLANG!
1521  if (f->last_seg) return EOP;
1522  else if (!next_segment(f)) return EOP;
1523  }
1524  assert(f->bytes_in_seg > 0);
1525  --f->bytes_in_seg;
1526  ++f->packet_bytes;
1527  return get8(f);
1528 }
1529 
1530 static int get8_packet(vorb *f)
1531 {
1532  int x = get8_packet_raw(f);
1533  f->valid_bits = 0;
1534  return x;
1535 }
1536 
1537 static void flush_packet(vorb *f)
1538 {
1539  while (get8_packet_raw(f) != EOP);
1540 }
1541 
1542 // @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
1543 // as the huffman decoder?
// Returns the next n bits of the current packet, taken from the low end of
// the accumulator f->acc. Once a read runs past the end of the packet,
// f->valid_bits is set to INVALID_BITS and this and later calls return 0.
1544 static uint32 get_bits(vorb *f, int n)
1545 {
1546  uint32 z;
1547 
1548  if (f->valid_bits < 0) return 0;
1549  if (f->valid_bits < n) {
1550  if (n > 24) {
1551  // the accumulator technique below would not work correctly in this case
1552  z = get_bits(f, 24);
1553  z += get_bits(f, n-24) << 24;
1554  return z;
1555  }
1556  if (f->valid_bits == 0) f->acc = 0;
 // refill one byte at a time, placing each above the bits already held
1557  while (f->valid_bits < n) {
1558  int z = get8_packet_raw(f);
1559  if (z == EOP) {
1560  f->valid_bits = INVALID_BITS;
1561  return 0;
1562  }
1563  f->acc += z << f->valid_bits;
1564  f->valid_bits += 8;
1565  }
1566  }
1567  if (f->valid_bits < 0) return 0;
 // hand out the low n bits and drop them from the accumulator
1568  z = f->acc & ((1 << n)-1);
1569  f->acc >>= n;
1570  f->valid_bits -= n;
1571  return z;
1572 }
1573 
1574 // @OPTIMIZE: primary accumulator for huffman
1575 // expand the buffer to as many bits as possible without reading off end of packet
1576 // it might be nice to allow f->valid_bits and f->acc to be stored in registers,
1577 // e.g. cache them locally and decode locally
// NOTE(review): the signature line (listing line 1578) is absent from this
// listing; other code in this file calls this body as prep_huffman(f).
// Tops up f->acc one byte at a time until it holds more than 24 valid
// bits, stopping early when the packet has no more bytes.
1579 {
1580  if (f->valid_bits <= 24) {
1581  if (f->valid_bits == 0) f->acc = 0;
1582  do {
1583  int z;
1584  if (f->last_seg && !f->bytes_in_seg) return;
1585  z = get8_packet_raw(f);
1586  if (z == EOP) return;
1587  f->acc += (unsigned) z << f->valid_bits;
1588  f->valid_bits += 8;
1589  } while (f->valid_bits <= 24);
1590  }
1591 }
1592 
// NOTE(review): the enumerator lines (listing lines 1595-1597) are absent
// from this listing, so the constants declared here cannot be seen.
1593 enum
1594 {
1598 };
1599 
// NOTE(review): the signature line (listing line 1600) and line 1654 are
// absent from this listing; other code in this file calls this body as
// codebook_decode_scalar_raw(f,c).
// Slow-path huffman decode of one codeword from the bit accumulator: a
// binary search over c->sorted_codewords, or a linear scan of c->codewords
// for small non-sparse codebooks. Returns the decoded index (sorted index
// for sparse codebooks, symbol otherwise), or -1 when no complete code
// could be read; on the -1 paths after the initial NULL check,
// f->valid_bits is reset to 0.
1601 {
1602  int i;
1603  prep_huffman(f);
1604 
1605  if (c->codewords == NULL && c->sorted_codewords == NULL)
1606  return -1;
1607 
1608  // cases to use binary search: sorted_codewords && !c->codewords
1609  // sorted_codewords && c->entries > 8
1610  if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
1611  // binary search
1612  uint32 code = bit_reverse(f->acc);
1613  int x=0, n=c->sorted_entries, len;
1614 
1615  while (n > 1) {
1616  // invariant: sc[x] <= code < sc[x+n]
1617  int m = x + (n >> 1);
1618  if (c->sorted_codewords[m] <= code) {
1619  x = m;
1620  n -= (n>>1);
1621  } else {
1622  n >>= 1;
1623  }
1624  }
1625  // x is now the sorted index
1626  if (!c->sparse) x = c->sorted_values[x];
1627  // x is now sorted index if sparse, or symbol otherwise
1628  len = c->codeword_lengths[x];
 // only consume the code if the accumulator really holds that many bits
1629  if (f->valid_bits >= len) {
1630  f->acc >>= len;
1631  f->valid_bits -= len;
1632  return x;
1633  }
1634 
1635  f->valid_bits = 0;
1636  return -1;
1637  }
1638 
1639  // if small, linear search
1640  assert(!c->sparse);
1641  for (i=0; i < c->entries; ++i) {
1642  if (c->codeword_lengths[i] == NO_CODE) continue;
1643  if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {
1644  if (f->valid_bits >= c->codeword_lengths[i]) {
1645  f->acc >>= c->codeword_lengths[i];
1646  f->valid_bits -= c->codeword_lengths[i];
1647  return i;
1648  }
1649  f->valid_bits = 0;
1650  return -1;
1651  }
1652  }
1653 
1655  f->valid_bits = 0;
1656  return -1;
1657 }
1658 
// DECODE_RAW(var,f,c) decodes one huffman codeword from f using codebook c
// and stores the resulting index in var (-1 on failure). It first tries
// the c->fast_huffman table and falls back to codebook_decode_scalar_raw().
// Unless STB_VORBIS_NO_INLINE_DECODE is defined it expands inline;
// otherwise it calls codebook_decode_scalar().
1659 #ifndef STB_VORBIS_NO_INLINE_DECODE
1660 
1661 #define DECODE_RAW(var, f,c) \
1662  if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH) \
1663  prep_huffman(f); \
1664  var = f->acc & FAST_HUFFMAN_TABLE_MASK; \
1665  var = c->fast_huffman[var]; \
1666  if (var >= 0) { \
1667  int n = c->codeword_lengths[var]; \
1668  f->acc >>= n; \
1669  f->valid_bits -= n; \
1670  if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \
1671  } else { \
1672  var = codebook_decode_scalar_raw(f,c); \
1673  }
1674 
1675 #else
1676 
// function form of the same decode
// NOTE(review): listing line 1680 is absent from this listing.
1677 static int codebook_decode_scalar(vorb *f, Codebook *c)
1678 {
1679  int i;
1681  prep_huffman(f);
1682  // fast huffman table lookup
1683  i = f->acc & FAST_HUFFMAN_TABLE_MASK;
1684  i = c->fast_huffman[i];
1685  if (i >= 0) {
1686  f->acc >>= c->codeword_lengths[i];
1687  f->valid_bits -= c->codeword_lengths[i];
1688  if (f->valid_bits < 0) { f->valid_bits = 0; return -1; }
1689  return i;
1690  }
1691  return codebook_decode_scalar_raw(f,c);
1692 }
1693 
1694 #define DECODE_RAW(var,f,c) var = codebook_decode_scalar(f,c);
1695 
1696 #endif
1697 
// DECODE additionally maps the index through c->sorted_values for sparse codebooks
1698 #define DECODE(var,f,c) \
1699  DECODE_RAW(var,f,c) \
1700  if (c->sparse) var = c->sorted_values[var];
1701 
// DECODE_VQ is the raw decode unless STB_VORBIS_DIVIDES_IN_CODEBOOK is defined
1702 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1703  #define DECODE_VQ(var,f,c) DECODE_RAW(var,f,c)
1704 #else
1705  #define DECODE_VQ(var,f,c) DECODE(var,f,c)
1706 #endif
1707 
1708 
1709 
1710 
1711 
1712 
1713 // CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
1714 // where we avoid one addition
// As defined here, both element macros read c->multiplicands[off] directly
// and the base value is the constant 0.
1715 #define CODEBOOK_ELEMENT(c,off) (c->multiplicands[off])
1716 #define CODEBOOK_ELEMENT_FAST(c,off) (c->multiplicands[off])
1717 #define CODEBOOK_ELEMENT_BASE(c) (0)
1718 
// NOTE(review): the signature line (listing line 1719) and lines 1725 and
// 1733 are absent from this listing; the decode functions below call this
// body as codebook_decode_start(f,c) and treat a negative result as failure.
// Decodes the codeword that starts a vector (VQ) lookup and returns its
// index, or a negative value on failure.
1720 {
1721  int z = -1;
1722 
1723  // type 0 is only legal in a scalar context
1724  if (c->lookup_type == 0)
1726  else {
1727  DECODE_VQ(z,f,c);
1728  if (c->sparse) assert(z < c->sorted_entries);
1729  if (z < 0) { // check for EOP
1730  if (!f->bytes_in_seg)
1731  if (f->last_seg)
1732  return z;
1734  }
1735  }
1736  return z;
1737 }
1738 
// Decode one vector from codebook c and ADD its elements into
// output[0 .. len-1]; len is clamped to c->dimensions.
// Returns FALSE if no codeword could be decoded, TRUE otherwise.
1739 static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
1740 {
1741  int i,z = codebook_decode_start(f,c);
1742  if (z < 0) return FALSE;
1743  if (len > c->dimensions) len = c->dimensions;
1744 
1745 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
 // lookup type 1: element indices are the base-lookup_values digits of z
1746  if (c->lookup_type == 1) {
1747  float last = CODEBOOK_ELEMENT_BASE(c);
1748  int div = 1;
1749  for (i=0; i < len; ++i) {
1750  int off = (z / div) % c->lookup_values;
1751  float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1752  output[i] += val;
1753  if (c->sequence_p) last = val + c->minimum_value;
1754  div *= c->lookup_values;
1755  }
1756  return TRUE;
1757  }
1758 #endif
1759 
 // otherwise the vector's elements are stored contiguously from z*dimensions
1760  z *= c->dimensions;
1761  if (c->sequence_p) {
 // sequence_p: each element is offset by the running value 'last'
1762  float last = CODEBOOK_ELEMENT_BASE(c);
1763  for (i=0; i < len; ++i) {
1764  float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1765  output[i] += val;
1766  last = val + c->minimum_value;
1767  }
1768  } else {
1769  float last = CODEBOOK_ELEMENT_BASE(c);
1770  for (i=0; i < len; ++i) {
1771  output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1772  }
1773  }
1774 
1775  return TRUE;
1776 }
1777 
// Like codebook_decode(), but element i is added to output[i*step], so the
// decoded vector is scattered with a stride of 'step'.
// Returns FALSE if no codeword could be decoded, TRUE otherwise.
1778 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
1779 {
1780  int i,z = codebook_decode_start(f,c);
1781  float last = CODEBOOK_ELEMENT_BASE(c);
1782  if (z < 0) return FALSE;
1783  if (len > c->dimensions) len = c->dimensions;
1784 
1785 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
 // lookup type 1: element indices are the base-lookup_values digits of z
1786  if (c->lookup_type == 1) {
1787  int div = 1;
1788  for (i=0; i < len; ++i) {
1789  int off = (z / div) % c->lookup_values;
1790  float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1791  output[i*step] += val;
1792  if (c->sequence_p) last = val;
1793  div *= c->lookup_values;
1794  }
1795  return TRUE;
1796  }
1797 #endif
1798 
 // otherwise the vector's elements are stored contiguously from z*dimensions
1799  z *= c->dimensions;
1800  for (i=0; i < len; ++i) {
1801  float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1802  output[i*step] += val;
1803  if (c->sequence_p) last = val;
1804  }
1805 
1806  return TRUE;
1807 }
1808 
1809 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1810 {
1811  int c_inter = *c_inter_p;
1812  int p_inter = *p_inter_p;
1813  int i,z, effective = c->dimensions;
1814 
1815  // type 0 is only legal in a scalar context
1816  if (c->lookup_type == 0) return error(f, VORBIS_invalid_stream);
1817 
1818  while (total_decode > 0) {
1819  float last = CODEBOOK_ELEMENT_BASE(c);
1820  DECODE_VQ(z,f,c);
1821  #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1822  assert(!c->sparse || z < c->sorted_entries);
1823  #endif
1824  if (z < 0) {
1825  if (!f->bytes_in_seg)
1826  if (f->last_seg) return FALSE;
1827  return error(f, VORBIS_invalid_stream);
1828  }
1829 
1830  // if this will take us off the end of the buffers, stop short!
1831  // we check by computing the length of the virtual interleaved
1832  // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1833  // and the length we'll be using (effective)
1834  if (c_inter + p_inter*ch + effective > len * ch) {
1835  effective = len*ch - (p_inter*ch - c_inter);
1836  }
1837 
1838  #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1839  if (c->lookup_type == 1) {
1840  int div = 1;
1841  for (i=0; i < effective; ++i) {
1842  int off = (z / div) % c->lookup_values;
1843  float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1844  if (outputs[c_inter])
1845  outputs[c_inter][p_inter] += val;
1846  if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1847  if (c->sequence_p) last = val;
1848  div *= c->lookup_values;
1849  }
1850  } else
1851  #endif
1852  {
1853  z *= c->dimensions;
1854  if (c->sequence_p) {
1855  for (i=0; i < effective; ++i) {
1856  float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1857  if (outputs[c_inter])
1858  outputs[c_inter][p_inter] += val;
1859  if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1860  last = val;
1861  }
1862  } else {
1863  for (i=0; i < effective; ++i) {
1864  float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1865  if (outputs[c_inter])
1866  outputs[c_inter][p_inter] += val;
1867  if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1868  }
1869  }
1870  }
1871 
1872  total_decode -= effective;
1873  }
1874  *c_inter_p = c_inter;
1875  *p_inter_p = p_inter;
1876  return TRUE;
1877 }
1878 
// Evaluate, at position x, the integer line running from (x0,y0) to
// (x1,y1): the offset |y1-y0| * (x-x0) / (x1-x0) is computed with
// truncating integer division and then applied to y0 in the direction of
// the slope.
static int predict_point(int x, int x0, int x1, int y0, int y1)
{
   int rise = y1 - y0;
   int run = x1 - x0;
   // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
   int scaled = abs(rise) * (x - x0);
   int offset = scaled / run;
   if (rise < 0)
      return y0 - offset;
   return y0 + offset;
}
1888 
1889 // the following table is block-copied from the specification
// 256 entries, increasing from about 1.06e-07 up to 1.0; draw_line()
// indexes it with the integer y value of each point on a floor line.
1890 static float inverse_db_table[256] =
1891 {
1892  1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
1893  1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
1894  1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
1895  2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
1896  2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
1897  3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
1898  4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
1899  6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
1900  7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
1901  1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
1902  1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
1903  1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
1904  2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
1905  2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
1906  3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
1907  4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
1908  5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
1909  7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
1910  9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
1911  1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
1912  1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
1913  2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
1914  2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
1915  3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
1916  4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
1917  5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
1918  7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
1919  9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
1920  0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
1921  0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
1922  0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
1923  0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
1924  0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
1925  0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
1926  0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
1927  0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
1928  0.00092223983f, 0.00098217216f, 0.0010459992f, 0.0011139742f,
1929  0.0011863665f, 0.0012634633f, 0.0013455702f, 0.0014330129f,
1930  0.0015261382f, 0.0016253153f, 0.0017309374f, 0.0018434235f,
1931  0.0019632195f, 0.0020908006f, 0.0022266726f, 0.0023713743f,
1932  0.0025254795f, 0.0026895994f, 0.0028643847f, 0.0030505286f,
1933  0.0032487691f, 0.0034598925f, 0.0036847358f, 0.0039241906f,
1934  0.0041792066f, 0.0044507950f, 0.0047400328f, 0.0050480668f,
1935  0.0053761186f, 0.0057254891f, 0.0060975636f, 0.0064938176f,
1936  0.0069158225f, 0.0073652516f, 0.0078438871f, 0.0083536271f,
1937  0.0088964928f, 0.009474637f, 0.010090352f, 0.010746080f,
1938  0.011444421f, 0.012188144f, 0.012980198f, 0.013823725f,
1939  0.014722068f, 0.015678791f, 0.016697687f, 0.017782797f,
1940  0.018938423f, 0.020169149f, 0.021479854f, 0.022875735f,
1941  0.024362330f, 0.025945531f, 0.027631618f, 0.029427276f,
1942  0.031339626f, 0.033376252f, 0.035545228f, 0.037855157f,
1943  0.040315199f, 0.042935108f, 0.045725273f, 0.048696758f,
1944  0.051861348f, 0.055231591f, 0.058820850f, 0.062643361f,
1945  0.066714279f, 0.071049749f, 0.075666962f, 0.080584227f,
1946  0.085821044f, 0.091398179f, 0.097337747f, 0.10366330f,
1947  0.11039993f, 0.11757434f, 0.12521498f, 0.13335215f,
1948  0.14201813f, 0.15124727f, 0.16107617f, 0.17154380f,
1949  0.18269168f, 0.19456402f, 0.20720788f, 0.22067342f,
1950  0.23501402f, 0.25028656f, 0.26655159f, 0.28387361f,
1951  0.30232132f, 0.32196786f, 0.34289114f, 0.36517414f,
1952  0.38890521f, 0.41417847f, 0.44109412f, 0.46975890f,
1953  0.50028648f, 0.53279791f, 0.56742212f, 0.60429640f,
1954  0.64356699f, 0.68538959f, 0.72993007f, 0.77736504f,
1955  0.82788260f, 0.88168307f, 0.9389798f, 1.0f
1956 };
1957 
1958 
1959 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
1960 // note that you must produce bit-identical output to decode correctly;
1961 // this specific sequence of operations is specified in the spec (it's
1962 // drawing integer-quantized frequency-space lines that the encoder
1963 // expects to be exactly the same)
1964 // ... also, isn't the whole point of Bresenham's algorithm to NOT
1965 // have to divide in the setup? sigh.
// LINE_OP(a,b) is how a floor value b is applied to a destination sample a:
// by default the destination is multiplied by b; when
// STB_VORBIS_NO_DEFER_FLOOR is defined the destination is overwritten with b.
// NOTE(review): the macro arguments are not parenthesized, so pass only
// simple lvalue / primary expressions.
1966 #ifndef STB_VORBIS_NO_DEFER_FLOOR
1967 #define LINE_OP(a,b) a *= b
1968 #else
1969 #define LINE_OP(a,b) a = b
1970 #endif
1971 
// Optional lookup table used by draw_line() in place of an integer divide
// when ady < DIVTAB_NUMER and adx < DIVTAB_DENOM; indexed as [ady][adx].
// The table is only declared here; it is filled in elsewhere in the file.
1972 #ifdef STB_VORBIS_DIVIDE_TABLE
1973 #define DIVTAB_NUMER 32
1974 #define DIVTAB_DENOM 64
1975 int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB
1976 #endif
1977 
1978 static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
1979 {
1980  int dy = y1 - y0;
1981  int adx = x1 - x0;
1982  int ady = abs(dy);
1983  int base;
1984  int x=x0,y=y0;
1985  int err = 0;
1986  int sy;
1987 
1988 #ifdef STB_VORBIS_DIVIDE_TABLE
1989  if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) {
1990  if (dy < 0) {
1991  base = -integer_divide_table[ady][adx];
1992  sy = base-1;
1993  } else {
1994  base = integer_divide_table[ady][adx];
1995  sy = base+1;
1996  }
1997  } else {
1998  base = dy / adx;
1999  if (dy < 0)
2000  sy = base - 1;
2001  else
2002  sy = base+1;
2003  }
2004 #else
2005  base = dy / adx;
2006  if (dy < 0)
2007  sy = base - 1;
2008  else
2009  sy = base+1;
2010 #endif
2011  ady -= abs(base) * adx;
2012  if (x1 > n) x1 = n;
2013  if (x < x1) {
2014  LINE_OP(output[x], inverse_db_table[y]);
2015  for (++x; x < x1; ++x) {
2016  err += ady;
2017  if (err >= adx) {
2018  err -= adx;
2019  y += sy;
2020  } else
2021  y += base;
2022  LINE_OP(output[x], inverse_db_table[y]);
2023  }
2024  }
2025 }
2026 
2027 static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
2028 {
2029  int k;
2030  if (rtype == 0) {
2031  int step = n / book->dimensions;
2032  for (k=0; k < step; ++k)
2033  if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
2034  return FALSE;
2035  } else {
2036  for (k=0; k < n; ) {
2037  if (!codebook_decode(f, book, target+offset, n-k))
2038  return FALSE;
2039  k += book->dimensions;
2040  offset += book->dimensions;
2041  }
2042  }
2043  return TRUE;
2044 }
2045 
// Decode the residue vectors for one submap.
//
//   f               decoder state (codebooks, residue configs, temp allocator)
//   residue_buffers one float buffer per channel; buffers of channels that are
//                   not flagged in do_not_decode are zeroed for n floats first
//   ch              number of channels handled by this call
//   n               number of floats cleared per channel, also passed through
//                   to the interleaved decoder
//   rn              index into f->residue_config / f->residue_types
//   do_not_decode   per-channel flags; non-zero means skip that channel
//
// Up to 8 passes are made over the partitions. On pass 0 a classification
// word is read from the class book for each group of 'classwords' partitions;
// every pass then decodes each partition with the book selected by
// r->residue_books[class][pass] (a negative entry means "nothing this pass").
// Hitting EOP or a failed codebook decode jumps to 'done', leaving whatever
// was decoded so far in the buffers.
2046 static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode)
2047 {
2048  int i,j,pass;
2049  Residue *r = f->residue_config + rn;
2050  int rtype = f->residue_types[rn];
2051  int c = r->classbook;
2052  int classwords = f->codebooks[c].dimensions;
2053  int n_read = r->end - r->begin;
2054  int part_read = n_read / r->part_size;
2055  int temp_alloc_point = temp_alloc_save(f);
2056  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2057  uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
2058  #else
2059  int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications));
2060  #endif
2061 
2062  CHECK(f);
2063 
2064  for (i=0; i < ch; ++i)
2065  if (!do_not_decode[i])
2066  memset(residue_buffers[i], 0, sizeof(float) * n);
2067 
// residue type 2 with more than one channel: the channels are decoded as one
// interleaved vector and de-interleaved on the fly
2068  if (rtype == 2 && ch != 1) {
// if every channel is flagged do-not-decode there is nothing to do
2069  for (j=0; j < ch; ++j)
2070  if (!do_not_decode[j])
2071  break;
2072  if (j == ch)
2073  goto done;
2074 
2075  for (pass=0; pass < 8; ++pass) {
2076  int pcount = 0, class_set = 0;
2077  if (ch == 2) {
2078  while (pcount < part_read) {
2079  int z = r->begin + pcount*r->part_size;
2080  int c_inter = (z & 1), p_inter = z>>1;
2081  if (pass == 0) {
2082  Codebook *c = f->codebooks+r->classbook;
2083  int q;
2084  DECODE(q,f,c);
2085  if (q == EOP) goto done;
2086  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2087  part_classdata[0][class_set] = r->classdata[q];
2088  #else
2089  for (i=classwords-1; i >= 0; --i) {
2090  classifications[0][i+pcount] = q % r->classifications;
2091  q /= r->classifications;
2092  }
2093  #endif
2094  }
2095  for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2096  int z = r->begin + pcount*r->part_size;
2097  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2098  int c = part_classdata[0][class_set][i];
2099  #else
2100  int c = classifications[0][pcount];
2101  #endif
2102  int b = r->residue_books[c][pass];
2103  if (b >= 0) {
2104  Codebook *book = f->codebooks + b;
// NOTE(review): both arms of this #ifdef make the identical call
2105  #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
2106  if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2107  goto done;
2108  #else
2109  // saves 1%
2110  if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2111  goto done;
2112  #endif
2113  } else {
// no book for this class on this pass: skip over the partition
2114  z += r->part_size;
2115  c_inter = z & 1;
2116  p_inter = z >> 1;
2117  }
2118  }
2119  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2120  ++class_set;
2121  #endif
2122  }
// NOTE(review): this branch cannot be taken, because the enclosing 'if'
// already requires ch != 1
2123  } else if (ch == 1) {
2124  while (pcount < part_read) {
2125  int z = r->begin + pcount*r->part_size;
2126  int c_inter = 0, p_inter = z;
2127  if (pass == 0) {
2128  Codebook *c = f->codebooks+r->classbook;
2129  int q;
2130  DECODE(q,f,c);
2131  if (q == EOP) goto done;
2132  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2133  part_classdata[0][class_set] = r->classdata[q];
2134  #else
2135  for (i=classwords-1; i >= 0; --i) {
2136  classifications[0][i+pcount] = q % r->classifications;
2137  q /= r->classifications;
2138  }
2139  #endif
2140  }
2141  for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2142  int z = r->begin + pcount*r->part_size;
2143  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2144  int c = part_classdata[0][class_set][i];
2145  #else
2146  int c = classifications[0][pcount];
2147  #endif
2148  int b = r->residue_books[c][pass];
2149  if (b >= 0) {
2150  Codebook *book = f->codebooks + b;
2151  if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2152  goto done;
2153  } else {
2154  z += r->part_size;
2155  c_inter = 0;
2156  p_inter = z;
2157  }
2158  }
2159  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2160  ++class_set;
2161  #endif
2162  }
// general case: same as the ch == 2 path, but channel/position are derived
// with % and / instead of bit operations
2163  } else {
2164  while (pcount < part_read) {
2165  int z = r->begin + pcount*r->part_size;
2166  int c_inter = z % ch, p_inter = z/ch;
2167  if (pass == 0) {
2168  Codebook *c = f->codebooks+r->classbook;
2169  int q;
2170  DECODE(q,f,c);
2171  if (q == EOP) goto done;
2172  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2173  part_classdata[0][class_set] = r->classdata[q];
2174  #else
2175  for (i=classwords-1; i >= 0; --i) {
2176  classifications[0][i+pcount] = q % r->classifications;
2177  q /= r->classifications;
2178  }
2179  #endif
2180  }
2181  for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2182  int z = r->begin + pcount*r->part_size;
2183  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2184  int c = part_classdata[0][class_set][i];
2185  #else
2186  int c = classifications[0][pcount];
2187  #endif
2188  int b = r->residue_books[c][pass];
2189  if (b >= 0) {
2190  Codebook *book = f->codebooks + b;
2191  if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2192  goto done;
2193  } else {
2194  z += r->part_size;
2195  c_inter = z % ch;
2196  p_inter = z / ch;
2197  }
2198  }
2199  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2200  ++class_set;
2201  #endif
2202  }
2203  }
2204  }
2205  goto done;
2206  }
2207  CHECK(f);
2208 
// all other cases: each channel is classified and decoded separately
2209  for (pass=0; pass < 8; ++pass) {
2210  int pcount = 0, class_set=0;
2211  while (pcount < part_read) {
2212  if (pass == 0) {
2213  for (j=0; j < ch; ++j) {
2214  if (!do_not_decode[j]) {
2215  Codebook *c = f->codebooks+r->classbook;
2216  int temp;
2217  DECODE(temp,f,c);
2218  if (temp == EOP) goto done;
2219  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2220  part_classdata[j][class_set] = r->classdata[temp];
2221  #else
2222  for (i=classwords-1; i >= 0; --i) {
2223  classifications[j][i+pcount] = temp % r->classifications;
2224  temp /= r->classifications;
2225  }
2226  #endif
2227  }
2228  }
2229  }
2230  for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2231  for (j=0; j < ch; ++j) {
2232  if (!do_not_decode[j]) {
2233  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2234  int c = part_classdata[j][class_set][i];
2235  #else
2236  int c = classifications[j][pcount];
2237  #endif
2238  int b = r->residue_books[c][pass];
2239  if (b >= 0) {
2240  float *target = residue_buffers[j];
2241  int offset = r->begin + pcount * r->part_size;
// note: this 'n' (the partition size) shadows the function parameter n
2242  int n = r->part_size;
2243  Codebook *book = f->codebooks + b;
2244  if (!residue_decode(f, book, target, offset, n, rtype))
2245  goto done;
2246  }
2247  }
2248  }
2249  }
2250  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2251  ++class_set;
2252  #endif
2253  }
2254  }
// common exit: release the temporary classification storage
2255  done:
2256  CHECK(f);
2257  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2258 // temp_free(f,part_classdata);
2259  #else
2260  temp_free(f,classifications);
2261  #endif
2262  temp_alloc_restore(f,temp_alloc_point);
2263 }
2264 
2265 
2266 #if 0
2267 // slow way for debugging
// Direct O(n * n/2) evaluation of the inverse MDCT (disabled reference code).
// Reads the first n/2 floats of buffer as the spectrum and overwrites all n
// floats of buffer with the time-domain result. No scale factor is applied.
// NOTE(review): the malloc result is used without a NULL check.
2268 void inverse_mdct_slow(float *buffer, int n)
2269 {
2270  int i,j;
2271  int n2 = n >> 1;
2272  float *x = (float *) malloc(sizeof(*x) * n2);
2273  memcpy(x, buffer, sizeof(*x) * n2);
2274  for (i=0; i < n; ++i) {
2275  float acc = 0;
2276  for (j=0; j < n2; ++j)
2277  // formula from paper:
2278  //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2279  // formula from wikipedia
2280  //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2281  // these are equivalent, except the formula from the paper inverts the multiplier!
2282  // however, what actually works is NO MULTIPLIER!?!
2283  //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2284  acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2285  buffer[i] = acc;
2286  }
2287  free(x);
2288 }
2289 #elif 0
2290 // same as above, but just barely able to run in real time on modern machines
// Variant that precomputes the 4*n cosine values into a table and indexes it
// with (angle index & nmask). The table holds 16384 entries, so 4*n must not
// exceed that; the masking presumably relies on n being a power of two.
// The f and blocktype parameters are not used by this variant.
// NOTE(review): the malloc result is used without a NULL check.
2291 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2292 {
2293  float mcos[16384];
2294  int i,j;
2295  int n2 = n >> 1, nmask = (n << 2) -1;
2296  float *x = (float *) malloc(sizeof(*x) * n2);
2297  memcpy(x, buffer, sizeof(*x) * n2);
2298  for (i=0; i < 4*n; ++i)
2299  mcos[i] = (float) cos(M_PI / 2 * i / n);
2300 
2301  for (i=0; i < n; ++i) {
2302  float acc = 0;
2303  for (j=0; j < n2; ++j)
2304  acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask];
2305  buffer[i] = acc;
2306  }
2307  free(x);
2308 }
2309 #elif 0
2310 // transform to use a slow dct-iv; this is STILL basically trivial,
2311 // but only requires half as many ops
// In-place O(n^2) DCT-IV of the n floats in buffer, using a table of 8*n
// cosines. The fixed-size locals limit n to 2048 (x[2048], mcos[16384]).
// The local n2 is computed but never used.
2312 void dct_iv_slow(float *buffer, int n)
2313 {
2314  float mcos[16384];
2315  float x[2048];
2316  int i,j;
2317  int n2 = n >> 1, nmask = (n << 3) - 1;
2318  memcpy(x, buffer, sizeof(*x) * n);
2319  for (i=0; i < 8*n; ++i)
2320  mcos[i] = (float) cos(M_PI / 4 * i / n);
2321  for (i=0; i < n; ++i) {
2322  float acc = 0;
2323  for (j=0; j < n; ++j)
2324  acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask];
2325  buffer[i] = acc;
2326  }
2327 }
2328 
// Inverse MDCT built on dct_iv_slow(): transforms the first n/2 floats of
// buffer with a DCT-IV, then unfolds the n/2 results into all n outputs by
// copying / negating / reversing. temp[] holds 4096 floats, so n/2 must not
// exceed that. The f and blocktype parameters are not used.
2329 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2330 {
2331  int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4;
2332  float temp[4096];
2333 
2334  memcpy(temp, buffer, n2 * sizeof(float));
2335  dct_iv_slow(temp, n2); // returns -c'-d, a-b'
2336 
2337  for (i=0; i < n4 ; ++i) buffer[i] = temp[i+n4]; // a-b'
2338  for ( ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1]; // b-a', c+d'
2339  for ( ; i < n ; ++i) buffer[i] = -temp[i - n3_4]; // c'+d
2340 }
2341 #endif
2342 
// LIBVORBIS_MDCT defaults to 0; set it to non-zero to route inverse_mdct()
// through libvorbis' MDCT instead (declared extern below, so that library
// must be linked in).
2343 #ifndef LIBVORBIS_MDCT
2344 #define LIBVORBIS_MDCT 0
2345 #endif
2346 
2347 #if LIBVORBIS_MDCT
2348 // directly call the vorbis MDCT using an interface documented
2349 // by Jeff Roberts... useful for performance comparison
2350 typedef struct
2351 {
2352  int n;
2353  int log2n;
2354 
2355  float *trig;
2356  int *bitrev;
2357 
2358  float scale;
2359 } mdct_lookup;
2360 
2361 extern void mdct_init(mdct_lookup *lookup, int n);
2362 extern void mdct_clear(mdct_lookup *l);
2363 extern void mdct_backward(mdct_lookup *init, float *in, float *out);
2364 
// two cached lookups, one per block size seen so far (n == 0 means unused)
2365 mdct_lookup M1,M2;
2366 
// In-place inverse MDCT of buffer via mdct_backward(). A lookup is created
// lazily for each distinct n; only two sizes can be cached. The f and
// blocktype parameters are not used here.
2367 void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2368 {
2369  mdct_lookup *M;
2370  if (M1.n == n) M = &M1;
2371  else if (M2.n == n) M = &M2;
2372  else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }
2373  else {
// a third distinct size was requested: break into the debugger (x86 inline
// asm), then reinitialize M2
2374  if (M2.n) __asm int 3;
2375  mdct_init(&M2, n);
2376  M = &M2;
2377  }
2378 
2379  mdct_backward(M, buffer, buffer);
2380 }
2381 #endif
2382 
2383 
2384 // the following were split out into separate functions while optimizing;
2385 // they could be pushed back up but eh. __forceinline showed no change;
2386 // they're probably already being inlined.
// Iteration 0 of IMDCT step 3 over one span of the working buffer.
//
//   n      count of butterflies to run; must be a multiple of 4. n>>2 groups
//          of four butterflies (8 floats from each span) are processed.
//   e      working buffer
//   i_off  index of the highest element of the upper span; processing walks
//          downward from there
//   k_off  offset from the upper span to the lower span
//   A      twiddle table; one (A[0],A[1]) pair per butterfly, advancing by
//          8 floats each time
//
// Each butterfly replaces the upper pair with (upper + lower) and the lower
// pair with (upper - lower) rotated by the current twiddle pair.
static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
{
   float *upper = e + i_off;
   float *lower = upper + k_off;
   int groups, p;

   assert((n & 3) == 0);
   for (groups = n >> 2; groups > 0; --groups) {
      // four butterflies per group, at offsets 0,-2,-4,-6
      for (p = 0; p < 8; p += 2) {
         float *hi = upper - p;
         float *lo = lower - p;
         float diff0 = hi[ 0] - lo[ 0];
         float diff1 = hi[-1] - lo[-1];
         hi[ 0] += lo[ 0];
         hi[-1] += lo[-1];
         lo[ 0] = diff0 * A[0] - diff1 * A[1];
         lo[-1] = diff1 * A[0] + diff0 * A[1];
         A += 8;
      }
      upper -= 8;
      lower -= 8;
   }
}
2431 
// One "r" sweep of IMDCT step 3.
//
//   lim    count of butterflies to run; lim>>2 groups of four are processed
//   e      working buffer
//   d0     index of the highest element of the upper span (walks downward)
//   k_off  offset from the upper span to the lower span
//   A      twiddle table; (A[0],A[1]) is used for each butterfly
//   k1     number of floats A advances after every butterfly
//
// Each butterfly replaces the upper pair with (upper + lower) and the lower
// pair with (upper - lower) rotated by the current twiddle pair.
static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
{
   float *upper = e + d0;
   float *lower = upper + k_off;
   int groups, p;

   for (groups = lim >> 2; groups > 0; --groups) {
      // four butterflies per group, at offsets 0,-2,-4,-6
      for (p = 0; p < 8; p += 2) {
         float *hi = upper - p;
         float *lo = lower - p;
         float diff0 = hi[ 0] - lo[ 0];
         float diff1 = hi[-1] - lo[-1];
         hi[ 0] += lo[ 0];
         hi[-1] += lo[-1];
         lo[ 0] = diff0 * A[0] - diff1 * A[1];
         lo[-1] = diff1 * A[0] + diff0 * A[1];
         A += k1;
      }
      upper -= 8;
      lower -= 8;
   }
}
2481 
// One "s" sweep of IMDCT step 3: the same four twiddle pairs are reused for
// every group, so they are loaded once up front.
//
//   n      number of groups (four butterflies each) to process
//   e      working buffer
//   i_off  index of the highest element of the first upper span
//   k_off  offset from the upper span to the lower span
//   A      twiddle table; pair t is (A[a_off*t], A[a_off*t+1]) for t = 0..3
//   a_off  spacing, in floats, between the four twiddle pairs
//   k0     number of floats both spans move down after each group
static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
{
   float tw_re[4], tw_im[4];
   float *upper = e + i_off;
   float *lower = upper + k_off;
   int t, remaining;

   for (t = 0; t < 4; ++t) {
      tw_re[t] = A[a_off*t];
      tw_im[t] = A[a_off*t + 1];
   }

   for (remaining = n; remaining > 0; --remaining) {
      // butterfly t works on elements -2t and -2t-1 of each span
      for (t = 0; t < 4; ++t) {
         float *hi = upper - 2*t;
         float *lo = lower - 2*t;
         float diff0 = hi[ 0] - lo[ 0];
         float diff1 = hi[-1] - lo[-1];
         hi[ 0] = hi[ 0] + lo[ 0];
         hi[-1] = hi[-1] + lo[-1];
         lo[ 0] = diff0 * tw_re[t] - diff1 * tw_im[t];
         lo[-1] = diff1 * tw_re[t] + diff0 * tw_im[t];
      }
      upper -= k0;
      lower -= k0;
   }
}
2532 
2533 static __forceinline void iter_54(float *z)
2534 {
2535  float k00,k11,k22,k33;
2536  float y0,y1,y2,y3;
2537 
2538  k00 = z[ 0] - z[-4];
2539  y0 = z[ 0] + z[-4];
2540  y2 = z[-2] + z[-6];
2541  k22 = z[-2] - z[-6];
2542 
2543  z[-0] = y0 + y2; // z0 + z4 + z2 + z6
2544  z[-2] = y0 - y2; // z0 + z4 - z2 - z6
2545 
2546  // done with y0,y2
2547 
2548  k33 = z[-3] - z[-7];
2549 
2550  z[-4] = k00 + k33; // z0 - z4 + z3 - z7
2551  z[-6] = k00 - k33; // z0 - z4 - z3 + z7
2552 
2553  // done with k33
2554 
2555  k11 = z[-1] - z[-5];
2556  y1 = z[-1] + z[-5];
2557  y3 = z[-3] + z[-7];
2558 
2559  z[-1] = y1 + y3; // z1 + z5 + z3 + z7
2560  z[-3] = y1 - y3; // z1 + z5 - z3 - z7
2561  z[-5] = k11 - k22; // z1 - z5 + z2 - z6
2562  z[-7] = k11 + k22; // z1 - z5 - z2 + z6
2563 }
2564 
// Last three butterfly levels of IMDCT step 3, fused into a single sweep.
//
//   n       number of 16-float blocks to process
//   e       working buffer
//   i_off   index of the highest element of the first block; blocks are
//           processed walking downward, 16 floats at a time
//   A       twiddle table; only the single value A[base_n >> 3] is used
//   base_n  transform size used to locate that twiddle value
//
// For every block the first level combines element k with element k+8
// (counting downward); two of the four butterflies need the twiddle value
// and the other two are pure add/subtract/swap. The remaining two levels are
// delegated to iter_54() on each 8-float half.
static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
{
   const int twiddle_index = base_n >> 3;
   const float w = A[0+twiddle_index];
   float *cur = e + i_off;
   float *const stop = cur - 16 * n;

   for (; cur > stop; cur -= 16) {
      float d0, d1;

      // pair (0,1) with (8,9): trivial twiddle
      d0 = cur[-0] - cur[-8];
      d1 = cur[-1] - cur[-9];
      cur[-0] = cur[-0] + cur[-8];
      cur[-1] = cur[-1] + cur[-9];
      cur[-8] = d0;
      cur[-9] = d1;

      // pair (2,3) with (10,11): scaled by w
      d0 = cur[ -2] - cur[-10];
      d1 = cur[ -3] - cur[-11];
      cur[ -2] = cur[ -2] + cur[-10];
      cur[ -3] = cur[ -3] + cur[-11];
      cur[-10] = (d0+d1) * w;
      cur[-11] = (d1-d0) * w;

      // pair (4,5) with (12,13): the first difference is taken reversed
      // so that no negation is needed, and the results are stored swapped
      d0 = cur[-12] - cur[ -4];
      d1 = cur[ -5] - cur[-13];
      cur[ -4] = cur[ -4] + cur[-12];
      cur[ -5] = cur[ -5] + cur[-13];
      cur[-12] = d1;
      cur[-13] = d0;

      // pair (6,7) with (14,15): reversed first difference, scaled by w
      d0 = cur[-14] - cur[ -6];
      d1 = cur[ -7] - cur[-15];
      cur[ -6] = cur[ -6] + cur[-14];
      cur[ -7] = cur[ -7] + cur[-15];
      cur[-14] = (d0+d1) * w;
      cur[-15] = (d0-d1) * w;

      iter_54(cur);
      iter_54(cur-8);
   }
}
2608 
// Optimized in-place inverse MDCT.
//
//   buffer     on entry the first n/2 floats hold the spectral input; on
//              return all n floats hold the output
//   n          transform size
//   f          decoder state: supplies the twiddle tables f->A / f->B / f->C,
//              the bit-reverse table f->bit_reverse, and temporary storage
//   blocktype  selects which set of precomputed tables to use
//
// A scratch buffer of n/2 floats is taken from the decoder's temp allocator
// and released again before returning.
2609 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2610 {
2611  int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2612  int ld;
2613  // @OPTIMIZE: reduce register pressure by using fewer variables?
2614  int save_point = temp_alloc_save(f);
2615  float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));
2616  float *u=NULL,*v=NULL;
2617  // twiddle factors
2618  float *A = f->A[blocktype];
2619 
2620  // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2621  // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_naive' after this function.
2622 
2623  // kernel from paper
2624 
2625 
2626  // merged:
2627  // copy and reflect spectral data
2628  // step 0
2629 
2630  // note that it turns out that the items added together during
2631  // this step are, in fact, being added to themselves (as reflected
2632  // by step 0). inexplicable inefficiency! this became obvious
2633  // once I combined the passes.
2634 
2635  // so there's a missing 'times 2' here (for adding X to itself).
2636  // this propagates through linearly to the end, where the numbers
2637  // are 1/2 too small, and need to be compensated for.
2638 
2639  {
2640  float *d,*e, *AA, *e_stop;
2641  d = &buf2[n2-2];
2642  AA = A;
2643  e = &buffer[0];
2644  e_stop = &buffer[n2];
// first half: read buffer forwards, write buf2 backwards
2645  while (e != e_stop) {
2646  d[1] = (e[0] * AA[0] - e[2]*AA[1]);
2647  d[0] = (e[0] * AA[1] + e[2]*AA[0]);
2648  d -= 2;
2649  AA += 2;
2650  e += 4;
2651  }
2652 
// second half: read buffer backwards (negated), continue writing buf2 backwards
2653  e = &buffer[n2-3];
2654  while (d >= buf2) {
2655  d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
2656  d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
2657  d -= 2;
2658  AA += 2;
2659  e -= 4;
2660  }
2661  }
2662 
2663  // now we use symbolic names for these, so that we can
2664  // possibly swap their meaning as we change which operations
2665  // are in place
2666 
2667  u = buffer;
2668  v = buf2;
2669 
2670  // step 2 (paper output is w, now u)
2671  // this could be in place, but the data ends up in the wrong
2672  // place... _somebody_'s got to swap it, so this is nominated
2673  {
2674  float *AA = &A[n2-8];
2675  float *d0,*d1, *e0, *e1;
2676 
2677  e0 = &v[n4];
2678  e1 = &v[0];
2679 
2680  d0 = &u[n4];
2681  d1 = &u[0];
2682 
2683  while (AA >= A) {
2684  float v40_20, v41_21;
2685 
2686  v41_21 = e0[1] - e1[1];
2687  v40_20 = e0[0] - e1[0];
2688  d0[1] = e0[1] + e1[1];
2689  d0[0] = e0[0] + e1[0];
2690  d1[1] = v41_21*AA[4] - v40_20*AA[5];
2691  d1[0] = v40_20*AA[4] + v41_21*AA[5];
2692 
2693  v41_21 = e0[3] - e1[3];
2694  v40_20 = e0[2] - e1[2];
2695  d0[3] = e0[3] + e1[3];
2696  d0[2] = e0[2] + e1[2];
2697  d1[3] = v41_21*AA[0] - v40_20*AA[1];
2698  d1[2] = v40_20*AA[0] + v41_21*AA[1];
2699 
2700  AA -= 8;
2701 
2702  d0 += 4;
2703  d1 += 4;
2704  e0 += 4;
2705  e1 += 4;
2706  }
2707  }
2708 
2709  // step 3
2710  ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2711 
2712  // optimized step 3:
2713 
2714  // the original step3 loop can be nested r inside s or s inside r;
2715  // it's written originally as s inside r, but this is dumb when r
2716  // iterates many times, and s few. So I have two copies of it and
2717  // switch between them halfway.
2718 
2719  // this is iteration 0 of step 3
2720  imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
2721  imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
2722 
2723  // this is iteration 1 of step 3
2724  imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
2725  imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
2726  imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
2727  imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
2728 
// remaining early iterations: "r" loop innermost
2729  l=2;
2730  for (; l < (ld-3)>>1; ++l) {
2731  int k0 = n >> (l+2), k0_2 = k0>>1;
2732  int lim = 1 << (l+1);
2733  int i;
2734  for (i=0; i < lim; ++i)
2735  imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
2736  }
2737 
// later iterations: "s" loop innermost
2738  for (; l < ld-6; ++l) {
2739  int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
2740  int rlim = n >> (l+6), r;
2741  int lim = 1 << (l+1);
2742  int i_off;
2743  float *A0 = A;
2744  i_off = n2-1;
2745  for (r=rlim; r > 0; --r) {
2746  imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2747  A0 += k1*4;
2748  i_off -= 8;
2749  }
2750  }
2751 
2752  // iterations with count:
2753  // ld-6,-5,-4 all interleaved together
2754  // the big win comes from getting rid of needless flops
2755  // due to the constants on pass 5 & 4 being all 1 and 0;
2756  // combining them to be simultaneous to improve cache made little difference
2757  imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
2758 
2759  // output is u
2760 
2761  // step 4, 5, and 6
2762  // cannot be in-place because of step 5
2763  {
2764  uint16 *bitrev = f->bit_reverse[blocktype];
2765  // weirdly, I'd have thought reading sequentially and writing
2766  // erratically would have been better than vice-versa, but in
2767  // fact that's not what my testing showed. (That is, with
2768  // j = bitreverse(i), do you read i and write j, or read j and write i.)
2769 
2770  float *d0 = &v[n4-4];
2771  float *d1 = &v[n2-4];
2772  while (d0 >= v) {
2773  int k4;
2774 
2775  k4 = bitrev[0];
2776  d1[3] = u[k4+0];
2777  d1[2] = u[k4+1];
2778  d0[3] = u[k4+2];
2779  d0[2] = u[k4+3];
2780 
2781  k4 = bitrev[1];
2782  d1[1] = u[k4+0];
2783  d1[0] = u[k4+1];
2784  d0[1] = u[k4+2];
2785  d0[0] = u[k4+3];
2786 
2787  d0 -= 4;
2788  d1 -= 4;
2789  bitrev += 2;
2790  }
2791  }
2792  // (paper output is u, now v)
2793 
2794 
2795  // data must be in buf2
2796  assert(v == buf2);
2797 
2798  // step 7 (paper output is v, now v)
2799  // this is now in place
2800  {
2801  float *C = f->C[blocktype];
2802  float *d, *e;
2803 
2804  d = v;
2805  e = v + n2 - 4;
2806 
// d walks up from the start and e walks down from the end until they meet
2807  while (d < e) {
2808  float a02,a11,b0,b1,b2,b3;
2809 
2810  a02 = d[0] - e[2];
2811  a11 = d[1] + e[3];
2812 
2813  b0 = C[1]*a02 + C[0]*a11;
2814  b1 = C[1]*a11 - C[0]*a02;
2815 
2816  b2 = d[0] + e[ 2];
2817  b3 = d[1] - e[ 3];
2818 
2819  d[0] = b2 + b0;
2820  d[1] = b3 + b1;
2821  e[2] = b2 - b0;
2822  e[3] = b1 - b3;
2823 
2824  a02 = d[2] - e[0];
2825  a11 = d[3] + e[1];
2826 
2827  b0 = C[3]*a02 + C[2]*a11;
2828  b1 = C[3]*a11 - C[2]*a02;
2829 
2830  b2 = d[2] + e[ 0];
2831  b3 = d[3] - e[ 1];
2832 
2833  d[2] = b2 + b0;
2834  d[3] = b3 + b1;
2835  e[0] = b2 - b0;
2836  e[1] = b1 - b3;
2837 
2838  C += 4;
2839  d += 4;
2840  e -= 4;
2841  }
2842  }
2843 
2844  // data must be in buf2
2845 
2846 
2847  // step 8+decode (paper output is X, now buffer)
2848  // this generates pairs of data a la 8 and pushes them directly through
2849  // the decode kernel (pushing rather than pulling) to avoid having
2850  // to make another pass later
2851 
2852  // this cannot POSSIBLY be in place, so we refer to the buffers directly
2853 
2854  {
2855  float *d0,*d1,*d2,*d3;
2856 
2857  float *B = f->B[blocktype] + n2 - 8;
2858  float *e = buf2 + n2 - 8;
// four output cursors: d0/d2 move forward from buffer[0] and buffer[n2],
// d1/d3 move backward from buffer[n2-4] and buffer[n-4]
2859  d0 = &buffer[0];
2860  d1 = &buffer[n2-4];
2861  d2 = &buffer[n2];
2862  d3 = &buffer[n-4];
2863  while (e >= v) {
2864  float p0,p1,p2,p3;
2865 
2866  p3 = e[6]*B[7] - e[7]*B[6];
2867  p2 = -e[6]*B[6] - e[7]*B[7];
2868 
2869  d0[0] = p3;
2870  d1[3] = - p3;
2871  d2[0] = p2;
2872  d3[3] = p2;
2873 
2874  p1 = e[4]*B[5] - e[5]*B[4];
2875  p0 = -e[4]*B[4] - e[5]*B[5];
2876 
2877  d0[1] = p1;
2878  d1[2] = - p1;
2879  d2[1] = p0;
2880  d3[2] = p0;
2881 
2882  p3 = e[2]*B[3] - e[3]*B[2];
2883  p2 = -e[2]*B[2] - e[3]*B[3];
2884 
2885  d0[2] = p3;
2886  d1[1] = - p3;
2887  d2[2] = p2;
2888  d3[1] = p2;
2889 
2890  p1 = e[0]*B[1] - e[1]*B[0];
2891  p0 = -e[0]*B[0] - e[1]*B[1];
2892 
2893  d0[3] = p1;
2894  d1[0] = - p1;
2895  d2[3] = p0;
2896  d3[0] = p0;
2897 
2898  B -= 8;
2899  e -= 8;
2900  d0 += 4;
2901  d2 += 4;
2902  d1 -= 4;
2903  d3 -= 4;
2904  }
2905  }
2906 
2907 // temp_free(f,buf2);
2908  temp_alloc_restore(f,save_point);
2909 }
2910 
2911 #if 0
2912 // this is the original version of the above code, if you want to optimize it from scratch
// Step-by-step transcription of the paper's IMDCT (disabled reference code).
// Reads the first n/2 floats of buffer and overwrites all n floats. Twiddle
// tables are recomputed on every call and all working arrays live on the
// stack with fixed sizes (1<<13 floats each for u/X/v/w), so n must not
// exceed 8192.
2913 void inverse_mdct_naive(float *buffer, int n)
2914 {
2915  float s;
2916  float A[1 << 12], B[1 << 12], C[1 << 11];
2917  int i,k,k2,k4, n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2918  int n3_4 = n - n4, ld;
2919  // how can they claim this only uses N words?!
2920  // oh, because they're only used sparsely, whoops
2921  float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13];
2922  // set up twiddle factors
2923 
2924  for (k=k2=0; k < n4; ++k,k2+=2) {
2925  A[k2 ] = (float) cos(4*k*M_PI/n);
2926  A[k2+1] = (float) -sin(4*k*M_PI/n);
2927  B[k2 ] = (float) cos((k2+1)*M_PI/n/2);
2928  B[k2+1] = (float) sin((k2+1)*M_PI/n/2);
2929  }
2930  for (k=k2=0; k < n8; ++k,k2+=2) {
2931  C[k2 ] = (float) cos(2*(k2+1)*M_PI/n);
2932  C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
2933  }
2934 
2935  // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2936  // Note there are bugs in that pseudocode, presumably due to them attempting
2937  // to rename the arrays nicely rather than representing the way their actual
2938  // implementation bounces buffers back and forth. As a result, even in the
2939  // "some formulars corrected" version, a direct implementation fails. These
2940  // are noted below as "paper bug".
2941 
2942  // copy and reflect spectral data
2943  for (k=0; k < n2; ++k) u[k] = buffer[k];
2944  for ( ; k < n ; ++k) u[k] = -buffer[n - k - 1];
2945  // kernel from paper
2946  // step 1
2947  for (k=k2=k4=0; k < n4; k+=1, k2+=2, k4+=4) {
2948  v[n-k4-1] = (u[k4] - u[n-k4-1]) * A[k2] - (u[k4+2] - u[n-k4-3])*A[k2+1];
2949  v[n-k4-3] = (u[k4] - u[n-k4-1]) * A[k2+1] + (u[k4+2] - u[n-k4-3])*A[k2];
2950  }
2951  // step 2
2952  for (k=k4=0; k < n8; k+=1, k4+=4) {
2953  w[n2+3+k4] = v[n2+3+k4] + v[k4+3];
2954  w[n2+1+k4] = v[n2+1+k4] + v[k4+1];
2955  w[k4+3] = (v[n2+3+k4] - v[k4+3])*A[n2-4-k4] - (v[n2+1+k4]-v[k4+1])*A[n2-3-k4];
2956  w[k4+1] = (v[n2+1+k4] - v[k4+1])*A[n2-4-k4] + (v[n2+3+k4]-v[k4+3])*A[n2-3-k4];
2957  }
2958  // step 3
2959  ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2960  for (l=0; l < ld-3; ++l) {
2961  int k0 = n >> (l+2), k1 = 1 << (l+3);
2962  int rlim = n >> (l+4), r4, r;
2963  int s2lim = 1 << (l+2), s2;
2964  for (r=r4=0; r < rlim; r4+=4,++r) {
2965  for (s2=0; s2 < s2lim; s2+=2) {
2966  u[n-1-k0*s2-r4] = w[n-1-k0*s2-r4] + w[n-1-k0*(s2+1)-r4];
2967  u[n-3-k0*s2-r4] = w[n-3-k0*s2-r4] + w[n-3-k0*(s2+1)-r4];
2968  u[n-1-k0*(s2+1)-r4] = (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1]
2969  - (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1+1];
2970  u[n-3-k0*(s2+1)-r4] = (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1]
2971  + (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1+1];
2972  }
2973  }
2974  if (l+1 < ld-3) {
2975  // paper bug: ping-ponging of u&w here is omitted
2976  memcpy(w, u, sizeof(u));
2977  }
2978  }
2979 
2980  // step 4
2981  for (i=0; i < n8; ++i) {
2982  int j = bit_reverse(i) >> (32-ld+3);
2983  assert(j < n8);
2984  if (i == j) {
2985  // paper bug: original code probably swapped in place; if copying,
2986  // need to directly copy in this case
2987  int i8 = i << 3;
2988  v[i8+1] = u[i8+1];
2989  v[i8+3] = u[i8+3];
2990  v[i8+5] = u[i8+5];
2991  v[i8+7] = u[i8+7];
2992  } else if (i < j) {
2993  int i8 = i << 3, j8 = j << 3;
2994  v[j8+1] = u[i8+1], v[i8+1] = u[j8 + 1];
2995  v[j8+3] = u[i8+3], v[i8+3] = u[j8 + 3];
2996  v[j8+5] = u[i8+5], v[i8+5] = u[j8 + 5];
2997  v[j8+7] = u[i8+7], v[i8+7] = u[j8 + 7];
2998  }
2999  }
3000  // step 5
3001  for (k=0; k < n2; ++k) {
3002  w[k] = v[k*2+1];
3003  }
3004  // step 6
3005  for (k=k2=k4=0; k < n8; ++k, k2 += 2, k4 += 4) {
3006  u[n-1-k2] = w[k4];
3007  u[n-2-k2] = w[k4+1];
3008  u[n3_4 - 1 - k2] = w[k4+2];
3009  u[n3_4 - 2 - k2] = w[k4+3];
3010  }
3011  // step 7
3012  for (k=k2=0; k < n8; ++k, k2 += 2) {
3013  v[n2 + k2 ] = ( u[n2 + k2] + u[n-2-k2] + C[k2+1]*(u[n2+k2]-u[n-2-k2]) + C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3014  v[n-2 - k2] = ( u[n2 + k2] + u[n-2-k2] - C[k2+1]*(u[n2+k2]-u[n-2-k2]) - C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3015  v[n2+1+ k2] = ( u[n2+1+k2] - u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3016  v[n-1 - k2] = (-u[n2+1+k2] + u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3017  }
3018  // step 8
3019  for (k=k2=0; k < n4; ++k,k2 += 2) {
3020  X[k] = v[k2+n2]*B[k2 ] + v[k2+1+n2]*B[k2+1];
3021  X[n2-1-k] = v[k2+n2]*B[k2+1] - v[k2+1+n2]*B[k2 ];
3022  }
3023 
3024  // decode kernel to output
3025  // determined the following value experimentally
3026  // (by first figuring out what made inverse_mdct_slow work); then matching that here
3027  // (probably vorbis encoder premultiplies by n or n/2, to save it on the decoder?)
3028  s = 0.5; // theoretically would be n4
3029 
3030  // [[[ note! the s value of 0.5 is compensated for by the B[] in the current code,
3031  // so it needs to use the "old" B values to behave correctly, or else
3032  // set s to 1.0 ]]]
3033  for (i=0; i < n4 ; ++i) buffer[i] = s * X[i+n4];
3034  for ( ; i < n3_4; ++i) buffer[i] = -s * X[n3_4 - i - 1];
3035  for ( ; i < n ; ++i) buffer[i] = -s * X[i - n3_4];
3036 }
3037 #endif
3038 
3039 static float *get_window(vorb *f, int len)
3040 {
3041  len <<= 1;
3042  if (len == f->blocksize_0) return f->window[0];
3043  if (len == f->blocksize_1) return f->window[1];
3044  assert(0);
3045  return NULL;
3046 }
3047 
3048 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3049 typedef int16 YTYPE;
3050 #else
3051 typedef int YTYPE;
3052 #endif
3053 static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
3054 {
3055  int n2 = n >> 1;
3056  int s = map->chan[i].mux, floor;
3057  floor = map->submap_floor[s];
3058  if (f->floor_types[floor] == 0) {
3059  return error(f, VORBIS_invalid_stream);
3060  } else {
3061  Floor1 *g = &f->floor_config[floor].floor1;
3062  int j,q;
3063  int lx = 0, ly = finalY[0] * g->floor1_multiplier;
3064  for (q=1; q < g->values; ++q) {
3065  j = g->sorted_order[q];
3066  #ifndef STB_VORBIS_NO_DEFER_FLOOR
3067  if (finalY[j] >= 0)
3068  #else
3069  if (step2_flag[j])
3070  #endif
3071  {
3072  int hy = finalY[j] * g->floor1_multiplier;
3073  int hx = g->Xlist[j];
3074  if (lx != hx)
3075  draw_line(target, lx,ly, hx,hy, n2);
3076  CHECK(f);
3077  lx = hx, ly = hy;
3078  }
3079  }
3080  if (lx < n2) {
3081  // optimization of: draw_line(target, lx,ly, n,ly, n2);
3082  for (j=lx; j < n2; ++j)
3083  LINE_OP(target[j], inverse_db_table[ly]);
3084  CHECK(f);
3085  }
3086  }
3087  return TRUE;
3088 }
3089 
3090 // The meaning of "left" and "right"
3091 //
3092 // For a given frame:
3093 // we compute samples from 0..n
3094 // window_center is n/2
3095 // we'll window and mix the samples from left_start to left_end with data from the previous frame
3096 // all of the samples from left_end to right_start can be output without mixing; however,
3097 // this interval is 0-length except when transitioning between short and long frames
3098 // all of the samples from right_start to right_end need to be mixed with the next frame,
3099 // which we don't have, so those get saved in a buffer
3100 // frame N's right_end-right_start, the number of samples to mix with the next frame,
3101 // has to be the same as frame N+1's left_end-left_start (which they are by
3102 // construction)
3103 
// Start decoding the next audio packet: read its leading header bits, select
// the mode, and compute the window boundaries for the frame.
//
// Outputs (all written only on the path that returns TRUE):
//   *mode                            index into f->mode_config read from the packet
//   *p_left_start  / *p_left_end     bounds of the left (leading) window region
//   *p_right_start / *p_right_end    bounds of the right (trailing) window region
//
// Returns FALSE when f->eof is set, when maybe_start_packet() fails, or when
// the mode index reads as EOP or is >= f->mode_count. A packet whose type bit
// is non-zero returns error(f, VORBIS_bad_packet_type) in push mode; otherwise
// the packet is consumed byte by byte until EOP and the next packet is tried.
3104 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
3105 {
3106  Mode *m;
3107  int i, n, prev, next, window_center;
  // NOTE(review): listing line 3108 is absent here (numbering jumps from 3107
  // to 3109); a statement may be missing -- confirm against the real file.
3109 
3110  retry:
3111  if (f->eof) return FALSE;
3112  if (!maybe_start_packet(f))
3113  return FALSE;
3114  // check packet type
3115  if (get_bits(f,1) != 0) {
3116  if (IS_PUSH_MODE(f))
3117  return error(f,VORBIS_bad_packet_type);
  // pull mode: drain the rest of this packet, then try the next one
3118  while (EOP != get8_packet(f));
3119  goto retry;
3120  }
3121 
  // NOTE(review): listing line 3123 (presumably the statement guarded by this
  // `if`) is absent; as listed, the `if` would guard the get_bits() assignment
  // below -- confirm against the real file before compiling from this text.
3122  if (f->alloc.alloc_buffer)
3124 
  // mode number is stored in ilog(mode_count-1) bits
3125  i = get_bits(f, ilog(f->mode_count-1));
3126  if (i == EOP) return FALSE;
3127  if (i >= f->mode_count) return FALSE;
3128  *mode = i;
3129  m = f->mode_config + i;
  // modes with blockflag set use blocksize_1 and carry two extra flag bits
  // (prev, next); other modes use blocksize_0 with both flags zero
3130  if (m->blockflag) {
3131  n = f->blocksize_1;
3132  prev = get_bits(f,1);
3133  next = get_bits(f,1);
3134  } else {
3135  prev = next = 0;
3136  n = f->blocksize_0;
3137  }
3138 
3139 // WINDOWING
3140 
3141  window_center = n >> 1;
  // blockflag set and prev clear: the left window spans blocksize_0/2 samples
  // centred on n/4; otherwise it covers the whole first half of the frame
3142  if (m->blockflag && !prev) {
3143  *p_left_start = (n - f->blocksize_0) >> 2;
3144  *p_left_end = (n + f->blocksize_0) >> 2;
3145  } else {
3146  *p_left_start = 0;
3147  *p_left_end = window_center;
3148  }
  // same rule mirrored for the right window, centred on 3n/4
3149  if (m->blockflag && !next) {
3150  *p_right_start = (n*3 - f->blocksize_0) >> 2;
3151  *p_right_end = (n*3 + f->blocksize_0) >> 2;
3152  } else {
3153  *p_right_start = window_center;
3154  *p_right_end = n;
3155  }
3156 
3157  return TRUE;
3158 }
3159 
// Decode the remainder of an audio packet once the mode and window bounds
// are known (see vorbis_decode_initial()).
//
// Stages, in order: per-channel floor decode (rendering of the curve is
// deferred until after residue unless STB_VORBIS_NO_DEFER_FLOOR is defined),
// residue decode per submap, inverse channel coupling, applying the floors,
// inverse MDCT, and finally sample-position bookkeeping.
//
//   len          out: frame length to hand to the caller; right_end in the
//                common case, shorter on the truncated-final-frame path
//   m            mode of this packet (selects block size and mapping)
//   left_start, right_start, right_end
//                window bounds for this frame
//   left_end     not referenced in this function body
//   p_left       out: receives the adjusted left_start when deferred discard
//                samples are consumed
//
// Returns TRUE on success, or error(f, VORBIS_invalid_stream) when a
// channel's floor is type 0.
3160 static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
3161 {
3162  Mapping *map;
3163  int i,j,k,n,n2;
  // zero_channel: channel carries no floor this packet (may be re-enabled by
  // coupling below); really_zero_channel: snapshot taken before re-enabling
3164  int zero_channel[256];
3165  int really_zero_channel[256];
3166 
3167 // WINDOWING
3168 
3169  n = f->blocksize[m->blockflag];
3170  map = &f->mapping[m->mapping];
3171 
3172 // FLOORS
3173  n2 = n >> 1;
3174 
3175  CHECK(f);
3176 
3177  for (i=0; i < f->channels; ++i) {
3178  int s = map->chan[i].mux, floor;
3179  zero_channel[i] = FALSE;
3180  floor = map->submap_floor[s];
3181  if (f->floor_types[floor] == 0) {
3182  return error(f, VORBIS_invalid_stream);
3183  } else {
3184  Floor1 *g = &f->floor_config[floor].floor1;
  // one flag bit: set means this channel has floor data in this packet
3185  if (get_bits(f, 1)) {
3186  short *finalY;
3187  uint8 step2_flag[256];
3188  static int range_list[4] = { 256, 128, 86, 64 };
3189  int range = range_list[g->floor1_multiplier-1];
3190  int offset = 2;
3191  finalY = f->finalY[i];
  // the first two Y values are stored directly, ilog(range)-1 bits each
3192  finalY[0] = get_bits(f, ilog(range)-1);
3193  finalY[1] = get_bits(f, ilog(range)-1);
  // remaining Y values are codebook-coded, partition by partition
3194  for (j=0; j < g->partitions; ++j) {
3195  int pclass = g->partition_class_list[j];
3196  int cdim = g->class_dimensions[pclass];
3197  int cbits = g->class_subclasses[pclass];
3198  int csub = (1 << cbits)-1;
3199  int cval = 0;
3200  if (cbits) {
3201  Codebook *c = f->codebooks + g->class_masterbooks[pclass];
3202  DECODE(cval,f,c);
3203  }
3204  for (k=0; k < cdim; ++k) {
  // low cbits of cval select the subclass book for this dimension
3205  int book = g->subclass_books[pclass][cval & csub];
3206  cval = cval >> cbits;
3207  if (book >= 0) {
3208  int temp;
3209  Codebook *c = f->codebooks + book;
3210  DECODE(temp,f,c);
3211  finalY[offset++] = temp;
3212  } else
3213  finalY[offset++] = 0;
3214  }
3215  }
3216  if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec
  // turn the decoded residual Y values into absolute values by adding the
  // prediction from each point's two neighbors; step2_flag marks used points
3217  step2_flag[0] = step2_flag[1] = 1;
3218  for (j=2; j < g->values; ++j) {
3219  int low, high, pred, highroom, lowroom, room, val;
3220  low = g->neighbors[j][0];
3221  high = g->neighbors[j][1];
3222  //neighbors(g->Xlist, j, &low, &high);
3223  pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]);
3224  val = finalY[j];
3225  highroom = range - pred;
3226  lowroom = pred;
3227  if (highroom < lowroom)
3228  room = highroom * 2;
3229  else
3230  room = lowroom * 2;
3231  if (val) {
3232  step2_flag[low] = step2_flag[high] = 1;
3233  step2_flag[j] = 1;
3234  if (val >= room)
3235  if (highroom > lowroom)
3236  finalY[j] = val - lowroom + pred;
3237  else
3238  finalY[j] = pred - val + highroom - 1;
3239  else
3240  if (val & 1)
3241  finalY[j] = pred - ((val+1)>>1);
3242  else
3243  finalY[j] = pred + (val>>1);
3244  } else {
3245  step2_flag[j] = 0;
3246  finalY[j] = pred;
3247  }
3248  }
3249 
3250 #ifdef STB_VORBIS_NO_DEFER_FLOOR
3251  do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag);
3252 #else
3253  // defer final floor computation until _after_ residue
  // unused points are marked with -1 so do_floor() can skip them later
3254  for (j=0; j < g->values; ++j) {
3255  if (!step2_flag[j])
3256  finalY[j] = -1;
3257  }
3258 #endif
3259  } else {
3260  error:
3261  zero_channel[i] = TRUE;
3262  }
3263  // So we just defer everything else to later
3264 
3265  // at this point we've decoded the floor into buffer
3266  }
3267  }
3268  CHECK(f);
3269  // at this point we've decoded all floors
3270 
  // NOTE(review): listing line 3272 (presumably the statement guarded by this
  // `if`) is absent; as listed, the `if` would guard the memcpy below --
  // confirm against the real file.
3271  if (f->alloc.alloc_buffer)
3273 
3274  // re-enable coupled channels if necessary
3275  memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels);
3276  for (i=0; i < map->coupling_steps; ++i)
3277  if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) {
3278  zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE;
3279  }
3280 
3281  CHECK(f);
3282 // RESIDUE DECODE
3283  for (i=0; i < map->submaps; ++i) {
3284  float *residue_buffers[STB_VORBIS_MAX_CHANNELS];
3285  int r;
3286  uint8 do_not_decode[256];
3287  int ch = 0;
  // gather the channels that belong to submap i
3288  for (j=0; j < f->channels; ++j) {
3289  if (map->chan[j].mux == i) {
3290  if (zero_channel[j]) {
3291  do_not_decode[ch] = TRUE;
3292  residue_buffers[ch] = NULL;
3293  } else {
3294  do_not_decode[ch] = FALSE;
3295  residue_buffers[ch] = f->channel_buffers[j];
3296  }
3297  ++ch;
3298  }
3299  }
3300  r = map->submap_residue[i];
3301  decode_residue(f, residue_buffers, ch, n2, r, do_not_decode);
3302  }
3303 
  // NOTE(review): listing line 3305 (presumably the statement guarded by this
  // `if`) is absent; as listed, the `if` would guard the CHECK(f) below --
  // confirm against the real file.
3304  if (f->alloc.alloc_buffer)
3306  CHECK(f);
3307 
3308 // INVERSE COUPLING
  // coupling steps are undone in reverse order
3309  for (i = map->coupling_steps-1; i >= 0; --i) {
3310  int n2 = n >> 1;
3311  float *m = f->channel_buffers[map->chan[i].magnitude];
3312  float *a = f->channel_buffers[map->chan[i].angle ];
3313  for (j=0; j < n2; ++j) {
3314  float a2,m2;
3315  if (m[j] > 0)
3316  if (a[j] > 0)
3317  m2 = m[j], a2 = m[j] - a[j];
3318  else
3319  a2 = m[j], m2 = m[j] + a[j];
3320  else
3321  if (a[j] > 0)
3322  m2 = m[j], a2 = m[j] + a[j];
3323  else
3324  a2 = m[j], m2 = m[j] - a[j];
3325  m[j] = m2;
3326  a[j] = a2;
3327  }
3328  }
3329  CHECK(f);
3330 
3331  // finish decoding the floors
3332 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3333  for (i=0; i < f->channels; ++i) {
3334  if (really_zero_channel[i]) {
3335  memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3336  } else {
3337  do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL);
3338  }
3339  }
3340 #else
3341  for (i=0; i < f->channels; ++i) {
3342  if (really_zero_channel[i]) {
3343  memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3344  } else {
3345  for (j=0; j < n2; ++j)
3346  f->channel_buffers[i][j] *= f->floor_buffers[i][j];
3347  }
3348  }
3349 #endif
3350 
3351 // INVERSE MDCT
3352  CHECK(f);
3353  for (i=0; i < f->channels; ++i)
3354  inverse_mdct(f->channel_buffers[i], n, f, m->blockflag);
3355  CHECK(f);
3356 
3357  // this shouldn't be necessary, unless we exited on an error
3358  // and want to flush to get to the next packet
3359  flush_packet(f);
3360 
3361  if (f->first_decode) {
3362  // assume we start so first non-discarded sample is sample 0
3363  // this isn't to spec, but spec would require us to read ahead
3364  // and decode the size of all current frames--could be done,
3365  // but presumably it's not a commonly used feature
3366  f->current_loc = -n2; // start of first frame is positioned for discard
3367  // we might have to discard samples "from" the next frame too,
3368  // if we're lapping a large block then a small at the start?
3369  f->discard_samples_deferred = n - right_end;
3370  f->current_loc_valid = TRUE;
3371  f->first_decode = FALSE;
3372  } else if (f->discard_samples_deferred) {
  // consume deferred discards by moving left_start forward, at most up to
  // right_start; the adjusted value is reported back through *p_left
3373  if (f->discard_samples_deferred >= right_start - left_start) {
3374  f->discard_samples_deferred -= (right_start - left_start);
3375  left_start = right_start;
3376  *p_left = left_start;
3377  } else {
3378  left_start += f->discard_samples_deferred;
3379  *p_left = left_start;
3380  f->discard_samples_deferred = 0;
3381  }
3382  } else if (f->previous_length == 0 && f->current_loc_valid) {
3383  // we're recovering from a seek... that means we're going to discard
3384  // the samples from this packet even though we know our position from
3385  // the last page header, so we need to update the position based on
3386  // the discarded samples here
3387  // but wait, the code below is going to add this in itself even
3388  // on a discard, so we don't need to do it here...
3389  }
3390 
3391  // check if we have ogg information about the sample # for this packet
3392  if (f->last_seg_which == f->end_seg_with_known_loc) {
3393  // if we have a valid current loc, and this is final:
3394  if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) {
3395  uint32 current_end = f->known_loc_for_packet - (n-right_end);
3396  // then let's infer the size of the (probably) short final frame
3397  if (current_end < f->current_loc + (right_end-left_start)) {
3398  if (current_end < f->current_loc) {
3399  // negative truncation, that's impossible!
3400  *len = 0;
3401  } else {
3402  *len = current_end - f->current_loc;
3403  }
3404  *len += left_start;
3405  if (*len > right_end) *len = right_end; // this should never happen
3406  f->current_loc += *len;
3407  return TRUE;
3408  }
3409  }
3410  // otherwise, just set our sample loc
3411  // guess that the ogg granule pos refers to the _middle_ of the
3412  // last frame?
3413  // set f->current_loc to the position of left_start
3414  f->current_loc = f->known_loc_for_packet - (n2-left_start);
3415  f->current_loc_valid = TRUE;
3416  }
3417  if (f->current_loc_valid)
3418  f->current_loc += (right_start - left_start);
3419 
  // NOTE(review): listing line 3421 (presumably the statement guarded by this
  // `if`) is absent; as listed, the `if` would make the *len assignment below
  // conditional -- confirm against the real file.
3420  if (f->alloc.alloc_buffer)
3422  *len = right_end; // ignore samples after the window goes to 0
3423  CHECK(f);
3424 
3425  return TRUE;
3426 }
3427 
3428 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
3429 {
3430  int mode, left_end, right_end;
3431  if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
3432  return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left);
3433 }
3434 
3435 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
3436 {
3437  int prev,i,j;
3438  // we use right&left (the start of the right- and left-window sin()-regions)
3439  // to determine how much to return, rather than inferring from the rules
3440  // (same result, clearer code); 'left' indicates where our sin() window
3441  // starts, therefore where the previous window's right edge starts, and
3442  // therefore where to start mixing from the previous buffer. 'right'
3443  // indicates where our sin() ending-window starts, therefore that's where
3444  // we start saving, and where our returned-data ends.
3445 
3446  // mixin from previous window
3447  if (f->previous_length) {
3448  int i,j, n = f->previous_length;
3449  float *w = get_window(f, n);
3450  for (i=0; i < f->channels; ++i) {
3451  for (j=0; j < n; ++j)
3452  f->channel_buffers[i][left+j] =
3453  f->channel_buffers[i][left+j]*w[ j] +
3454  f->previous_window[i][ j]*w[n-1-j];
3455  }
3456  }
3457 
3458  prev = f->previous_length;
3459 
3460  // last half of this data becomes previous window
3461  f->previous_length = len - right;
3462 
3463  // @OPTIMIZE: could avoid this copy by double-buffering the
3464  // output (flipping previous_window with channel_buffers), but
3465  // then previous_window would have to be 2x as large, and
3466  // channel_buffers couldn't be temp mem (although they're NOT
3467  // currently temp mem, they could be (unless we want to level
3468  // performance by spreading out the computation))
3469  for (i=0; i < f->channels; ++i)
3470  for (j=0; right+j < len; ++j)
3471  f->previous_window[i][j] = f->channel_buffers[i][right+j];
3472 
3473  if (!prev)
3474  // there was no previous packet, so this data isn't valid...
3475  // this isn't entirely true, only the would-have-overlapped data
3476  // isn't valid, but this seems to be what the spec requires
3477  return 0;
3478 
3479  // truncate a short frame
3480  if (len < right) right = len;
3481 
3482  f->samples_output += right-left;
3483 
3484  return right - left;
3485 }
3486 
// NOTE(review): the signature line of this function (listing line 3487) is
// absent from this listing; presumably it is
// `static void vorbis_pump_first_frame(stb_vorbis *f)` -- confirm.
// Decodes one packet and, if that succeeds, passes it to
// vorbis_finish_frame(); the value returned by vorbis_finish_frame() is
// discarded.
3488 {
3489  int len, right, left;
3490  if (vorbis_decode_packet(f, &len, &left, &right))
3491  vorbis_finish_frame(f, len, left, right);
3492 }
3493 
3494 #ifndef STB_VORBIS_NO_PUSHDATA_API
3495 static int is_whole_packet_present(stb_vorbis *f, int end_page)
3496 {
3497  // make sure that we have the packet available before continuing...
3498  // this requires a full ogg parse, but we know we can fetch from f->stream
3499 
3500  // instead of coding this out explicitly, we could save the current read state,
3501  // read the next packet with get8() until end-of-packet, check f->eof, then
3502  // reset the state? but that would be slower, esp. since we'd have over 256 bytes
3503  // of state to restore (primarily the page segment table)
3504 
3505  int s = f->next_seg, first = TRUE;
3506  uint8 *p = f->stream;
3507 
3508  if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
3509  for (; s < f->segment_count; ++s) {
3510  p += f->segments[s];
3511  if (f->segments[s] < 255) // stop at first short segment
3512  break;
3513  }
3514  // either this continues, or it ends it...
3515  if (end_page)
3516  if (s < f->segment_count-1) return error(f, VORBIS_invalid_stream);
3517  if (s == f->segment_count)
3518  s = -1; // set 'crosses page' flag
3519  if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3520  first = FALSE;
3521  }
3522  for (; s == -1;) {
3523  uint8 *q;
3524  int n;
3525 
3526  // check that we have the page header ready
3527  if (p + 26 >= f->stream_end) return error(f, VORBIS_need_more_data);
3528  // validate the page
3529  if (memcmp(p, ogg_page_header, 4)) return error(f, VORBIS_invalid_stream);
3530  if (p[4] != 0) return error(f, VORBIS_invalid_stream);
3531  if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
3532  if (f->previous_length)
3533  if ((p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3534  // if no previous length, we're resynching, so we can come in on a continued-packet,
3535  // which we'll just drop
3536  } else {
3537  if (!(p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3538  }
3539  n = p[26]; // segment counts
3540  q = p+27; // q points to segment table
3541  p = q + n; // advance past header
3542  // make sure we've read the segment table
3543  if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3544  for (s=0; s < n; ++s) {
3545  p += q[s];
3546  if (q[s] < 255)
3547  break;
3548  }
3549  if (end_page)
3550  if (s < n-1) return error(f, VORBIS_invalid_stream);
3551  if (s == n)
3552  s = -1; // set 'crosses page' flag
3553  if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3554  first = FALSE;
3555  }
3556  return TRUE;
3557 }
3558 #endif // !STB_VORBIS_NO_PUSHDATA_API
3559 
3560 static int start_decoder(vorb *f)
3561 {
3562  uint8 header[6], x,y;
3563  int len,i,j,k, max_submaps = 0;
3564  int longest_floorlist=0;
3565 
3566  // first page, first packet
3567 
3568  if (!start_page(f)) return FALSE;
3569  // validate page flag
3573  // check for expected packet length
3574  if (f->segment_count != 1) return error(f, VORBIS_invalid_first_page);
3575  if (f->segments[0] != 30) return error(f, VORBIS_invalid_first_page);
3576  // read packet
3577  // check packet header
3578  if (get8(f) != VORBIS_packet_id) return error(f, VORBIS_invalid_first_page);
3579  if (!getn(f, header, 6)) return error(f, VORBIS_unexpected_eof);
3580  if (!vorbis_validate(header)) return error(f, VORBIS_invalid_first_page);
3581  // vorbis_version
3582  if (get32(f) != 0) return error(f, VORBIS_invalid_first_page);
3583  f->channels = get8(f); if (!f->channels) return error(f, VORBIS_invalid_first_page);
3585  f->sample_rate = get32(f); if (!f->sample_rate) return error(f, VORBIS_invalid_first_page);
3586  get32(f); // bitrate_maximum
3587  get32(f); // bitrate_nominal
3588  get32(f); // bitrate_minimum
3589  x = get8(f);
3590  {
3591  int log0,log1;
3592  log0 = x & 15;
3593  log1 = x >> 4;
3594  f->blocksize_0 = 1 << log0;
3595  f->blocksize_1 = 1 << log1;
3596  if (log0 < 6 || log0 > 13) return error(f, VORBIS_invalid_setup);
3597  if (log1 < 6 || log1 > 13) return error(f, VORBIS_invalid_setup);
3598  if (log0 > log1) return error(f, VORBIS_invalid_setup);
3599  }
3600 
3601  // framing_flag
3602  x = get8(f);
3603  if (!(x & 1)) return error(f, VORBIS_invalid_first_page);
3604 
3605  // second packet!
3606  if (!start_page(f)) return FALSE;
3607 
3608  if (!start_packet(f)) return FALSE;
3609  do {
3610  len = next_segment(f);
3611  skip(f, len);
3612  f->bytes_in_seg = 0;
3613  } while (len);
3614 
3615  // third packet!
3616  if (!start_packet(f)) return FALSE;
3617 
3618  #ifndef STB_VORBIS_NO_PUSHDATA_API
3619  if (IS_PUSH_MODE(f)) {
3620  if (!is_whole_packet_present(f, TRUE)) {
3621  // convert error in ogg header to write type
3622  if (f->error == VORBIS_invalid_stream)
3624  return FALSE;
3625  }
3626  }
3627  #endif
3628 
3629  crc32_init(); // always init it, to avoid multithread race conditions
3630 
3632  for (i=0; i < 6; ++i) header[i] = get8_packet(f);
3633  if (!vorbis_validate(header)) return error(f, VORBIS_invalid_setup);
3634 
3635  // codebooks
3636 
3637  f->codebook_count = get_bits(f,8) + 1;
3638  f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count);
3639  if (f->codebooks == NULL) return error(f, VORBIS_outofmem);
3640  memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count);
3641  for (i=0; i < f->codebook_count; ++i) {
3642  uint32 *values;
3643  int ordered, sorted_count;
3644  int total=0;
3645  uint8 *lengths;
3646  Codebook *c = f->codebooks+i;
3647  CHECK(f);
3648  x = get_bits(f, 8); if (x != 0x42) return error(f, VORBIS_invalid_setup);
3649  x = get_bits(f, 8); if (x != 0x43) return error(f, VORBIS_invalid_setup);
3650  x = get_bits(f, 8); if (x != 0x56) return error(f, VORBIS_invalid_setup);
3651  x = get_bits(f, 8);
3652  c->dimensions = (get_bits(f, 8)<<8) + x;
3653  x = get_bits(f, 8);
3654  y = get_bits(f, 8);
3655  c->entries = (get_bits(f, 8)<<16) + (y<<8) + x;
3656  ordered = get_bits(f,1);
3657  c->sparse = ordered ? 0 : get_bits(f,1);
3658 
3659  if (c->dimensions == 0 && c->entries != 0) return error(f, VORBIS_invalid_setup);
3660 
3661  if (c->sparse)
3662  lengths = (uint8 *) setup_temp_malloc(f, c->entries);
3663  else
3664  lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3665 
3666  if (!lengths) return error(f, VORBIS_outofmem);
3667 
3668  if (ordered) {
3669  int current_entry = 0;
3670  int current_length = get_bits(f,5) + 1;
3671  while (current_entry < c->entries) {
3672  int limit = c->entries - current_entry;
3673  int n = get_bits(f, ilog(limit));
3674  if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
3675  memset(lengths + current_entry, current_length, n);
3676  current_entry += n;
3677  ++current_length;
3678  }
3679  } else {
3680  for (j=0; j < c->entries; ++j) {
3681  int present = c->sparse ? get_bits(f,1) : 1;
3682  if (present) {
3683  lengths[j] = get_bits(f, 5) + 1;
3684  ++total;
3685  if (lengths[j] == 32)
3686  return error(f, VORBIS_invalid_setup);
3687  } else {
3688  lengths[j] = NO_CODE;
3689  }
3690  }
3691  }
3692 
3693  if (c->sparse && total >= c->entries >> 2) {
3694  // convert sparse items to non-sparse!
3695  if (c->entries > (int) f->setup_temp_memory_required)
3697 
3698  c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3699  if (c->codeword_lengths == NULL) return error(f, VORBIS_outofmem);
3700  memcpy(c->codeword_lengths, lengths, c->entries);
3701  setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs!
3702  lengths = c->codeword_lengths;
3703  c->sparse = 0;
3704  }
3705 
3706  // compute the size of the sorted tables
3707  if (c->sparse) {
3708  sorted_count = total;
3709  } else {
3710  sorted_count = 0;
3711  #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
3712  for (j=0; j < c->entries; ++j)
3713  if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
3714  ++sorted_count;
3715  #endif
3716  }
3717 
3718  c->sorted_entries = sorted_count;
3719  values = NULL;
3720 
3721  CHECK(f);
3722  if (!c->sparse) {
3723  c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries);
3724  if (!c->codewords) return error(f, VORBIS_outofmem);
3725  } else {
3726  unsigned int size;
3727  if (c->sorted_entries) {
3729  if (!c->codeword_lengths) return error(f, VORBIS_outofmem);
3730  c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries);
3731  if (!c->codewords) return error(f, VORBIS_outofmem);
3732  values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries);
3733  if (!values) return error(f, VORBIS_outofmem);
3734  }
3735  size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries;
3736  if (size > f->setup_temp_memory_required)
3737  f->setup_temp_memory_required = size;
3738  }
3739 
3740  if (!compute_codewords(c, lengths, c->entries, values)) {
3741  if (c->sparse) setup_temp_free(f, values, 0);
3742  return error(f, VORBIS_invalid_setup);
3743  }
3744 
3745  if (c->sorted_entries) {
3746  // allocate an extra slot for sentinels
3747  c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1));
3748  if (c->sorted_codewords == NULL) return error(f, VORBIS_outofmem);
3749  // allocate an extra slot at the front so that c->sorted_values[-1] is defined
3750  // so that we can catch that case without an extra if
3751  c->sorted_values = ( int *) setup_malloc(f, sizeof(*c->sorted_values ) * (c->sorted_entries+1));
3752  if (c->sorted_values == NULL) return error(f, VORBIS_outofmem);
3753  ++c->sorted_values;
3754  c->sorted_values[-1] = -1;
3755  compute_sorted_huffman(c, lengths, values);
3756  }
3757 
3758  if (c->sparse) {
3759  setup_temp_free(f, values, sizeof(*values)*c->sorted_entries);
3760  setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries);
3761  setup_temp_free(f, lengths, c->entries);
3762  c->codewords = NULL;
3763  }
3764 
3766 
3767  CHECK(f);
3768  c->lookup_type = get_bits(f, 4);
3769  if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup);
3770  if (c->lookup_type > 0) {
3771  uint16 *mults;
3772  c->minimum_value = float32_unpack(get_bits(f, 32));
3773  c->delta_value = float32_unpack(get_bits(f, 32));
3774  c->value_bits = get_bits(f, 4)+1;
3775  c->sequence_p = get_bits(f,1);
3776  if (c->lookup_type == 1) {
3778  } else {
3779  c->lookup_values = c->entries * c->dimensions;
3780  }
3781  if (c->lookup_values == 0) return error(f, VORBIS_invalid_setup);
3782  mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values);
3783  if (mults == NULL) return error(f, VORBIS_outofmem);
3784  for (j=0; j < (int) c->lookup_values; ++j) {
3785  int q = get_bits(f, c->value_bits);
3786  if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); }
3787  mults[j] = q;
3788  }
3789 
3790 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3791  if (c->lookup_type == 1) {
3792  int len, sparse = c->sparse;
3793  float last=0;
3794  // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
3795  if (sparse) {
3796  if (c->sorted_entries == 0) goto skip;
3797  c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions);
3798  } else
3799  c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries * c->dimensions);
3800  if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3801  len = sparse ? c->sorted_entries : c->entries;
3802  for (j=0; j < len; ++j) {
3803  unsigned int z = sparse ? c->sorted_values[j] : j;
3804  unsigned int div=1;
3805  for (k=0; k < c->dimensions; ++k) {
3806  int off = (z / div) % c->lookup_values;
3807  float val = mults[off];
3808  val = mults[off]*c->delta_value + c->minimum_value + last;
3809  c->multiplicands[j*c->dimensions + k] = val;
3810  if (c->sequence_p)
3811  last = val;
3812  if (k+1 < c->dimensions) {
3813  if (div > UINT_MAX / (unsigned int) c->lookup_values) {
3814  setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
3815  return error(f, VORBIS_invalid_setup);
3816  }
3817  div *= c->lookup_values;
3818  }
3819  }
3820  }
3821  c->lookup_type = 2;
3822  }
3823  else
3824 #endif
3825  {
3826  float last=0;
3827  CHECK(f);
3828  c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values);
3829  if (c->multiplicands == NULL) { setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3830  for (j=0; j < (int) c->lookup_values; ++j) {
3831  float val = mults[j] * c->delta_value + c->minimum_value + last;
3832  c->multiplicands[j] = val;
3833  if (c->sequence_p)
3834  last = val;
3835  }
3836  }
3837 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3838  skip:;
3839 #endif
3840  setup_temp_free(f, mults, sizeof(mults[0])*c->lookup_values);
3841 
3842  CHECK(f);
3843  }
3844  CHECK(f);
3845  }
3846 
3847  // time domain transfers (notused)
3848 
3849  x = get_bits(f, 6) + 1;
3850  for (i=0; i < x; ++i) {
3851  uint32 z = get_bits(f, 16);
3852  if (z != 0) return error(f, VORBIS_invalid_setup);
3853  }
3854 
3855  // Floors
3856  f->floor_count = get_bits(f, 6)+1;
3857  f->floor_config = (Floor *) setup_malloc(f, f->floor_count * sizeof(*f->floor_config));
3858  if (f->floor_config == NULL) return error(f, VORBIS_outofmem);
3859  for (i=0; i < f->floor_count; ++i) {
3860  f->floor_types[i] = get_bits(f, 16);
3861  if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
3862  if (f->floor_types[i] == 0) {
3863  Floor0 *g = &f->floor_config[i].floor0;
3864  g->order = get_bits(f,8);
3865  g->rate = get_bits(f,16);
3866  g->bark_map_size = get_bits(f,16);
3867  g->amplitude_bits = get_bits(f,6);
3868  g->amplitude_offset = get_bits(f,8);
3869  g->number_of_books = get_bits(f,4) + 1;
3870  for (j=0; j < g->number_of_books; ++j)
3871  g->book_list[j] = get_bits(f,8);
3873  } else {
3874  Point p[31*8+2];
3875  Floor1 *g = &f->floor_config[i].floor1;
3876  int max_class = -1;
3877  g->partitions = get_bits(f, 5);
3878  for (j=0; j < g->partitions; ++j) {
3879  g->partition_class_list[j] = get_bits(f, 4);
3880  if (g->partition_class_list[j] > max_class)
3881  max_class = g->partition_class_list[j];
3882  }
3883  for (j=0; j <= max_class; ++j) {
3884  g->class_dimensions[j] = get_bits(f, 3)+1;
3885  g->class_subclasses[j] = get_bits(f, 2);
3886  if (g->class_subclasses[j]) {
3887  g->class_masterbooks[j] = get_bits(f, 8);
3888  if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3889  }
3890  for (k=0; k < 1 << g->class_subclasses[j]; ++k) {
3891  g->subclass_books[j][k] = get_bits(f,8)-1;
3892  if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3893  }
3894  }
3895  g->floor1_multiplier = get_bits(f,2)+1;
3896  g->rangebits = get_bits(f,4);
3897  g->Xlist[0] = 0;
3898  g->Xlist[1] = 1 << g->rangebits;
3899  g->values = 2;
3900  for (j=0; j < g->partitions; ++j) {
3901  int c = g->partition_class_list[j];
3902  for (k=0; k < g->class_dimensions[c]; ++k) {
3903  g->Xlist[g->values] = get_bits(f, g->rangebits);
3904  ++g->values;
3905  }
3906  }
3907  // precompute the sorting
3908  for (j=0; j < g->values; ++j) {
3909  p[j].x = g->Xlist[j];
3910  p[j].y = j;
3911  }
3912  qsort(p, g->values, sizeof(p[0]), point_compare);
3913  for (j=0; j < g->values; ++j)
3914  g->sorted_order[j] = (uint8) p[j].y;
3915  // precompute the neighbors
3916  for (j=2; j < g->values; ++j) {
3917  int low = 0,hi = 0;
3918  neighbors(g->Xlist, j, &low,&hi);
3919  g->neighbors[j][0] = low;
3920  g->neighbors[j][1] = hi;
3921  }
3922 
3923  if (g->values > longest_floorlist)
3924  longest_floorlist = g->values;
3925  }
3926  }
3927 
3928  // Residue
3929  f->residue_count = get_bits(f, 6)+1;
3930  f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(f->residue_config[0]));
3931  if (f->residue_config == NULL) return error(f, VORBIS_outofmem);
3932  memset(f->residue_config, 0, f->residue_count * sizeof(f->residue_config[0]));
3933  for (i=0; i < f->residue_count; ++i) {
3934  uint8 residue_cascade[64];
3935  Residue *r = f->residue_config+i;
3936  f->residue_types[i] = get_bits(f, 16);
3937  if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
3938  r->begin = get_bits(f, 24);
3939  r->end = get_bits(f, 24);
3940  if (r->end < r->begin) return error(f, VORBIS_invalid_setup);
3941  r->part_size = get_bits(f,24)+1;
3942  r->classifications = get_bits(f,6)+1;
3943  r->classbook = get_bits(f,8);
3944  if (r->classbook >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3945  for (j=0; j < r->classifications; ++j) {
3946  uint8 high_bits=0;
3947  uint8 low_bits=get_bits(f,3);
3948  if (get_bits(f,1))
3949  high_bits = get_bits(f,5);
3950  residue_cascade[j] = high_bits*8 + low_bits;
3951  }
3952  r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications);
3953  if (r->residue_books == NULL) return error(f, VORBIS_outofmem);
3954  for (j=0; j < r->classifications; ++j) {
3955  for (k=0; k < 8; ++k) {
3956  if (residue_cascade[j] & (1 << k)) {
3957  r->residue_books[j][k] = get_bits(f, 8);
3958  if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3959  } else {
3960  r->residue_books[j][k] = -1;
3961  }
3962  }
3963  }
3964  // precompute the classifications[] array to avoid inner-loop mod/divide
3965  // call it 'classdata' since we already have r->classifications
3966  r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
3967  if (!r->classdata) return error(f, VORBIS_outofmem);
3968  memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
3969  for (j=0; j < f->codebooks[r->classbook].entries; ++j) {
3970  int classwords = f->codebooks[r->classbook].dimensions;
3971  int temp = j;
3972  r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords);
3973  if (r->classdata[j] == NULL) return error(f, VORBIS_outofmem);
3974  for (k=classwords-1; k >= 0; --k) {
3975  r->classdata[j][k] = temp % r->classifications;
3976  temp /= r->classifications;
3977  }
3978  }
3979  }
3980 
3981  f->mapping_count = get_bits(f,6)+1;
3982  f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping));
3983  if (f->mapping == NULL) return error(f, VORBIS_outofmem);
3984  memset(f->mapping, 0, f->mapping_count * sizeof(*f->mapping));
3985  for (i=0; i < f->mapping_count; ++i) {
3986  Mapping *m = f->mapping + i;
3987  int mapping_type = get_bits(f,16);
3988  if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
3989  m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan));
3990  if (m->chan == NULL) return error(f, VORBIS_outofmem);
3991  if (get_bits(f,1))
3992  m->submaps = get_bits(f,4)+1;
3993  else
3994  m->submaps = 1;
3995  if (m->submaps > max_submaps)
3996  max_submaps = m->submaps;
3997  if (get_bits(f,1)) {
3998  m->coupling_steps = get_bits(f,8)+1;
3999  for (k=0; k < m->coupling_steps; ++k) {
4000  m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
4001  m->chan[k].angle = get_bits(f, ilog(f->channels-1));
4002  if (m->chan[k].magnitude >= f->channels) return error(f, VORBIS_invalid_setup);
4003  if (m->chan[k].angle >= f->channels) return error(f, VORBIS_invalid_setup);
4004  if (m->chan[k].magnitude == m->chan[k].angle) return error(f, VORBIS_invalid_setup);
4005  }
4006  } else
4007  m->coupling_steps = 0;
4008 
4009  // reserved field
4010  if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
4011  if (m->submaps > 1) {
4012  for (j=0; j < f->channels; ++j) {
4013  m->chan[j].mux = get_bits(f, 4);
4014  if (m->chan[j].mux >= m->submaps) return error(f, VORBIS_invalid_setup);
4015  }
4016  } else
4017  // @SPECIFICATION: this case is missing from the spec
4018  for (j=0; j < f->channels; ++j)
4019  m->chan[j].mux = 0;
4020 
4021  for (j=0; j < m->submaps; ++j) {
4022  get_bits(f,8); // discard
4023  m->submap_floor[j] = get_bits(f,8);
4024  m->submap_residue[j] = get_bits(f,8);
4025  if (m->submap_floor[j] >= f->floor_count) return error(f, VORBIS_invalid_setup);
4026  if (m->submap_residue[j] >= f->residue_count) return error(f, VORBIS_invalid_setup);
4027  }
4028  }
4029 
4030  // Modes
4031  f->mode_count = get_bits(f, 6)+1;
4032  for (i=0; i < f->mode_count; ++i) {
4033  Mode *m = f->mode_config+i;
4034  m->blockflag = get_bits(f,1);
4035  m->windowtype = get_bits(f,16);
4036  m->transformtype = get_bits(f,16);
4037  m->mapping = get_bits(f,8);
4038  if (m->windowtype != 0) return error(f, VORBIS_invalid_setup);
4039  if (m->transformtype != 0) return error(f, VORBIS_invalid_setup);
4040  if (m->mapping >= f->mapping_count) return error(f, VORBIS_invalid_setup);
4041  }
4042 
4043  flush_packet(f);
4044 
4045  f->previous_length = 0;
4046 
4047  for (i=0; i < f->channels; ++i) {
4048  f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1);
4049  f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4050  f->finalY[i] = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist);
4051  if (f->channel_buffers[i] == NULL || f->previous_window[i] == NULL || f->finalY[i] == NULL) return error(f, VORBIS_outofmem);
4052  #ifdef STB_VORBIS_NO_DEFER_FLOOR
4053  f->floor_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4054  if (f->floor_buffers[i] == NULL) return error(f, VORBIS_outofmem);
4055  #endif
4056  }
4057 
4058  if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE;
4059  if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE;
4060  f->blocksize[0] = f->blocksize_0;
4061  f->blocksize[1] = f->blocksize_1;
4062 
4063 #ifdef STB_VORBIS_DIVIDE_TABLE
4064  if (integer_divide_table[1][1]==0)
4065  for (i=0; i < DIVTAB_NUMER; ++i)
4066  for (j=1; j < DIVTAB_DENOM; ++j)
4067  integer_divide_table[i][j] = i / j;
4068 #endif
4069 
4070  // compute how much temporary memory is needed
4071 
4072  // 1.
4073  {
4074  uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1);
4075  uint32 classify_mem;
4076  int i,max_part_read=0;
4077  for (i=0; i < f->residue_count; ++i) {
4078  Residue *r = f->residue_config + i;
4079  int n_read = r->end - r->begin;
4080  int part_read = n_read / r->part_size;
4081  if (part_read > max_part_read)
4082  max_part_read = part_read;
4083  }
4084  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
4085  classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *));
4086  #else
4087  classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *));
4088  #endif
4089 
4090  f->temp_memory_required = classify_mem;
4091  if (imdct_mem > f->temp_memory_required)
4092  f->temp_memory_required = imdct_mem;
4093  }
4094 
4095  f->first_decode = TRUE;
4096 
4097  if (f->alloc.alloc_buffer) {
4099  // check if there's enough temp memory so we don't error later
4100  if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset)
4101  return error(f, VORBIS_outofmem);
4102  }
4103 
4105 
4106  return TRUE;
4107 }
4108 
4109 static void vorbis_deinit(stb_vorbis *p)
4110 {
4111  int i,j;
4112  if (p->residue_config) {
4113  for (i=0; i < p->residue_count; ++i) {
4114  Residue *r = p->residue_config+i;
4115  if (r->classdata) {
4116  for (j=0; j < p->codebooks[r->classbook].entries; ++j)
4117  setup_free(p, r->classdata[j]);
4118  setup_free(p, r->classdata);
4119  }
4120  setup_free(p, r->residue_books);
4121  }
4122  }
4123 
4124  if (p->codebooks) {
4125  CHECK(p);
4126  for (i=0; i < p->codebook_count; ++i) {
4127  Codebook *c = p->codebooks + i;
4129  setup_free(p, c->multiplicands);
4130  setup_free(p, c->codewords);
4132  // c->sorted_values[-1] is the first entry in the array
4133  setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL);
4134  }
4135  setup_free(p, p->codebooks);
4136  }
4137  setup_free(p, p->floor_config);
4138  setup_free(p, p->residue_config);
4139  if (p->mapping) {
4140  for (i=0; i < p->mapping_count; ++i)
4141  setup_free(p, p->mapping[i].chan);
4142  setup_free(p, p->mapping);
4143  }
4144  CHECK(p);
4145  for (i=0; i < p->channels && i < STB_VORBIS_MAX_CHANNELS; ++i) {
4146  setup_free(p, p->channel_buffers[i]);
4147  setup_free(p, p->previous_window[i]);
4148  #ifdef STB_VORBIS_NO_DEFER_FLOOR
4149  setup_free(p, p->floor_buffers[i]);
4150  #endif
4151  setup_free(p, p->finalY[i]);
4152  }
4153  for (i=0; i < 2; ++i) {
4154  setup_free(p, p->A[i]);
4155  setup_free(p, p->B[i]);
4156  setup_free(p, p->C[i]);
4157  setup_free(p, p->window[i]);
4158  setup_free(p, p->bit_reverse[i]);
4159  }
4160  #ifndef STB_VORBIS_NO_STDIO
4161  if (p->close_on_free) fclose(p->f);
4162  #endif
4163 }
4164 
4166 {
4167  if (p == NULL) return;
4168  vorbis_deinit(p);
4169  setup_free(p,p);
4170 }
4171 
4172 static void vorbis_init(stb_vorbis *p, const stb_vorbis_alloc *z)
4173 {
4174  memset(p, 0, sizeof(*p)); // NULL out all malloc'd pointers to start
4175  if (z) {
4176  p->alloc = *z;
4179  }
4180  p->eof = 0;
4181  p->error = VORBIS__no_error;
4182  p->stream = NULL;
4183  p->codebooks = NULL;
4184  p->page_crc_tests = -1;
4185  #ifndef STB_VORBIS_NO_STDIO
4186  p->close_on_free = FALSE;
4187  p->f = NULL;
4188  #endif
4189 }
4190 
4192 {
4193  if (f->current_loc_valid)
4194  return f->current_loc;
4195  else
4196  return -1;
4197 }
4198 
4200 {
4201  stb_vorbis_info d;
4202  d.channels = f->channels;
4203  d.sample_rate = f->sample_rate;
4207  d.max_frame_size = f->blocksize_1 >> 1;
4208  return d;
4209 }
4210 
4212 {
4213  int e = f->error;
4214  f->error = VORBIS__no_error;
4215  return e;
4216 }
4217 
4219 {
4220  stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p));
4221  return p;
4222 }
4223 
4224 #ifndef STB_VORBIS_NO_PUSHDATA_API
4225 
4227 {
4228  f->previous_length = 0;
4229  f->page_crc_tests = 0;
4230  f->discard_samples_deferred = 0;
4231  f->current_loc_valid = FALSE;
4232  f->first_decode = FALSE;
4233  f->samples_output = 0;
4234  f->channel_buffer_start = 0;
4235  f->channel_buffer_end = 0;
4236 }
4237 
4238 static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len)
4239 {
4240  int i,n;
4241  for (i=0; i < f->page_crc_tests; ++i)
4242  f->scan[i].bytes_done = 0;
4243 
4244  // if we have room for more scans, search for them first, because
4245  // they may cause us to stop early if their header is incomplete
4247  if (data_len < 4) return 0;
4248  data_len -= 3; // need to look for 4-byte sequence, so don't miss
4249  // one that straddles a boundary
4250  for (i=0; i < data_len; ++i) {
4251  if (data[i] == 0x4f) {
4252  if (0==memcmp(data+i, ogg_page_header, 4)) {
4253  int j,len;
4254  uint32 crc;
4255  // make sure we have the whole page header
4256  if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
4257  // only read up to this page start, so hopefully we'll
4258  // have the whole page header start next time
4259  data_len = i;
4260  break;
4261  }
4262  // ok, we have it all; compute the length of the page
4263  len = 27 + data[i+26];
4264  for (j=0; j < data[i+26]; ++j)
4265  len += data[i+27+j];
4266  // scan everything up to the embedded crc (which we must 0)
4267  crc = 0;
4268  for (j=0; j < 22; ++j)
4269  crc = crc32_update(crc, data[i+j]);
4270  // now process 4 0-bytes
4271  for ( ; j < 26; ++j)
4272  crc = crc32_update(crc, 0);
4273  // len is the total number of bytes we need to scan
4274  n = f->page_crc_tests++;
4275  f->scan[n].bytes_left = len-j;
4276  f->scan[n].crc_so_far = crc;
4277  f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + (data[i+25]<<24);
4278  // if the last frame on a page is continued to the next, then
4279  // we can't recover the sample_loc immediately
4280  if (data[i+27+data[i+26]-1] == 255)
4281  f->scan[n].sample_loc = ~0;
4282  else
4283  f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + (data[i+ 9]<<24);
4284  f->scan[n].bytes_done = i+j;
4286  break;
4287  // keep going if we still have room for more
4288  }
4289  }
4290  }
4291  }
4292 
4293  for (i=0; i < f->page_crc_tests;) {
4294  uint32 crc;
4295  int j;
4296  int n = f->scan[i].bytes_done;
4297  int m = f->scan[i].bytes_left;
4298  if (m > data_len - n) m = data_len - n;
4299  // m is the bytes to scan in the current chunk
4300  crc = f->scan[i].crc_so_far;
4301  for (j=0; j < m; ++j)
4302  crc = crc32_update(crc, data[n+j]);
4303  f->scan[i].bytes_left -= m;
4304  f->scan[i].crc_so_far = crc;
4305  if (f->scan[i].bytes_left == 0) {
4306  // does it match?
4307  if (f->scan[i].crc_so_far == f->scan[i].goal_crc) {
4308  // Houston, we have page
4309  data_len = n+m; // consumption amount is wherever that scan ended
4310  f->page_crc_tests = -1; // drop out of page scan mode
4311  f->previous_length = 0; // decode-but-don't-output one frame
4312  f->next_seg = -1; // start a new page
4313  f->current_loc = f->scan[i].sample_loc; // set the current sample location
4314  // to the amount we'd have decoded had we decoded this page
4315  f->current_loc_valid = f->current_loc != ~0U;
4316  return data_len;
4317  }
4318  // delete entry
4319  f->scan[i] = f->scan[--f->page_crc_tests];
4320  } else {
4321  ++i;
4322  }
4323  }
4324 
4325  return data_len;
4326 }
4327 
4328 // return value: number of bytes we used
4330  stb_vorbis *f, // the file we're decoding
4331  const uint8 *data, int data_len, // the memory available for decoding
4332  int *channels, // place to write number of float * buffers
4333  float ***output, // place to write float ** array of float * buffers
4334  int *samples // place to write number of output samples
4335  )
4336 {
4337  int i;
4338  int len,right,left;
4339 
4340  if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4341 
4342  if (f->page_crc_tests >= 0) {
4343  *samples = 0;
4344  return vorbis_search_for_page_pushdata(f, (uint8 *) data, data_len);
4345  }
4346 
4347  f->stream = (uint8 *) data;
4348  f->stream_end = (uint8 *) data + data_len;
4349  f->error = VORBIS__no_error;
4350 
4351  // check that we have the entire packet in memory
4352  if (!is_whole_packet_present(f, FALSE)) {
4353  *samples = 0;
4354  return 0;
4355  }
4356 
4357  if (!vorbis_decode_packet(f, &len, &left, &right)) {
4358  // save the actual error we encountered
4359  enum STBVorbisError error = f->error;
4360  if (error == VORBIS_bad_packet_type) {
4361  // flush and resynch
4362  f->error = VORBIS__no_error;
4363  while (get8_packet(f) != EOP)
4364  if (f->eof) break;
4365  *samples = 0;
4366  return (int) (f->stream - data);
4367  }
4368  if (error == VORBIS_continued_packet_flag_invalid) {
4369  if (f->previous_length == 0) {
4370  // we may be resynching, in which case it's ok to hit one
4371  // of these; just discard the packet
4372  f->error = VORBIS__no_error;
4373  while (get8_packet(f) != EOP)
4374  if (f->eof) break;
4375  *samples = 0;
4376  return (int) (f->stream - data);
4377  }
4378  }
4379  // if we get an error while parsing, what to do?
4380  // well, it DEFINITELY won't work to continue from where we are!
4382  // restore the error that actually made us bail
4383  f->error = error;
4384  *samples = 0;
4385  return 1;
4386  }
4387 
4388  // success!
4389  len = vorbis_finish_frame(f, len, left, right);
4390  for (i=0; i < f->channels; ++i)
4391  f->outputs[i] = f->channel_buffers[i] + left;
4392 
4393  if (channels) *channels = f->channels;
4394  *samples = len;
4395  *output = f->outputs;
4396  return (int) (f->stream - data);
4397 }
4398 
4400  const unsigned char *data, int data_len, // the memory available for decoding
4401  int *data_used, // only defined if result is not NULL
4402  int *error, const stb_vorbis_alloc *alloc)
4403 {
4404  stb_vorbis *f, p;
4405  vorbis_init(&p, alloc);
4406  p.stream = (uint8 *) data;
4407  p.stream_end = (uint8 *) data + data_len;
4408  p.push_mode = TRUE;
4409  if (!start_decoder(&p)) {
4410  if (p.eof)
4411  *error = VORBIS_need_more_data;
4412  else
4413  *error = p.error;
4414  return NULL;
4415  }
4416  f = vorbis_alloc(&p);
4417  if (f) {
4418  *f = p;
4419  *data_used = (int) (f->stream - data);
4420  *error = 0;
4421  return f;
4422  } else {
4423  vorbis_deinit(&p);
4424  return NULL;
4425  }
4426 }
4427 #endif // STB_VORBIS_NO_PUSHDATA_API
4428 
4430 {
4431  #ifndef STB_VORBIS_NO_PUSHDATA_API
4432  if (f->push_mode) return 0;
4433  #endif
4434  if (USE_MEMORY(f)) return (unsigned int) (f->stream - f->stream_start);
4435  #ifndef STB_VORBIS_NO_STDIO
4436  return (unsigned int) (ftell(f->f) - f->f_start);
4437  #endif
4438 }
4439 
4440 #ifndef STB_VORBIS_NO_PULLDATA_API
4441 //
4442 // DATA-PULLING API
4443 //
4444 
4446 {
4447  for(;;) {
4448  int n;
4449  if (f->eof) return 0;
4450  n = get8(f);
4451  if (n == 0x4f) { // page header candidate
4452  unsigned int retry_loc = stb_vorbis_get_file_offset(f);
4453  int i;
4454  // check if we're off the end of a file_section stream
4455  if (retry_loc - 25 > f->stream_len)
4456  return 0;
4457  // check the rest of the header
4458  for (i=1; i < 4; ++i)
4459  if (get8(f) != ogg_page_header[i])
4460  break;
4461  if (f->eof) return 0;
4462  if (i == 4) {
4463  uint8 header[27];
4464  uint32 i, crc, goal, len;
4465  for (i=0; i < 4; ++i)
4466  header[i] = ogg_page_header[i];
4467  for (; i < 27; ++i)
4468  header[i] = get8(f);
4469  if (f->eof) return 0;
4470  if (header[4] != 0) goto invalid;
4471  goal = header[22] + (header[23] << 8) + (header[24]<<16) + (header[25]<<24);
4472  for (i=22; i < 26; ++i)
4473  header[i] = 0;
4474  crc = 0;
4475  for (i=0; i < 27; ++i)
4476  crc = crc32_update(crc, header[i]);
4477  len = 0;
4478  for (i=0; i < header[26]; ++i) {
4479  int s = get8(f);
4480  crc = crc32_update(crc, s);
4481  len += s;
4482  }
4483  if (len && f->eof) return 0;
4484  for (i=0; i < len; ++i)
4485  crc = crc32_update(crc, get8(f));
4486  // finished parsing probable page
4487  if (crc == goal) {
4488  // we could now check that it's either got the last
4489  // page flag set, OR it's followed by the capture
4490  // pattern, but I guess TECHNICALLY you could have
4491  // a file with garbage between each ogg page and recover
4492  // from it automatically? So even though that paranoia
4493  // might decrease the chance of an invalid decode by
4494  // another 2^32, not worth it since it would hose those
4495  // invalid-but-useful files?
4496  if (end)
4497  *end = stb_vorbis_get_file_offset(f);
4498  if (last) {
4499  if (header[5] & 0x04)
4500  *last = 1;
4501  else
4502  *last = 0;
4503  }
4504  set_file_offset(f, retry_loc-1);
4505  return 1;
4506  }
4507  }
4508  invalid:
4509  // not a valid page, so rewind and look for next one
4510  set_file_offset(f, retry_loc);
4511  }
4512  }
4513 }
4514 
4515 
4516 #define SAMPLE_unknown 0xffffffff
4517 
4518 // seeking is implemented with a binary search, which narrows down the range to
4519 // 64K, before using a linear search (because finding the synchronization
4520 // pattern can be expensive, and the chance we'd find the end page again is
4521 // relatively high for small ranges)
4522 //
4523 // two initial interpolation-style probes are used at the start of the search
4524 // to try to bound either side of the binary search sensibly, while still
4525 // working in O(log n) time if they fail.
4526 
4528 {
4529  uint8 header[27], lacing[255];
4530  int i,len;
4531 
4532  // record where the page starts
4534 
4535  // parse the header
4536  getn(f, header, 27);
4537  if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S')
4538  return 0;
4539  getn(f, lacing, header[26]);
4540 
4541  // determine the length of the payload
4542  len = 0;
4543  for (i=0; i < header[26]; ++i)
4544  len += lacing[i];
4545 
4546  // this implies where the page ends
4547  z->page_end = z->page_start + 27 + header[26] + len;
4548 
4549  // read the last-decoded sample out of the data
4550  z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24);
4551 
4552  // restore file state to where we were
4553  set_file_offset(f, z->page_start);
4554  return 1;
4555 }
4556 
4557 // rarely used function to seek back to the preceeding page while finding the
4558 // start of a packet
4559 static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset)
4560 {
4561  unsigned int previous_safe, end;
4562 
4563  // now we want to seek back 64K from the limit
4564  if (limit_offset >= 65536 && limit_offset-65536 >= f->first_audio_page_offset)
4565  previous_safe = limit_offset - 65536;
4566  else
4567  previous_safe = f->first_audio_page_offset;
4568 
4569  set_file_offset(f, previous_safe);
4570 
4571  while (vorbis_find_page(f, &end, NULL)) {
4572  if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset)
4573  return 1;
4574  set_file_offset(f, end);
4575  }
4576 
4577  return 0;
4578 }
4579 
4580 // implements the search logic for finding a page and starting decoding. if
4581 // the function succeeds, current_loc_valid will be true and current_loc will
4582 // be less than or equal to the provided sample number (the closer the
4583 // better).
4584 static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number)
4585 {
4586  ProbedPage left, right, mid;
4587  int i, start_seg_with_known_loc, end_pos, page_start;
4588  uint32 delta, stream_length, padding;
4589  double offset = 0, bytes_per_sample = 0;
4590  int probe = 0;
4591 
4592  // find the last page and validate the target sample
4593  stream_length = stb_vorbis_stream_length_in_samples(f);
4594  if (stream_length == 0) return error(f, VORBIS_seek_without_length);
4595  if (sample_number > stream_length) return error(f, VORBIS_seek_invalid);
4596 
4597  // this is the maximum difference between the window-center (which is the
4598  // actual granule position value), and the right-start (which the spec
4599  // indicates should be the granule position (give or take one)).
4600  padding = ((f->blocksize_1 - f->blocksize_0) >> 2);
4601  if (sample_number < padding)
4602  sample_number = 0;
4603  else
4604  sample_number -= padding;
4605 
4606  left = f->p_first;
4607  while (left.last_decoded_sample == ~0U) {
4608  // (untested) the first page does not have a 'last_decoded_sample'
4609  set_file_offset(f, left.page_end);
4610  if (!get_seek_page_info(f, &left)) goto error;
4611  }
4612 
4613  right = f->p_last;
4614  assert(right.last_decoded_sample != ~0U);
4615 
4616  // starting from the start is handled differently
4617  if (sample_number <= left.last_decoded_sample) {
4619  return 1;
4620  }
4621 
4622  while (left.page_end != right.page_start) {
4623  assert(left.page_end < right.page_start);
4624  // search range in bytes
4625  delta = right.page_start - left.page_end;
4626  if (delta <= 65536) {
4627  // there's only 64K left to search - handle it linearly
4628  set_file_offset(f, left.page_end);
4629  } else {
4630  if (probe < 2) {
4631  if (probe == 0) {
4632  // first probe (interpolate)
4633  double data_bytes = right.page_end - left.page_start;
4634  bytes_per_sample = data_bytes / right.last_decoded_sample;
4635  offset = left.page_start + bytes_per_sample * (sample_number - left.last_decoded_sample);
4636  } else {
4637  // second probe (try to bound the other side)
4638  double error = ((double) sample_number - mid.last_decoded_sample) * bytes_per_sample;
4639  if (error >= 0 && error < 8000) error = 8000;
4640  if (error < 0 && error > -8000) error = -8000;
4641  offset += error * 2;
4642  }
4643 
4644  // ensure the offset is valid
4645  if (offset < left.page_end)
4646  offset = left.page_end;
4647  if (offset > right.page_start - 65536)
4648  offset = right.page_start - 65536;
4649 
4650  set_file_offset(f, (unsigned int) offset);
4651  } else {
4652  // binary search for large ranges (offset by 32K to ensure
4653  // we don't hit the right page)
4654  set_file_offset(f, left.page_end + (delta / 2) - 32768);
4655  }
4656 
4657  if (!vorbis_find_page(f, NULL, NULL)) goto error;
4658  }
4659 
4660  for (;;) {
4661  if (!get_seek_page_info(f, &mid)) goto error;
4662  if (mid.last_decoded_sample != ~0U) break;
4663  // (untested) no frames end on this page
4664  set_file_offset(f, mid.page_end);
4665  assert(mid.page_start < right.page_start);
4666  }
4667 
4668  // if we've just found the last page again then we're in a tricky file,
4669  // and we're close enough.
4670  if (mid.page_start == right.page_start)
4671  break;
4672 
4673  if (sample_number < mid.last_decoded_sample)
4674  right = mid;
4675  else
4676  left = mid;
4677 
4678  ++probe;
4679  }
4680 
4681  // seek back to start of the last packet
4682  page_start = left.page_start;
4683  set_file_offset(f, page_start);
4684  if (!start_page(f)) return error(f, VORBIS_seek_failed);
4685  end_pos = f->end_seg_with_known_loc;
4686  assert(end_pos >= 0);
4687 
4688  for (;;) {
4689  for (i = end_pos; i > 0; --i)
4690  if (f->segments[i-1] != 255)
4691  break;
4692 
4693  start_seg_with_known_loc = i;
4694 
4695  if (start_seg_with_known_loc > 0 || !(f->page_flag & PAGEFLAG_continued_packet))
4696  break;
4697 
4698  // (untested) the final packet begins on an earlier page
4699  if (!go_to_page_before(f, page_start))
4700  goto error;
4701 
4702  page_start = stb_vorbis_get_file_offset(f);
4703  if (!start_page(f)) goto error;
4704  end_pos = f->segment_count - 1;
4705  }
4706 
4707  // prepare to start decoding
4708  f->current_loc_valid = FALSE;
4709  f->last_seg = FALSE;
4710  f->valid_bits = 0;
4711  f->packet_bytes = 0;
4712  f->bytes_in_seg = 0;
4713  f->previous_length = 0;
4714  f->next_seg = start_seg_with_known_loc;
4715 
4716  for (i = 0; i < start_seg_with_known_loc; i++)
4717  skip(f, f->segments[i]);
4718 
4719  // start decoding (optimizable - this frame is generally discarded)
4721  return 1;
4722 
4723 error:
4724  // try to restore the file to a valid state
4726  return error(f, VORBIS_seek_failed);
4727 }
4728 
4729 // the same as vorbis_decode_initial, but without advancing
4730 static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
4731 {
4732  int bits_read, bytes_read;
4733 
4734  if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode))
4735  return 0;
4736 
4737  // either 1 or 2 bytes were read, figure out which so we can rewind
4738  bits_read = 1 + ilog(f->mode_count-1);
4739  if (f->mode_config[*mode].blockflag)
4740  bits_read += 2;
4741  bytes_read = (bits_read + 7) / 8;
4742 
4743  f->bytes_in_seg += bytes_read;
4744  f->packet_bytes -= bytes_read;
4745  skip(f, -bytes_read);
4746  if (f->next_seg == -1)
4747  f->next_seg = f->segment_count - 1;
4748  else
4749  f->next_seg--;
4750  f->valid_bits = 0;
4751 
4752  return 1;
4753 }
4754 
4755 int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number)
4756 {
4757  uint32 max_frame_samples;
4758 
4759  if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4760 
4761  // fast page-level search
4762  if (!seek_to_sample_coarse(f, sample_number))
4763  return 0;
4764 
4765  assert(f->current_loc_valid);
4766  assert(f->current_loc <= sample_number);
4767 
4768  // linear search for the relevant packet
4769  max_frame_samples = (f->blocksize_1*3 - f->blocksize_0) >> 2;
4770  while (f->current_loc < sample_number) {
4771  int left_start, left_end, right_start, right_end, mode, frame_samples;
4772  if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
4773  return error(f, VORBIS_seek_failed);
4774  // calculate the number of samples returned by the next frame
4775  frame_samples = right_start - left_start;
4776  if (f->current_loc + frame_samples > sample_number) {
4777  return 1; // the next frame will contain the sample
4778  } else if (f->current_loc + frame_samples + max_frame_samples > sample_number) {
4779  // there's a chance the frame after this could contain the sample
4781  } else {
4782  // this frame is too early to be relevant
4783  f->current_loc += frame_samples;
4784  f->previous_length = 0;
4785  maybe_start_packet(f);
4786  flush_packet(f);
4787  }
4788  }
4789  // the next frame will start with the sample
4790  assert(f->current_loc == sample_number);
4791  return 1;
4792 }
4793 
4794 int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number)
4795 {
4796  if (!stb_vorbis_seek_frame(f, sample_number))
4797  return 0;
4798 
4799  if (sample_number != f->current_loc) {
4800  int n;
4801  uint32 frame_start = f->current_loc;
4802  stb_vorbis_get_frame_float(f, &n, NULL);
4803  assert(sample_number > frame_start);
4804  assert(f->channel_buffer_start + (int) (sample_number-frame_start) <= f->channel_buffer_end);
4805  f->channel_buffer_start += (sample_number - frame_start);
4806  }
4807 
4808  return 1;
4809 }
4810 
4812 {
4813  if (IS_PUSH_MODE(f)) { error(f, VORBIS_invalid_api_mixing); return; }
4815  f->previous_length = 0;
4816  f->first_decode = TRUE;
4817  f->next_seg = -1;
4819 }
4820 
4822 {
4823  unsigned int restore_offset, previous_safe;
4824  unsigned int end, last_page_loc;
4825 
4826  if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4827  if (!f->total_samples) {
4828  unsigned int last;
4829  uint32 lo,hi;
4830  char header[6];
4831 
4832  // first, store the current decode position so we can restore it
4833  restore_offset = stb_vorbis_get_file_offset(f);
4834 
4835  // now we want to seek back 64K from the end (the last page must
4836  // be at most a little less than 64K, but let's allow a little slop)
4837  if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset)
4838  previous_safe = f->stream_len - 65536;
4839  else
4840  previous_safe = f->first_audio_page_offset;
4841 
4842  set_file_offset(f, previous_safe);
4843  // previous_safe is now our candidate 'earliest known place that seeking
4844  // to will lead to the final page'
4845 
4846  if (!vorbis_find_page(f, &end, &last)) {
4847  // if we can't find a page, we're hosed!
4849  f->total_samples = 0xffffffff;
4850  goto done;
4851  }
4852 
4853  // check if there are more pages
4854  last_page_loc = stb_vorbis_get_file_offset(f);
4855 
4856  // stop when the last_page flag is set, not when we reach eof;
4857  // this allows us to stop short of a 'file_section' end without
4858  // explicitly checking the length of the section
4859  while (!last) {
4860  set_file_offset(f, end);
4861  if (!vorbis_find_page(f, &end, &last)) {
4862  // the last page we found didn't have the 'last page' flag
4863  // set. whoops!
4864  break;
4865  }
4866  previous_safe = last_page_loc+1;
4867  last_page_loc = stb_vorbis_get_file_offset(f);
4868  }
4869 
4870  set_file_offset(f, last_page_loc);
4871 
4872  // parse the header
4873  getn(f, (unsigned char *)header, 6);
4874  // extract the absolute granule position
4875  lo = get32(f);
4876  hi = get32(f);
4877  if (lo == 0xffffffff && hi == 0xffffffff) {
4880  goto done;
4881  }
4882  if (hi)
4883  lo = 0xfffffffe; // saturate
4884  f->total_samples = lo;
4885 
4886  f->p_last.page_start = last_page_loc;
4887  f->p_last.page_end = end;
4888  f->p_last.last_decoded_sample = lo;
4889 
4890  done:
4891  set_file_offset(f, restore_offset);
4892  }
4893  return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples;
4894 }
4895 
4897 {
4898  return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate;
4899 }
4900 
4901 
4902 
4903 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
4904 {
4905  int len, right,left,i;
4906  if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4907 
4908  if (!vorbis_decode_packet(f, &len, &left, &right)) {
4910  return 0;
4911  }
4912 
4913  len = vorbis_finish_frame(f, len, left, right);
4914  for (i=0; i < f->channels; ++i)
4915  f->outputs[i] = f->channel_buffers[i] + left;
4916 
4917  f->channel_buffer_start = left;
4918  f->channel_buffer_end = left+len;
4919 
4920  if (channels) *channels = f->channels;
4921  if (output) *output = f->outputs;
4922  return len;
4923 }
4924 
4925 #ifndef STB_VORBIS_NO_STDIO
4926 
4927 stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc, unsigned int length)
4928 {
4929  stb_vorbis *f, p;
4930  vorbis_init(&p, alloc);
4931  p.f = file;
4932  p.f_start = (uint32) ftell(file);
4933  p.stream_len = length;
4934  p.close_on_free = close_on_free;
4935  if (start_decoder(&p)) {
4936  f = vorbis_alloc(&p);
4937  if (f) {
4938  *f = p;
4940  return f;
4941  }
4942  }
4943  if (error) *error = p.error;
4944  vorbis_deinit(&p);
4945  return NULL;
4946 }
4947 
4948 stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc)
4949 {
4950  unsigned int len, start;
4951  start = (unsigned int) ftell(file);
4952  fseek(file, 0, SEEK_END);
4953  len = (unsigned int) (ftell(file) - start);
4954  fseek(file, start, SEEK_SET);
4955  return stb_vorbis_open_file_section(file, close_on_free, error, alloc, len);
4956 }
4957 
4958 stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc)
4959 {
4960  FILE *f = fopen(filename, "rb");
4961  if (f)
4962  return stb_vorbis_open_file(f, TRUE, error, alloc);
4963  if (error) *error = VORBIS_file_open_failure;
4964  return NULL;
4965 }
4966 #endif // STB_VORBIS_NO_STDIO
4967 
4968 stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, int *error, const stb_vorbis_alloc *alloc)
4969 {
4970  stb_vorbis *f, p;
4971  if (data == NULL) return NULL;
4972  vorbis_init(&p, alloc);
4973  p.stream = (uint8 *) data;
4974  p.stream_end = (uint8 *) data + len;
4975  p.stream_start = (uint8 *) p.stream;
4976  p.stream_len = len;
4977  p.push_mode = FALSE;
4978  if (start_decoder(&p)) {
4979  f = vorbis_alloc(&p);
4980  if (f) {
4981  *f = p;
4983  return f;
4984  }
4985  }
4986  if (error) *error = p.error;
4987  vorbis_deinit(&p);
4988  return NULL;
4989 }
4990 
4991 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
4992 #define PLAYBACK_MONO 1
4993 #define PLAYBACK_LEFT 2
4994 #define PLAYBACK_RIGHT 4
4995 
4996 #define L (PLAYBACK_LEFT | PLAYBACK_MONO)
4997 #define C (PLAYBACK_LEFT | PLAYBACK_RIGHT | PLAYBACK_MONO)
4998 #define R (PLAYBACK_RIGHT | PLAYBACK_MONO)
4999 
5000 static int8 channel_position[7][6] =
5001 {
5002  { 0 },
5003  { C },
5004  { L, R },
5005  { L, C, R },
5006  { L, R, L, R },
5007  { L, C, R, L, R },
5008  { L, C, R, L, R, C },
5009 };
5010 
5011 
// Float -> scaled-integer conversion helpers.
//
//   FASTDEF(x)                        declares the scratch variable the fast
//                                     path needs (expands to nothing otherwise)
//   FAST_SCALED_FLOAT_TO_INT(t,x,s)   yields x scaled by 2^s as an int
//   check_endianness()                expands to nothing in both variants
//
// All macro parameters are parenthesized in the expansions so that expression
// arguments (e.g. a shift written as `a+b`) are evaluated as a unit.
#ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
   // float/int overlay used to read back the bit pattern of a float
   typedef union {
      float f;
      int i;
   } float_conv;
   // compile-time size check: the array length is 0 unless both float and
   // int are 4 bytes wide
   typedef char stb_vorbis_float_size_test[sizeof(float)==4 && sizeof(int) == 4];
   #define FASTDEF(x) float_conv x
   // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
   #define MAGIC(SHIFT) (1.5f * (1 << (23-(SHIFT))) + 0.5f/(1 << (SHIFT)))
   // bit pattern of the bias added by MAGIC; subtracting it leaves the integer
   #define ADDEND(SHIFT) (((150-(SHIFT)) << 23) + (1 << 22))
   #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((temp).f = (x) + MAGIC(s), (temp).i - ADDEND(s))
   #define check_endianness()
#else
   // portable fallback: plain multiply and truncate
   #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((int) ((x) * (1 << (s))))
   #define check_endianness()
   #define FASTDEF(x)
#endif
5029 
5030 static void copy_samples(short *dest, float *src, int len)
5031 {
5032  int i;
5033  check_endianness();
5034  for (i=0; i < len; ++i) {
5035  FASTDEF(temp);
5036  int v = FAST_SCALED_FLOAT_TO_INT(temp, src[i],15);
5037  if ((unsigned int) (v + 32768) > 65535)
5038  v = v < 0 ? -32768 : 32767;
5039  dest[i] = v;
5040  }
5041 }
5042 
5043 static void compute_samples(int mask, short *output, int num_c, float **data, int d_offset, int len)
5044 {
5045  #define BUFFER_SIZE 32
5046  float buffer[BUFFER_SIZE];
5047  int i,j,o,n = BUFFER_SIZE;
5048  check_endianness();
5049  for (o = 0; o < len; o += BUFFER_SIZE) {
5050  memset(buffer, 0, sizeof(buffer));
5051  if (o + n > len) n = len - o;
5052  for (j=0; j < num_c; ++j) {
5053  if (channel_position[num_c][j] & mask) {
5054  for (i=0; i < n; ++i)
5055  buffer[i] += data[j][d_offset+o+i];
5056  }
5057  }
5058  for (i=0; i < n; ++i) {
5059  FASTDEF(temp);
5060  int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5061  if ((unsigned int) (v + 32768) > 65535)
5062  v = v < 0 ? -32768 : 32767;
5063  output[o+i] = v;
5064  }
5065  }
5066 }
5067 
5068 static void compute_stereo_samples(short *output, int num_c, float **data, int d_offset, int len)
5069 {
5070  #define BUFFER_SIZE 32
5071  float buffer[BUFFER_SIZE];
5072  int i,j,o,n = BUFFER_SIZE >> 1;
5073  // o is the offset in the source data
5074  check_endianness();
5075  for (o = 0; o < len; o += BUFFER_SIZE >> 1) {
5076  // o2 is the offset in the output data
5077  int o2 = o << 1;
5078  memset(buffer, 0, sizeof(buffer));
5079  if (o + n > len) n = len - o;
5080  for (j=0; j < num_c; ++j) {
5081  int m = channel_position[num_c][j] & (PLAYBACK_LEFT | PLAYBACK_RIGHT);
5082  if (m == (PLAYBACK_LEFT | PLAYBACK_RIGHT)) {
5083  for (i=0; i < n; ++i) {
5084  buffer[i*2+0] += data[j][d_offset+o+i];
5085  buffer[i*2+1] += data[j][d_offset+o+i];
5086  }
5087  } else if (m == PLAYBACK_LEFT) {
5088  for (i=0; i < n; ++i) {
5089  buffer[i*2+0] += data[j][d_offset+o+i];
5090  }
5091  } else if (m == PLAYBACK_RIGHT) {
5092  for (i=0; i < n; ++i) {
5093  buffer[i*2+1] += data[j][d_offset+o+i];
5094  }
5095  }
5096  }
5097  for (i=0; i < (n<<1); ++i) {
5098  FASTDEF(temp);
5099  int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5100  if ((unsigned int) (v + 32768) > 65535)
5101  v = v < 0 ? -32768 : 32767;
5102  output[o2+i] = v;
5103  }
5104  }
5105 }
5106 
5107 static void convert_samples_short(int buf_c, short **buffer, int b_offset, int data_c, float **data, int d_offset, int samples)
5108 {
5109  int i;
5110  if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5111  static int channel_selector[3][2] = { {0}, {PLAYBACK_MONO}, {PLAYBACK_LEFT, PLAYBACK_RIGHT} };
5112  for (i=0; i < buf_c; ++i)
5113  compute_samples(channel_selector[buf_c][i], buffer[i]+b_offset, data_c, data, d_offset, samples);
5114  } else {
5115  int limit = buf_c < data_c ? buf_c : data_c;
5116  for (i=0; i < limit; ++i)
5117  copy_samples(buffer[i]+b_offset, data[i]+d_offset, samples);
5118  for ( ; i < buf_c; ++i)
5119  memset(buffer[i]+b_offset, 0, sizeof(short) * samples);
5120  }
5121 }
5122 
5123 int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples)
5124 {
5125  float **output = NULL;
5126  int len = stb_vorbis_get_frame_float(f, NULL, &output);
5127  if (len > num_samples) len = num_samples;
5128  if (len)
5129  convert_samples_short(num_c, buffer, 0, f->channels, output, 0, len);
5130  return len;
5131 }
5132 
5133 static void convert_channels_short_interleaved(int buf_c, short *buffer, int data_c, float **data, int d_offset, int len)
5134 {
5135  int i;
5136  check_endianness();
5137  if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5138  assert(buf_c == 2);
5139  for (i=0; i < buf_c; ++i)
5140  compute_stereo_samples(buffer, data_c, data, d_offset, len);
5141  } else {
5142  int limit = buf_c < data_c ? buf_c : data_c;
5143  int j;
5144  for (j=0; j < len; ++j) {
5145  for (i=0; i < limit; ++i) {
5146  FASTDEF(temp);
5147  float f = data[i][d_offset+j];
5148  int v = FAST_SCALED_FLOAT_TO_INT(temp, f,15);//data[i][d_offset+j],15);
5149  if ((unsigned int) (v + 32768) > 65535)
5150  v = v < 0 ? -32768 : 32767;
5151  *buffer++ = v;
5152  }
5153  for ( ; i < buf_c; ++i)
5154  *buffer++ = 0;
5155  }
5156  }
5157 }
5158 
5159 int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts)
5160 {
5161  float **output;
5162  int len;
5163  if (num_c == 1) return stb_vorbis_get_frame_short(f,num_c,&buffer, num_shorts);
5164  len = stb_vorbis_get_frame_float(f, NULL, &output);
5165  if (len) {
5166  if (len*num_c > num_shorts) len = num_shorts / num_c;
5167  convert_channels_short_interleaved(num_c, buffer, f->channels, output, 0, len);
5168  }
5169  return len;
5170 }
5171 
5172 int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts)
5173 {
5174  float **outputs;
5175  int len = num_shorts / channels;
5176  int n=0;
5177  int z = f->channels;
5178  if (z > channels) z = channels;
5179  while (n < len) {
5180  int k = f->channel_buffer_end - f->channel_buffer_start;
5181  if (n+k >= len) k = len - n;
5182  if (k)
5184  buffer += k*channels;
5185  n += k;
5186  f->channel_buffer_start += k;
5187  if (n == len) break;
5188  if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5189  }
5190  return n;
5191 }
5192 
5193 int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int len)
5194 {
5195  float **outputs;
5196  int n=0;
5197  int z = f->channels;
5198  if (z > channels) z = channels;
5199  while (n < len) {
5200  int k = f->channel_buffer_end - f->channel_buffer_start;
5201  if (n+k >= len) k = len - n;
5202  if (k)
5203  convert_samples_short(channels, buffer, n, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5204  n += k;
5205  f->channel_buffer_start += k;
5206  if (n == len) break;
5207  if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5208  }
5209  return n;
5210 }
5211 
5212 #ifndef STB_VORBIS_NO_STDIO
5213 int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output)
5214 {
5215  int data_len, offset, total, limit, error;
5216  short *data;
5217  stb_vorbis *v = stb_vorbis_open_filename(filename, &error, NULL);
5218  if (v == NULL) return -1;
5219  limit = v->channels * 4096;
5220  *channels = v->channels;
5221  if (sample_rate)
5222  *sample_rate = v->sample_rate;
5223  offset = data_len = 0;
5224  total = limit;
5225  data = (short *) malloc(total * sizeof(*data));
5226  if (data == NULL) {
5227  stb_vorbis_close(v);
5228  return -2;
5229  }
5230  for (;;) {
5231  int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5232  if (n == 0) break;
5233  data_len += n;
5234  offset += n * v->channels;
5235  if (offset + limit > total) {
5236  short *data2;
5237  total *= 2;
5238  data2 = (short *) realloc(data, total * sizeof(*data));
5239  if (data2 == NULL) {
5240  free(data);
5241  stb_vorbis_close(v);
5242  return -2;
5243  }
5244  data = data2;
5245  }
5246  }
5247  *output = data;
5248  stb_vorbis_close(v);
5249  return data_len;
5250 }
5251 #endif // NO_STDIO
5252 
5253 int stb_vorbis_decode_memory(const uint8 *mem, int len, int *channels, int *sample_rate, short **output)
5254 {
5255  int data_len, offset, total, limit, error;
5256  short *data;
5257  stb_vorbis *v = stb_vorbis_open_memory(mem, len, &error, NULL);
5258  if (v == NULL) return -1;
5259  limit = v->channels * 4096;
5260  *channels = v->channels;
5261  if (sample_rate)
5262  *sample_rate = v->sample_rate;
5263  offset = data_len = 0;
5264  total = limit;
5265  data = (short *) malloc(total * sizeof(*data));
5266  if (data == NULL) {
5267  stb_vorbis_close(v);
5268  return -2;
5269  }
5270  for (;;) {
5271  int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5272  if (n == 0) break;
5273  data_len += n;
5274  offset += n * v->channels;
5275  if (offset + limit > total) {
5276  short *data2;
5277  total *= 2;
5278  data2 = (short *) realloc(data, total * sizeof(*data));
5279  if (data2 == NULL) {
5280  free(data);
5281  stb_vorbis_close(v);
5282  return -2;
5283  }
5284  data = data2;
5285  }
5286  }
5287  *output = data;
5288  stb_vorbis_close(v);
5289  return data_len;
5290 }
5291 #endif // STB_VORBIS_NO_INTEGER_CONVERSION
5292 
5293 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
5294 {
5295  float **outputs;
5296  int len = num_floats / channels;
5297  int n=0;
5298  int z = f->channels;
5299  if (z > channels) z = channels;
5300  while (n < len) {
5301  int i,j;
5302  int k = f->channel_buffer_end - f->channel_buffer_start;
5303  if (n+k >= len) k = len - n;
5304  for (j=0; j < k; ++j) {
5305  for (i=0; i < z; ++i)
5306  *buffer++ = f->channel_buffers[i][f->channel_buffer_start+j];
5307  for ( ; i < channels; ++i)
5308  *buffer++ = 0;
5309  }
5310  n += k;
5311  f->channel_buffer_start += k;
5312  if (n == len)
5313  break;
5314  if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5315  break;
5316  }
5317  return n;
5318 }
5319 
5320 int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
5321 {
5322  float **outputs;
5323  int n=0;
5324  int z = f->channels;
5325  if (z > channels) z = channels;
5326  while (n < num_samples) {
5327  int i;
5328  int k = f->channel_buffer_end - f->channel_buffer_start;
5329  if (n+k >= num_samples) k = num_samples - n;
5330  if (k) {
5331  for (i=0; i < z; ++i)
5332  memcpy(buffer[i]+n, f->channel_buffers[i]+f->channel_buffer_start, sizeof(float)*k);
5333  for ( ; i < channels; ++i)
5334  memset(buffer[i]+n, 0, sizeof(float) * k);
5335  }
5336  n += k;
5337  f->channel_buffer_start += k;
5338  if (n == num_samples)
5339  break;
5340  if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5341  break;
5342  }
5343  return n;
5344 }
5345 #endif // STB_VORBIS_NO_PULLDATA_API
5346 
5347 /* Version history
5348  1.09 - 2016/04/04 - back out 'avoid discarding last frame' fix from previous version
5349  1.08 - 2016/04/02 - fixed multiple warnings; fix setup memory leaks;
5350  avoid discarding last frame of audio data
5351  1.07 - 2016/01/16 - fixed some warnings, fix mingw, const-correct API
5352  some more crash fixes when out of memory or with corrupt files
5353  1.06 - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
5354  some crash fixes when out of memory or with corrupt files
5355  1.05 - 2015/04/19 - don't define __forceinline if it's redundant
5356  1.04 - 2014/08/27 - fix missing const-correct case in API
5357  1.03 - 2014/08/07 - Warning fixes
5358  1.02 - 2014/07/09 - Declare qsort compare function _cdecl on windows
5359  1.01 - 2014/06/18 - fix stb_vorbis_get_samples_float
5360  1.0 - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in multichannel
5361  (API change) report sample rate for decode-full-file funcs
5362  0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
5363  0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
5364  0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
5365  0.99993 - remove assert that fired on legal files with empty tables
5366  0.99992 - rewind-to-start
5367  0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
5368  0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
5369  0.9998 - add a full-decode function with a memory source
5370  0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
5371  0.9996 - query length of vorbis stream in samples/seconds
5372  0.9995 - bugfix to another optimization that only happened in certain files
5373  0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
5374  0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
5375  0.9992 - performance improvement of IMDCT; now performs close to reference implementation
5376  0.9991 - performance improvement of IMDCT
5377  0.999 - (should have been 0.9990) performance improvement of IMDCT
5378  0.998 - no-CRT support from Casey Muratori
5379  0.997 - bugfixes for bugs found by Terje Mathisen
5380  0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
5381  0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
5382  0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
5383  0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
5384  0.992 - fixes for MinGW warning
5385  0.991 - turn fast-float-conversion on by default
5386  0.990 - fix push-mode seek recovery if you seek into the headers
5387  0.98b - fix to bad release of 0.98
5388  0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
5389  0.97 - builds under c++ (typecasting, don't use 'class' keyword)
5390  0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
5391  0.95 - clamping code for 16-bit functions
5392  0.94 - not publicly released
5393  0.93 - fixed all-zero-floor case (was decoding garbage)
5394  0.92 - fixed a memory leak
5395  0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
5396  0.90 - first public release
5397 */
5398 
5399 #endif // STB_VORBIS_HEADER_ONLY