 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
Go to the documentation of this file.
1 // Ogg Vorbis audio decoder - v1.09 - public domain
2 //
3 //
4 // Original version written by Sean Barrett in 2007.
5 //
6 // Originally sponsored by RAD Game Tools. Seeking sponsored
7 // by Phillip Bennefall, Marc Andersen, Aaron Baker, Elias Software,
8 // Aras Pranckevicius, and Sean Barrett.
9 //
11 //
12 // This software is dual-licensed to the public domain and under the following
13 // license: you are granted a perpetual, irrevocable license to copy, modify,
14 // publish, and distribute this file as you see fit.
15 //
16 // No warranty for any purpose is expressed or implied by the author (nor
17 // by RAD Game Tools). Report bugs and send enhancements to the author.
18 //
19 // Limitations:
20 //
21 // - floor 0 not supported (used in old ogg vorbis files pre-2004)
22 // - lossless sample-truncation at beginning ignored
23 // - cannot concatenate multiple vorbis streams
24 // - sample positions are 32-bit, limiting seekable 192Khz
25 // files to around 6 hours (Ogg supports 64-bit)
26 //
27 // Feature contributors:
28 // Dougall Johnson (sample-exact seeking)
29 //
30 // Bugfix/warning contributors:
31 // Terje Mathisen Niklas Frykholm Andy Hill
32 // Casey Muratori John Bolton Gargaj
33 // Laurent Gomila Marc LeBlanc Ronny Chevalier
34 // Bernhard Wodo Evan Balster alxprd@github
35 // Tom Beaumont Ingo Leitgeb Nicolas Guillemot
36 // Phillip Bennefall Rohit Thiago Goulart
37 // manxorist@github saga musix
38 //
39 // Partial history:
40 // 1.09 - 2016/04/04 - back out 'truncation of last frame' fix from previous version
41 // 1.08 - 2016/04/02 - warnings; setup memory leaks; truncation of last frame
42 // 1.07 - 2015/01/16 - fixes for crashes on invalid files; warning fixes; const
43 // 1.06 - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
44 // some crash fixes when out of memory or with corrupt files
45 // fix some inappropriately signed shifts
46 // 1.05 - 2015/04/19 - don't define __forceinline if it's redundant
47 // 1.04 - 2014/08/27 - fix missing const-correct case in API
48 // 1.03 - 2014/08/07 - warning fixes
49 // 1.02 - 2014/07/09 - declare qsort comparison as explicitly _cdecl in Windows
50 // 1.01 - 2014/06/18 - fix stb_vorbis_get_samples_float (interleaved was correct)
51 // 1.0 - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
52 // (API change) report sample rate for decode-full-file funcs
53 //
54 // See end of file for full version history.
58 //
60 //
65 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
66 #define STB_VORBIS_NO_STDIO 1
67 #endif
70 #include <stdio.h>
71 #endif
73 #ifdef __cplusplus
74 extern "C" {
75 #endif
79 // Individual stb_vorbis* handles are not thread-safe; you cannot decode from
80 // them from multiple threads at the same time. However, you can have multiple
81 // stb_vorbis* handles and decode from them independently in multiple threads.
86 // normally stb_vorbis uses malloc() to allocate memory at startup,
87 // and alloca() to allocate temporary memory during a frame on the
88 // stack. (Memory consumption will depend on the amount of setup
89 // data in the file and how you set the compile flags for speed
90 // vs. size. In my test files the maximal-size usage is ~150KB.)
91 //
92 // You can modify the wrapper functions in the source (setup_malloc,
93 // setup_temp_malloc, temp_malloc) to change this behavior, or you
94 // can use a simpler allocation model: you pass in a buffer from
95 // which stb_vorbis will allocate _all_ its memory (including the
96 // temp memory). "open" may fail with a VORBIS_outofmem if you
97 // do not pass in enough data; there is no way to determine how
98 // much you do need except to succeed (at which point you can
99 // query get_info to find the exact amount required. yes I know
100 // this is lame).
101 //
102 // If you pass in a non-NULL buffer of the type below, allocation
103 // will occur from it as described above. Otherwise just pass NULL
104 // to use malloc()/alloca()
106 typedef struct
107 {
115 typedef struct stb_vorbis stb_vorbis;
117 typedef struct
118 {
119  unsigned int sample_rate;
120  int channels;
122  unsigned int setup_memory_required;
124  unsigned int temp_memory_required;
129 // get general information about the file
132 // get the last error detected (clears it, too)
133 extern int stb_vorbis_get_error(stb_vorbis *f);
135 // close an ogg vorbis file and free all memory in use
136 extern void stb_vorbis_close(stb_vorbis *f);
138 // this function returns the offset (in samples) from the beginning of the
139 // file that will be returned by the next decode, if it is known, or -1
140 // otherwise. after a flush_pushdata() call, this may take a while before
141 // it becomes valid again.
142 // NOT WORKING YET after a seek with PULLDATA API
145 // returns the current seek point within the file, or offset from the beginning
146 // of the memory buffer. In pushdata mode it returns 0.
147 extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f);
153 // this API allows you to get blocks of data from any source and hand
154 // them to stb_vorbis. you have to buffer them; stb_vorbis will tell
155 // you how much it used, and you have to give it the rest next time;
156 // and stb_vorbis may not have enough data to work with and you will
157 // need to give it the same data again PLUS more. Note that the Vorbis
158 // specification does not bound the size of an individual frame.
161  const unsigned char * datablock, int datablock_length_in_bytes,
162  int *datablock_memory_consumed_in_bytes,
163  int *error,
164  const stb_vorbis_alloc *alloc_buffer);
165 // create a vorbis decoder by passing in the initial data block containing
166 // the ogg&vorbis headers (you don't need to parse them, just provide
167 // the first N bytes of the file--you're told if it's not enough, see below)
168 // on success, returns an stb_vorbis *, does not set error, returns the amount of
169 // data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
170 // on failure, returns NULL and sets *error, does not change *datablock_memory_consumed
171 // if returns NULL and *error is VORBIS_need_more_data, then the input block was
172 // incomplete and you need to pass in a larger block from the start of the file
175  stb_vorbis *f,
176  const unsigned char *datablock, int datablock_length_in_bytes,
177  int *channels, // place to write number of float * buffers
178  float ***output, // place to write float ** array of float * buffers
179  int *samples // place to write number of output samples
180  );
181 // decode a frame of audio sample data if possible from the passed-in data block
182 //
183 // return value: number of bytes we used from datablock
184 //
185 // possible cases:
186 // 0 bytes used, 0 samples output (need more data)
187 // N bytes used, 0 samples output (resynching the stream, keep going)
188 // N bytes used, M samples output (one frame of data)
189 // note that after opening a file, you will ALWAYS get one N-bytes,0-sample
190 // frame, because Vorbis always "discards" the first frame.
191 //
192 // Note that on resynch, stb_vorbis will rarely consume all of the buffer,
193 // instead only datablock_length_in_bytes-3 or less. This is because it wants
194 // to avoid missing parts of a page header if they cross a datablock boundary,
195 // without writing state-machiney code to record a partial detection.
196 //
197 // The number of channels returned is stored in *channels (which can be
198 // NULL--it is always the same as the number of channels reported by
199 // get_info). *output will contain an array of float* buffers, one per
200 // channel. In other words, (*output)[0][0] contains the first sample from
201 // the first channel, and (*output)[1][0] contains the first sample from
202 // the second channel.
205 // inform stb_vorbis that your next datablock will not be contiguous with
206 // previous ones (e.g. you've seeked in the data); future attempts to decode
207 // frames will cause stb_vorbis to resynchronize (as noted above), and
208 // once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
209 // will begin decoding the _next_ frame.
210 //
211 // if you want to seek using pushdata, you need to seek in your file, then
212 // call stb_vorbis_flush_pushdata(), then start calling decoding, then once
213 // decoding is returning you data, call stb_vorbis_get_sample_offset, and
214 // if you don't like the result, seek your file again and repeat.
215 #endif
221 // This API assumes stb_vorbis is allowed to pull data from a source--
222 // either a block of memory containing the _entire_ vorbis stream, or a
223 // FILE * that you or it create, or possibly some other reading mechanism
224 // if you go modify the source to replace the FILE * case with some kind
225 // of callback to your code. (But if you don't support seeking, you may
226 // just want to go ahead and use pushdata.)
229 extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output);
230 #endif
232 extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output);
233 #endif
234 // decode an entire file and output the data interleaved into a malloc()ed
235 // buffer stored in *output. The return value is the number of samples
236 // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
237 // When you're done with it, just free() the pointer returned in *output.
239 extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
240  int *error, const stb_vorbis_alloc *alloc_buffer);
241 // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
242 // this must be the entire stream!). on failure, returns NULL and sets *error
245 extern stb_vorbis * stb_vorbis_open_filename(const char *filename,
246  int *error, const stb_vorbis_alloc *alloc_buffer);
247 // create an ogg vorbis decoder from a filename via fopen(). on failure,
248 // returns NULL and sets *error (possibly to VORBIS_file_open_failure).
250 extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
251  int *error, const stb_vorbis_alloc *alloc_buffer);
252 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
253 // the _current_ seek point (ftell). on failure, returns NULL and sets *error.
254 // note that stb_vorbis must "own" this stream; if you seek it in between
255 // calls to stb_vorbis, it will become confused. Moreover, if you attempt to
256 // perform stb_vorbis_seek_*() operations on this file, it will assume it
257 // owns the _entire_ rest of the file after the start point. Use the next
258 // function, stb_vorbis_open_file_section(), to limit it.
260 extern stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_close,
261  int *error, const stb_vorbis_alloc *alloc_buffer, unsigned int len);
262 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
263 // the _current_ seek point (ftell); the stream will be of length 'len' bytes.
264 // on failure, returns NULL and sets *error. note that stb_vorbis must "own"
265 // this stream; if you seek it in between calls to stb_vorbis, it will become
266 // confused.
267 #endif
269 extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
270 extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number);
271 // these functions seek in the Vorbis file to (approximately) 'sample_number'.
272 // after calling seek_frame(), the next call to get_frame_*() will include
273 // the specified sample. after calling stb_vorbis_seek(), the next call to
274 // stb_vorbis_get_samples_* will start with the specified sample. If you
275 // do not need to seek to EXACTLY the target sample when using get_samples_*,
276 // you can also use seek_frame().
278 extern void stb_vorbis_seek_start(stb_vorbis *f);
279 // this function is equivalent to stb_vorbis_seek(f,0)
281 extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f);
283 // these functions return the total length of the vorbis stream
285 extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
286 // decode the next frame and return the number of samples. the number of
287 // channels returned is stored in *channels (which can be NULL--it is always
288 // the same as the number of channels reported by get_info). *output will
289 // contain an array of float* buffers, one per channel. These outputs will
290 // be overwritten on the next call to stb_vorbis_get_frame_*.
291 //
292 // You generally should not intermix calls to stb_vorbis_get_frame_*()
293 // and stb_vorbis_get_samples_*(), since the latter calls the former.
296 extern int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts);
297 extern int stb_vorbis_get_frame_short (stb_vorbis *f, int num_c, short **buffer, int num_samples);
298 #endif
299 // decode the next frame and return the number of *samples* per channel.
300 // Note that for interleaved data, you pass in the number of shorts (the
301 // size of your array), but the return value is the number of samples per
302 // channel, not the total number of samples.
303 //
304 // The data is coerced to the number of channels you request according to the
305 // channel coercion rules (see below). You must pass in the size of your
306 // buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
307 // The maximum buffer size needed can be gotten from get_info(); however,
308 // the Vorbis I specification implies an absolute maximum of 4096 samples
309 // per channel.
311 // Channel coercion rules:
312 // Let M be the number of channels requested, and N the number of channels present,
313 // and Cn be the nth channel; let stereo L be the sum of all L and center channels,
314 // and stereo R be the sum of all R and center channels (channel assignment from the
315 // vorbis spec).
316 // M N output
317 // 1 k sum(Ck) for all k
318 // 2 * stereo L, stereo R
319 // k l k > l, the first l channels, then 0s
320 // k l k <= l, the first k channels
321 // Note that this is not _good_ surround etc. mixing at all! It's just so
322 // you get something useful.
324 extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
325 extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
326 // gets num_samples samples, not necessarily on a frame boundary--this requires
327 // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
328 // Returns the number of samples stored per channel; it may be less than requested
329 // at the end of the file. If there are no more samples in the file, returns 0.
332 extern int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts);
333 extern int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int num_samples);
334 #endif
335 // gets num_samples samples, not necessarily on a frame boundary--this requires
336 // buffering so you have to supply the buffers. Applies the coercion rules above
337 // to produce 'channels' channels. Returns the number of samples stored per channel;
338 // it may be less than requested at the end of the file. If there are no more
339 // samples in the file, returns 0.
341 #endif
346 {
349  VORBIS_need_more_data=1, // not a real error
351  VORBIS_invalid_api_mixing, // can't mix API modes
352  VORBIS_outofmem, // not enough memory
354  VORBIS_too_many_channels, // STB_VORBIS_MAX_CHANNELS is too small
355  VORBIS_file_open_failure, // fopen() failed
356  VORBIS_seek_without_length, // can't seek in unknown-length file
358  VORBIS_unexpected_eof=10, // file is truncated?
359  VORBIS_seek_invalid, // seek past EOF
361  // decoding errors (corrupt/invalid stream) -- you probably
362  // don't care about the exact details of these
364  // vorbis errors:
368  // ogg errors:
377 };
380 #ifdef __cplusplus
381 }
382 #endif
385 //
387 //
392 // global configuration settings (e.g. set these in the project/makefile),
393 // or just set them in this file at the top (although ideally the first few
394 // should be visible when the header file is compiled too, although it's not
395 // crucial)
398 // does not compile the code for the various stb_vorbis_*_pushdata()
399 // functions
403 // does not compile the code for the non-pushdata APIs
407 // does not compile the code for the APIs that use FILE *s internally
408 // or externally (implied by STB_VORBIS_NO_PULLDATA_API)
409 // #define STB_VORBIS_NO_STDIO
412 // does not compile the code for converting audio sample data from
413 // float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
417 // does not use a fast float-to-int trick to accelerate float-to-int on
418 // most platforms which requires endianness be defined correctly.
423 // globally define this to the maximum number of channels you need.
424 // The spec does not put a restriction on channels except that
425 // the count is stored in a byte, so 255 is the hard limit.
426 // Reducing this saves about 16 bytes per value, so using 16 saves
427 // (255-16)*16 or around 4KB. Plus any other memory usage
428 // I forgot to account for. Can probably go as low as 8 (7.1 audio),
429 // 6 (5.1 audio), or 2 (stereo only).
431 #define STB_VORBIS_MAX_CHANNELS 16 // enough for anyone?
432 #endif
435 // after a flush_pushdata(), stb_vorbis begins scanning for the
436 // next valid page, without backtracking. when it finds something
437 // that looks like a page, it streams through it and verifies its
438 // CRC32. Should that validation fail, it keeps scanning. But it's
439 // possible that _while_ streaming through to check the CRC32 of
440 // one candidate page, it sees another candidate page. This #define
441 // determines how many "overlapping" candidate pages it can search
442 // at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
443 // garbage pages could be as big as 64KB, but probably average ~16KB.
444 // So don't hose ourselves by scanning an apparent 64KB page and
445 // missing a ton of real ones in the interim; so minimum of 2
448 #endif
451 // sets the log size of the huffman-acceleration table. Maximum
452 // supported value is 24. with larger numbers, more decodings are O(1),
453 // but the table size is larger so worse cache missing, so you'll have
454 // to probe (and try multiple ogg vorbis files) to find the sweet spot.
457 #endif
460 // sets the log size of the binary-search acceleration table. this
461 // is used in similar fashion to the fast-huffman size to set initial
462 // parameters for the binary search
465 // The fast huffman tables are much more efficient if they can be
466 // stored as 16-bit results instead of 32-bit results. This restricts
467 // the codebooks to having only 65535 possible outcomes, though.
468 // (At least, accelerated by the huffman table.)
471 #endif
474 // If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
475 // back on binary searching for the correct one. This requires storing
476 // extra tables with the huffman codes in sorted order. Defining this
477 // symbol trades off space for speed by forcing a linear search in the
478 // non-fast case, except for "sparse" codebooks.
482 // stb_vorbis precomputes the result of the scalar residue decoding
483 // that would otherwise require a divide per chunk. you can trade off
484 // space for time by defining this symbol.
488 // vorbis VQ codebooks can be encoded two ways: with every case explicitly
489 // stored, or with all elements being chosen from a small range of values,
490 // and all values possible in all elements. By default, stb_vorbis expands
491 // this latter kind out to look like the former kind for ease of decoding,
492 // because otherwise an integer divide-per-vector-element is required to
493 // unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
494 // trade off storage for speed.
498 #error "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats"
499 #endif
502 // this replaces small integer divides in the floor decode loop with
503 // table lookups. made less than 1% difference, so disabled by default.
506 // disables the inlining of the scalar codebook fast-huffman decode.
507 // might save a little codespace; useful for debugging
511 // Normally we only decode the floor without synthesizing the actual
512 // full curve. We can instead synthesize the curve immediately. This
513 // requires more memory and is very likely slower, so I don't think
514 // you'd ever want to do it except for debugging.
524  #define STB_VORBIS_NO_STDIO
525 #endif
527 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
528  #define STB_VORBIS_NO_STDIO 1
529 #endif
534  // only need endianness for fast-float-to-int, which we don't
535  // use for pushdata
538  #define STB_VORBIS_ENDIAN 0
539  #else
540  #define STB_VORBIS_ENDIAN 1
541  #endif
543 #endif
544 #endif
548 #include <stdio.h>
549 #endif
551 #ifndef STB_VORBIS_NO_CRT
552  #include <stdlib.h>
553  #include <string.h>
554  #include <assert.h>
555  #include <math.h>
557  // find definition of alloca if it's not in stdlib.h:
558  #ifdef _MSC_VER
559  #include <malloc.h>
560  #endif
561  #if defined(__linux__) || defined(__linux) || defined(__EMSCRIPTEN__)
562  #include <alloca.h>
563  #endif
564 #else // STB_VORBIS_NO_CRT
565  #define NULL 0
566  #define malloc(s) 0
567  #define free(s) ((void) 0)
568  #define realloc(s) 0
569 #endif // STB_VORBIS_NO_CRT
571 #include <limits.h>
573 #ifdef __MINGW32__
574  // eff you mingw:
575  // "fixed":
576  //
577  // "no that broke the build, reverted, who cares about C":
578  //
579  #ifdef __forceinline
580  #undef __forceinline
581  #endif
582  #define __forceinline
583 #elif !defined(_MSC_VER)
584  #if __GNUC__
585  #define __forceinline inline
586  #else
587  #define __forceinline
588  #endif
589 #endif
592 #error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range"
593 #endif
596 #error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range"
597 #endif
600 #if 0
601 #include <crtdbg.h>
602 #define CHECK(f) _CrtIsValidHeapPointer(f->channel_buffers[1])
603 #else
604 #define CHECK(f) ((void) 0)
605 #endif
607 #define MAX_BLOCKSIZE_LOG 13 // from specification
611 typedef unsigned char uint8;
612 typedef signed char int8;
613 typedef unsigned short uint16;
614 typedef signed short int16;
615 typedef unsigned int uint32;
616 typedef signed int int32;
618 #ifndef TRUE
619 #define TRUE 1
620 #define FALSE 0
621 #endif
623 typedef float codetype;
625 // @NOTE
626 //
627 // Some arrays below are tagged "//varies", which means it's actually
628 // a variable-sized piece of data, but rather than malloc I assume it's
629 // small enough it's better to just allocate it all together with the
630 // main thing
631 //
632 // Most of the variables are specified with the smallest size I could pack
633 // them into. It might give better performance to make them all full-sized
634 // integers. It should be safe to freely rearrange the structures or change
635 // the sizes larger--nothing relies on silently truncating etc., nor the
636 // order of variables.
641 typedef struct
642 {
643  int dimensions, entries;
646  float delta_value;
656  #else
657  int32 fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
658  #endif
662 } Codebook;
664 typedef struct
665 {
672  uint8 book_list[16]; // varies
673 } Floor0;
675 typedef struct
676 {
678  uint8 partition_class_list[32]; // varies
679  uint8 class_dimensions[16]; // varies
680  uint8 class_subclasses[16]; // varies
681  uint8 class_masterbooks[16]; // varies
682  int16 subclass_books[16][8]; // varies
683  uint16 Xlist[31*8+2]; // varies
684  uint8 sorted_order[31*8+2];
685  uint8 neighbors[31*8+2][2];
688  int values;
689 } Floor1;
691 typedef union
692 {
695 } Floor;
697 typedef struct
698 {
699  uint32 begin, end;
704  int16 (*residue_books)[8];
705 } Residue;
707 typedef struct
708 {
714 typedef struct
715 {
719  uint8 submap_floor[15]; // varies
720  uint8 submap_residue[15]; // varies
721 } Mapping;
723 typedef struct
724 {
729 } Mode;
// Progress record for checking one page's CRC: the value the page claims
// (goal_crc), the value accumulated so far (crc_so_far), byte counters for
// how much has been / remains to be consumed, and the page's granule position.
// NOTE(review): presumably one record is kept per candidate page during
// push-mode resync scanning -- confirm against the code that uses it.
731 typedef struct
732 {
733  uint32 goal_crc; // expected crc if match
734  int bytes_left; // bytes left in packet
735  uint32 crc_so_far; // running crc
736  int bytes_done; // bytes processed in _current_ chunk
737  uint32 sample_loc; // granule pos encoded in page
738 } CRCscan;
740 typedef struct
741 {
742  uint32 page_start, page_end;
744 } ProbedPage;
747 {
748  // user-accessible info
749  unsigned int sample_rate;
750  int channels;
752  unsigned int setup_memory_required;
753  unsigned int temp_memory_required;
756  // input config
758  FILE *f;
761 #endif
775  // memory management
780  // run-time results
781  int eof;
784  // user-useful data
786  // header info
787  int blocksize[2];
792  uint16 floor_types[64]; // varies
795  uint16 residue_types[64]; // varies
800  Mode mode_config[64]; // varies
804  // decode buffer
813  #else
814  float *floor_buffers[STB_VORBIS_MAX_CHANNELS];
815  #endif
817  uint32 current_loc; // sample location of next frame to decode
820  // per-blocksize precomputed data
822  // twiddle factors
823  float *A[2],*B[2],*C[2];
824  float *window[2];
827  // current page/packet/segment streaming info
828  uint32 serial; // stream serial number for verification
835  int next_seg;
836  int last_seg; // flag that we're on the last segment
837  int last_seg_which; // what was the segment number of the last seg?
846  // push mode scanning
847  int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
850 #endif
852  // sample-access
855 };
858  #define IS_PUSH_MODE(f) FALSE
859 #elif defined(STB_VORBIS_NO_PULLDATA_API)
860  #define IS_PUSH_MODE(f) TRUE
861 #else
862  #define IS_PUSH_MODE(f) ((f)->push_mode)
863 #endif
865 typedef struct stb_vorbis vorb;
867 static int error(vorb *f, enum STBVorbisError e)
868 {
869  f->error = e;
870  if (!f->eof && e != VORBIS_need_more_data) {
871  f->error=e; // breakpoint for debugging
872  }
873  return 0;
874 }
877 // these functions are used for allocating temporary memory
878 // while decoding. if you can afford the stack space, use
879 // alloca(); otherwise, provide a temp buffer and it will
880 // allocate out of those.
// bytes needed for a table of `count` pointers followed by `count` blocks of
// `size` bytes each (the layout built by make_block_array). Both arguments
// are parenthesized so expression arguments such as `a+b` size correctly.
#define array_size_required(count,size)  ((count)*(sizeof(void *)+(size)))

// temporary allocation: taken from the user-supplied buffer when there is
// one, otherwise from the stack via alloca()
#define temp_alloc(f,size)              ((f)->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size))
#ifdef dealloca
// NOTE(review): the original expansion passed an identifier `size` that is
// not a parameter of this macro; `p` (the block being released) is what the
// macro actually receives. Confirm against your platform's dealloca().
#define temp_free(f,p)                  ((f)->alloc.alloc_buffer ? 0 : dealloca(p))
#else
#define temp_free(f,p)                  0
#endif
// snapshot / rewind the temp-allocation cursor of the user-supplied buffer
#define temp_alloc_save(f)              ((f)->temp_offset)
#define temp_alloc_restore(f,p)         ((f)->temp_offset = (p))

// temp-allocate and initialize an array of `count` pointers to `size`-byte blocks
#define temp_block_array(f,count,size)  make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
// Turn one sufficiently large chunk of memory into an array of `count`
// pointers, each pointing at its own `size`-byte sub-block. The pointer table
// sits at the start of `mem` and the sub-blocks follow it back to back.
// Returns `mem`, now usable as the pointer table.
static void *make_block_array(void *mem, int count, int size)
{
   void **table = (void **) mem;
   char *block  = (char *) (table + count);   // first byte after the table
   int n;

   for (n = 0; n < count; ++n, block += size)
      table[n] = block;

   return table;
}
908 static void *setup_malloc(vorb *f, int sz)
909 {
910  sz = (sz+3) & ~3;
911  f->setup_memory_required += sz;
912  if (f->alloc.alloc_buffer) {
913  void *p = (char *) f->alloc.alloc_buffer + f->setup_offset;
914  if (f->setup_offset + sz > f->temp_offset) return NULL;
915  f->setup_offset += sz;
916  return p;
917  }
918  return sz ? malloc(sz) : NULL;
919 }
921 static void setup_free(vorb *f, void *p)
922 {
923  if (f->alloc.alloc_buffer) return; // do nothing; setup mem is a stack
924  free(p);
925 }
927 static void *setup_temp_malloc(vorb *f, int sz)
928 {
929  sz = (sz+3) & ~3;
930  if (f->alloc.alloc_buffer) {
931  if (f->temp_offset - sz < f->setup_offset) return NULL;
932  f->temp_offset -= sz;
933  return (char *) f->alloc.alloc_buffer + f->temp_offset;
934  }
935  return malloc(sz);
936 }
938 static void setup_temp_free(vorb *f, void *p, int sz)
939 {
940  if (f->alloc.alloc_buffer) {
941  f->temp_offset += (sz+3)&~3;
942  return;
943  }
944  free(p);
945 }
947 #define CRC32_POLY 0x04c11db7 // from spec
949 static uint32 crc_table[256];
950 static void crc32_init(void)
951 {
952  int i,j;
953  uint32 s;
954  for(i=0; i < 256; i++) {
955  for (s=(uint32) i << 24, j=0; j < 8; ++j)
956  s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0);
957  crc_table[i] = s;
958  }
959 }
962 {
963  return (crc << 8) ^ crc_table[byte ^ (crc >> 24)];
964 }
// Reverse the order of the low 32 bits of n (bit 0 <-> bit 31, and so on).
// used in setup, and for huffman that doesn't go fast path
static unsigned int bit_reverse(unsigned int n)
{
   unsigned int v = n;
   v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);   // swap adjacent bits
   v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);   // swap bit pairs
   v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);   // swap nibbles
   v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);   // swap bytes
   v = (v >> 16) | (v << 16);                               // swap halves
   return v;
}
// Return x multiplied by itself.
static float square(float x)
{
   float squared = x * x;
   return squared;
}
982 // this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
983 // as required by the specification. fast(?) implementation from stb.h
984 // @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
985 static int ilog(int32 n)
986 {
987  static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
989  // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
990  if (n < (1 << 14))
991  if (n < (1 << 4)) return 0 + log2_4[n ];
992  else if (n < (1 << 9)) return 5 + log2_4[n >> 5];
993  else return 10 + log2_4[n >> 10];
994  else if (n < (1 << 24))
995  if (n < (1 << 19)) return 15 + log2_4[n >> 15];
996  else return 20 + log2_4[n >> 20];
997  else if (n < (1 << 29)) return 25 + log2_4[n >> 25];
998  else if (n < (1 << 31)) return 30 + log2_4[n >> 30];
999  else return 0; // signed n returns 0
1000 }
1002 #ifndef M_PI
1003  #define M_PI 3.14159265358979323846264f // from CRC
1004 #endif
1006 // code length assigned to a value with no huffman encoding
1007 #define NO_CODE 255
1010 //
1011 // these functions are only called at setup, and only a few times
1012 // per file
1014 static float float32_unpack(uint32 x)
1015 {
1016  // from the specification
1017  uint32 mantissa = x & 0x1fffff;
1018  uint32 sign = x & 0x80000000;
1019  uint32 exp = (x & 0x7fe00000) >> 21;
1020  double res = sign ? -(double)mantissa : (double)mantissa;
1021  return (float) ldexp((float)res, exp-788);
1022 }
1025 // zlib & jpeg huffman tables assume that the output symbols
1026 // can either be arbitrarily arranged, or have monotonically
1027 // increasing frequencies--they rely on the lengths being sorted;
1028 // this makes for a very simple generation algorithm.
1029 // vorbis allows a huffman table with non-sorted lengths. This
1030 // requires a more sophisticated construction, since symbols in
1031 // order do not map to huffman codes "in order".
1032 static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
1033 {
1034  if (!c->sparse) {
1035  c->codewords [symbol] = huff_code;
1036  } else {
1037  c->codewords [count] = huff_code;
1038  c->codeword_lengths[count] = len;
1039  values [count] = symbol;
1040  }
1041 }
1043 static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
1044 {
1045  int i,k,m=0;
1046  uint32 available[32];
1048  memset(available, 0, sizeof(available));
1049  // find the first entry
1050  for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
1051  if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
1052  // add to the list
1053  add_entry(c, 0, k, m++, len[k], values);
1054  // add all available leaves
1055  for (i=1; i <= len[k]; ++i)
1056  available[i] = 1U << (32-i);
1057  // note that the above code treats the first case specially,
1058  // but it's really the same as the following code, so they
1059  // could probably be combined (except the initial code is 0,
1060  // and I use 0 in available[] to mean 'empty')
1061  for (i=k+1; i < n; ++i) {
1062  uint32 res;
1063  int z = len[i], y;
1064  if (z == NO_CODE) continue;
1065  // find lowest available leaf (should always be earliest,
1066  // which is what the specification calls for)
1067  // note that this property, and the fact we can never have
1068  // more than one free leaf at a given level, isn't totally
1069  // trivial to prove, but it seems true and the assert never
1070  // fires, so!
1071  while (z > 0 && !available[z]) --z;
1072  if (z == 0) { return FALSE; }
1073  res = available[z];
1074  assert(z >= 0 && z < 32);
1075  available[z] = 0;
1076  add_entry(c, bit_reverse(res), i, m++, len[i], values);
1077  // propogate availability up the tree
1078  if (z != len[i]) {
1079  assert(len[i] >= 0 && len[i] < 32);
1080  for (y=len[i]; y > z; --y) {
1081  assert(available[y] == 0);
1082  available[y] = res + (1 << (32-y));
1083  }
1084  }
1085  }
1086  return TRUE;
1087 }
1089 // accelerated huffman table allows fast O(1) match of all symbols
1092 {
1093  int i, len;
1094  for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
1095  c->fast_huffman[i] = -1;
1097  len = c->sparse ? c->sorted_entries : c->entries;
1099  if (len > 32767) len = 32767; // largest possible value we can encode!
1100  #endif
1101  for (i=0; i < len; ++i) {
1103  uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i];
1104  // set table entries for all bit combinations in the higher bits
1105  while (z < FAST_HUFFMAN_TABLE_SIZE) {
1106  c->fast_huffman[z] = i;
1107  z += 1 << c->codeword_lengths[i];
1108  }
1109  }
1110  }
1111 }
1113 #ifdef _MSC_VER
1114 #define STBV_CDECL __cdecl
1115 #else
1116 #define STBV_CDECL
1117 #endif
1119 static int STBV_CDECL uint32_compare(const void *p, const void *q)
1120 {
1121  uint32 x = * (uint32 *) p;
1122  uint32 y = * (uint32 *) q;
1123  return x < y ? -1 : x > y;
1124 }
1126 static int include_in_sort(Codebook *c, uint8 len)
1127 {
1128  if (c->sparse) { assert(len != NO_CODE); return TRUE; }
1129  if (len == NO_CODE) return FALSE;
1130  if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
1131  return FALSE;
1132 }
1134 // if the fast table above doesn't work, we want to binary
1135 // search them... need to reverse the bits
1136 static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
1137 {
1138  int i, len;
1139  // build a list of all the entries
1140  // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
1141  // this is kind of a frivolous optimization--I don't see any performance improvement,
1142  // but it's like 4 extra lines of code, so.
1143  if (!c->sparse) {
1144  int k = 0;
1145  for (i=0; i < c->entries; ++i)
1146  if (include_in_sort(c, lengths[i]))
1147  c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
1148  assert(k == c->sorted_entries);
1149  } else {
1150  for (i=0; i < c->sorted_entries; ++i)
1151  c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
1152  }
1154  qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare);
1155  c->sorted_codewords[c->sorted_entries] = 0xffffffff;
1157  len = c->sparse ? c->sorted_entries : c->entries;
1158  // now we need to indicate how they correspond; we could either
1159  // #1: sort a different data structure that says who they correspond to
1160  // #2: for each sorted entry, search the original list to find who corresponds
1161  // #3: for each original entry, find the sorted entry
1162  // #1 requires extra storage, #2 is slow, #3 can use binary search!
1163  for (i=0; i < len; ++i) {
1164  int huff_len = c->sparse ? lengths[values[i]] : lengths[i];
1165  if (include_in_sort(c,huff_len)) {
1166  uint32 code = bit_reverse(c->codewords[i]);
1167  int x=0, n=c->sorted_entries;
1168  while (n > 1) {
1169  // invariant: sc[x] <= code < sc[x+n]
1170  int m = x + (n >> 1);
1171  if (c->sorted_codewords[m] <= code) {
1172  x = m;
1173  n -= (n>>1);
1174  } else {
1175  n >>= 1;
1176  }
1177  }
1178  assert(c->sorted_codewords[x] == code);
1179  if (c->sparse) {
1180  c->sorted_values[x] = values[i];
1181  c->codeword_lengths[x] = huff_len;
1182  } else {
1183  c->sorted_values[x] = i;
1184  }
1185  }
1186  }
1187 }
1189 // only run while parsing the header (3 times)
1190 static int vorbis_validate(uint8 *data)
1191 {
1192  static uint8 vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' };
1193  return memcmp(data, vorbis, 6) == 0;
1194 }
// Returns nonzero if base^dim <= limit, computed in exact integer
// arithmetic. The division test keeps the running product from ever
// exceeding 'limit', so it cannot overflow.
static int lookup1_power_fits(int base, int dim, int limit)
{
   int acc = 1, i;
   if (base <= 1) return base <= limit;   // 0^dim and 1^dim need no loop
   for (i=0; i < dim; ++i) {
      if (acc > limit / base) return 0;   // acc*base would exceed limit
      acc *= base;
   }
   return 1;
}

// called from setup only, once per code book
// (formula implied by specification)
//
// Returns the greatest integer r such that r^dim <= entries.
// The result is found by binary search with exact integer powers; a
// floating-point estimate can land one too high, which the previous
// implementation only caught with an assert.
// Returns 0 when entries < 1 or dim < 1 (no meaningful answer exists).
static int lookup1_values(int entries, int dim)
{
   int lo, hi;
   if (entries < 1 || dim < 1) return 0;
   lo = 1;          // 1^dim == 1 <= entries always holds here
   hi = entries;    // r can never exceed entries
   while (lo < hi) {
      int mid = lo + (hi - lo + 1) / 2;   // round up so the loop always progresses
      if (lookup1_power_fits(mid, dim, entries))
         lo = mid;
      else
         hi = mid - 1;
   }
   return lo;
}
1208 // called twice per file
1209 static void compute_twiddle_factors(int n, float *A, float *B, float *C)
1210 {
1211  int n4 = n >> 2, n8 = n >> 3;
1212  int k,k2;
1214  for (k=k2=0; k < n4; ++k,k2+=2) {
1215  A[k2 ] = (float) cos(4*k*M_PI/n);
1216  A[k2+1] = (float) -sin(4*k*M_PI/n);
1217  B[k2 ] = (float) cos((k2+1)*M_PI/n/2) * 0.5f;
1218  B[k2+1] = (float) sin((k2+1)*M_PI/n/2) * 0.5f;
1219  }
1220  for (k=k2=0; k < n8; ++k,k2+=2) {
1221  C[k2 ] = (float) cos(2*(k2+1)*M_PI/n);
1222  C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
1223  }
1224 }
1226 static void compute_window(int n, float *window)
1227 {
1228  int n2 = n >> 1, i;
1229  for (i=0; i < n2; ++i)
1230  window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
1231 }
1233 static void compute_bitreverse(int n, uint16 *rev)
1234 {
1235  int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
1236  int i, n8 = n >> 3;
1237  for (i=0; i < n8; ++i)
1238  rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2;
1239 }
1241 static int init_blocksize(vorb *f, int b, int n)
1242 {
1243  int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
1244  f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1245  f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1246  f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
1247  if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem);
1248  compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
1249  f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1250  if (!f->window[b]) return error(f, VORBIS_outofmem);
1251  compute_window(n, f->window[b]);
1252  f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8);
1253  if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
1254  compute_bitreverse(n, f->bit_reverse[b]);
1255  return TRUE;
1256 }
1258 static void neighbors(uint16 *x, int n, int *plow, int *phigh)
1259 {
1260  int low = -1;
1261  int high = 65536;
1262  int i;
1263  for (i=0; i < n; ++i) {
1264  if (x[i] > low && x[i] < x[n]) { *plow = i; low = x[i]; }
1265  if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
1266  }
1267 }
1269 // this has been repurposed so y is now the original index instead of y
1270 typedef struct
1271 {
1273 } Point;
1275 static int STBV_CDECL point_compare(const void *p, const void *q)
1276 {
1277  Point *a = (Point *) p;
1278  Point *b = (Point *) q;
1279  return a->x < b->x ? -1 : a->x > b->x;
1280 }
1282 //
1286 #if defined(STB_VORBIS_NO_STDIO)
1287  #define USE_MEMORY(z) TRUE
1288 #else
1289  #define USE_MEMORY(z) ((z)->stream)
1290 #endif
1292 static uint8 get8(vorb *z)
1293 {
1294  if (USE_MEMORY(z)) {
1295  if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; }
1296  return *z->stream++;
1297  }
1299  #ifndef STB_VORBIS_NO_STDIO
1300  {
1301  int c = fgetc(z->f);
1302  if (c == EOF) { z->eof = TRUE; return 0; }
1303  return c;
1304  }
1305  #endif
1306 }
1308 static uint32 get32(vorb *f)
1309 {
1310  uint32 x;
1311  x = get8(f);
1312  x += get8(f) << 8;
1313  x += get8(f) << 16;
1314  x += (uint32) get8(f) << 24;
1315  return x;
1316 }
1318 static int getn(vorb *z, uint8 *data, int n)
1319 {
1320  if (USE_MEMORY(z)) {
1321  if (z->stream+n > z->stream_end) { z->eof = 1; return 0; }
1322  memcpy(data, z->stream, n);
1323  z->stream += n;
1324  return 1;
1325  }
1327  #ifndef STB_VORBIS_NO_STDIO
1328  if (fread(data, n, 1, z->f) == 1)
1329  return 1;
1330  else {
1331  z->eof = 1;
1332  return 0;
1333  }
1334  #endif
1335 }
1337 static void skip(vorb *z, int n)
1338 {
1339  if (USE_MEMORY(z)) {
1340  z->stream += n;
1341  if (z->stream >= z->stream_end) z->eof = 1;
1342  return;
1343  }
1344  #ifndef STB_VORBIS_NO_STDIO
1345  {
1346  long x = ftell(z->f);
1347  fseek(z->f, x+n, SEEK_SET);
1348  }
1349  #endif
1350 }
// Move the input position to offset 'loc'.
// Returns 1 when the seek succeeded, 0 otherwise.
//   - when f->push_mode is set, nothing is done and 0 is returned
//   - memory input: 'loc' is relative to f->stream_start; a target at or past
//     f->stream_end (or one that wraps the pointer) parks the position at
//     f->stream_end, sets f->eof and returns 0
//   - FILE input: 'loc' is relative to f->f_start
1352 static int set_file_offset(stb_vorbis *f, unsigned int loc)
1353 {
// NOTE(review): the '#endif' below has no opening directive in this listing
// (its own numbering skips 1354) -- confirm the guard against upstream.
1355  if (f->push_mode) return 0;
1356  #endif
1357  f->eof = 0;
1358  if (USE_MEMORY(f)) {
1359  if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
1360  f->stream = f->stream_end;
1361  f->eof = 1;
1362  return 0;
1363  } else {
1364  f->stream = f->stream_start + loc;
1365  return 1;
1366  }
1367  }
1368  #ifndef STB_VORBIS_NO_STDIO
// an offset that overflows when f_start is added, or that is >= 0x80000000,
// is clamped to 0x7fffffff and flagged as eof before the fseek is attempted
1369  if (loc + f->f_start < loc || loc >= 0x80000000) {
1370  loc = 0x7fffffff;
1371  f->eof = 1;
1372  } else {
1373  loc += f->f_start;
1374  }
// fseek() returns 0 on success
1375  if (!fseek(f->f, loc, SEEK_SET))
1376  return 1;
// seek failed: flag eof and reposition relative to the end of the file
1377  f->eof = 1;
1378  fseek(f->f, f->f_start, SEEK_END);
1379  return 0;
1380  #endif
1381 }
1384 static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };
1386 static int capture_pattern(vorb *f)
1387 {
1388  if (0x4f != get8(f)) return FALSE;
1389  if (0x67 != get8(f)) return FALSE;
1390  if (0x67 != get8(f)) return FALSE;
1391  if (0x53 != get8(f)) return FALSE;
1392  return TRUE;
1393 }
1395 #define PAGEFLAG_continued_packet 1
1396 #define PAGEFLAG_first_page 2
1397 #define PAGEFLAG_last_page 4
1400 {
1401  uint32 loc0,loc1,n;
1402  // stream structure version
1403  if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
1404  // header flag
1405  f->page_flag = get8(f);
1406  // absolute granule position
1407  loc0 = get32(f);
1408  loc1 = get32(f);
1409  // @TODO: validate loc0,loc1 as valid positions?
1410  // stream serial number -- vorbis doesn't interleave, so discard
1411  get32(f);
1412  //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
1413  // page sequence number
1414  n = get32(f);
1415  f->last_page = n;
1416  // CRC32
1417  get32(f);
1418  // page_segments
1419  f->segment_count = get8(f);
1420  if (!getn(f, f->segments, f->segment_count))
1421  return error(f, VORBIS_unexpected_eof);
1422  // assume we _don't_ know the sample position of any segments
1423  f->end_seg_with_known_loc = -2;
1424  if (loc0 != ~0U || loc1 != ~0U) {
1425  int i;
1426  // determine which packet is the last one that will complete
1427  for (i=f->segment_count-1; i >= 0; --i)
1428  if (f->segments[i] < 255)
1429  break;
1430  // 'i' is now the index of the _last_ segment of a packet that ends
1431  if (i >= 0) {
1432  f->end_seg_with_known_loc = i;
1433  f->known_loc_for_packet = loc0;
1434  }
1435  }
1436  if (f->first_decode) {
1437  int i,len;
1438  ProbedPage p;
1439  len = 0;
1440  for (i=0; i < f->segment_count; ++i)
1441  len += f->segments[i];
1442  len += 27 + f->segment_count;
1444  p.page_end = p.page_start + len;
1445  p.last_decoded_sample = loc0;
1446  f->p_first = p;
1447  }
1448  f->next_seg = 0;
1449  return TRUE;
1450 }
// Begin reading the next page of the stream.
// As listed here, this forwards to start_page_no_capturepattern() and
// returns its result.
// NOTE(review): the listing's numbering jumps from 1453 to 1455, so a
// statement may have been dropped from this body -- confirm against upstream.
1452 static int start_page(vorb *f)
1453 {
1455  return start_page_no_capturepattern(f);
1456 }
// Prepare the decoder state for reading a new packet.
// While no segment is pending (f->next_seg == -1) a new page is started;
// returns FALSE if that fails. On success the per-packet bit/byte counters
// are reset and TRUE is returned.
1458 static int start_packet(vorb *f)
1459 {
// NOTE(review): the listing's numbering skips 1462-1463 inside this loop;
// a check may be missing here -- confirm against upstream.
1460  while (f->next_seg == -1) {
1461  if (!start_page(f)) return FALSE;
1464  }
1465  f->last_seg = FALSE;
1466  f->valid_bits = 0;
1467  f->packet_bytes = 0;
1468  f->bytes_in_seg = 0;
1469  // f->next_seg is now valid
1470  return TRUE;
1471 }
1474 {
1475  if (f->next_seg == -1) {
1476  int x = get8(f);
1477  if (f->eof) return FALSE; // EOF at page boundary is not an error!
1478  if (0x4f != x ) return error(f, VORBIS_missing_capture_pattern);
1479  if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1480  if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1481  if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1482  if (!start_page_no_capturepattern(f)) return FALSE;
1484  // set up enough state that we can read this packet if we want,
1485  // e.g. during recovery
1486  f->last_seg = FALSE;
1487  f->bytes_in_seg = 0;
1489  }
1490  }
1491  return start_packet(f);
1492 }
// Advance to the next segment of the current packet.
// Returns the segment's length in bytes (also stored in f->bytes_in_seg),
// or 0 if the packet's last segment was already consumed or a new page
// could not be started.
1494 static int next_segment(vorb *f)
1495 {
1496  int len;
1497  if (f->last_seg) return 0;
// no segment pending on the current page: move on to the next page
1498  if (f->next_seg == -1) {
1499  f->last_seg_which = f->segment_count-1; // in case start_page fails
1500  if (!start_page(f)) { f->last_seg = 1; return 0; }
// NOTE(review): the listing's numbering skips 1501; a check may be missing
// here -- confirm against upstream.
1502  }
1503  len = f->segments[f->next_seg++];
// a segment shorter than 255 bytes is the final segment of its packet
1504  if (len < 255) {
1505  f->last_seg = TRUE;
1506  f->last_seg_which = f->next_seg-1;
1507  }
// mark the page as exhausted once its last segment has been handed out
1508  if (f->next_seg >= f->segment_count)
1509  f->next_seg = -1;
1510  assert(f->bytes_in_seg == 0);
1511  f->bytes_in_seg = len;
1512  return len;
1513 }
1515 #define EOP (-1)
1516 #define INVALID_BITS (-1)
1518 static int get8_packet_raw(vorb *f)
1519 {
1520  if (!f->bytes_in_seg) { // CLANG!
1521  if (f->last_seg) return EOP;
1522  else if (!next_segment(f)) return EOP;
1523  }
1524  assert(f->bytes_in_seg > 0);
1525  --f->bytes_in_seg;
1526  ++f->packet_bytes;
1527  return get8(f);
1528 }
1530 static int get8_packet(vorb *f)
1531 {
1532  int x = get8_packet_raw(f);
1533  f->valid_bits = 0;
1534  return x;
1535 }
1537 static void flush_packet(vorb *f)
1538 {
1539  while (get8_packet_raw(f) != EOP);
1540 }
1542 // @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
1543 // as the huffman decoder?
1544 static uint32 get_bits(vorb *f, int n)
1545 {
1546  uint32 z;
1548  if (f->valid_bits < 0) return 0;
1549  if (f->valid_bits < n) {
1550  if (n > 24) {
1551  // the accumulator technique below would not work correctly in this case
1552  z = get_bits(f, 24);
1553  z += get_bits(f, n-24) << 24;
1554  return z;
1555  }
1556  if (f->valid_bits == 0) f->acc = 0;
1557  while (f->valid_bits < n) {
1558  int z = get8_packet_raw(f);
1559  if (z == EOP) {
1560  f->valid_bits = INVALID_BITS;
1561  return 0;
1562  }
1563  f->acc += z << f->valid_bits;
1564  f->valid_bits += 8;
1565  }
1566  }
1567  if (f->valid_bits < 0) return 0;
1568  z = f->acc & ((1 << n)-1);
1569  f->acc >>= n;
1570  f->valid_bits -= n;
1571  return z;
1572 }
1574 // @OPTIMIZE: primary accumulator for huffman
1575 // expand the buffer to as many bits as possible without reading off end of packet
1576 // it might be nice to allow f->valid_bits and f->acc to be stored in registers,
1577 // e.g. cache them locally and decode locally
1579 {
1580  if (f->valid_bits <= 24) {
1581  if (f->valid_bits == 0) f->acc = 0;
1582  do {
1583  int z;
1584  if (f->last_seg && !f->bytes_in_seg) return;
1585  z = get8_packet_raw(f);
1586  if (z == EOP) return;
1587  f->acc += (unsigned) z << f->valid_bits;
1588  f->valid_bits += 8;
1589  } while (f->valid_bits <= 24);
1590  }
1591 }
1593 enum
1594 {
1598 };
1601 {
1602  int i;
1603  prep_huffman(f);
1605  if (c->codewords == NULL && c->sorted_codewords == NULL)
1606  return -1;
1608  // cases to use binary search: sorted_codewords && !c->codewords
1609  // sorted_codewords && c->entries > 8
1610  if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
1611  // binary search
1612  uint32 code = bit_reverse(f->acc);
1613  int x=0, n=c->sorted_entries, len;
1615  while (n > 1) {
1616  // invariant: sc[x] <= code < sc[x+n]
1617  int m = x + (n >> 1);
1618  if (c->sorted_codewords[m] <= code) {
1619  x = m;
1620  n -= (n>>1);
1621  } else {
1622  n >>= 1;
1623  }
1624  }
1625  // x is now the sorted index
1626  if (!c->sparse) x = c->sorted_values[x];
1627  // x is now sorted index if sparse, or symbol otherwise
1628  len = c->codeword_lengths[x];
1629  if (f->valid_bits >= len) {
1630  f->acc >>= len;
1631  f->valid_bits -= len;
1632  return x;
1633  }
1635  f->valid_bits = 0;
1636  return -1;
1637  }
1639  // if small, linear search
1640  assert(!c->sparse);
1641  for (i=0; i < c->entries; ++i) {
1642  if (c->codeword_lengths[i] == NO_CODE) continue;
1643  if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {
1644  if (f->valid_bits >= c->codeword_lengths[i]) {
1645  f->acc >>= c->codeword_lengths[i];
1646  f->valid_bits -= c->codeword_lengths[i];
1647  return i;
1648  }
1649  f->valid_bits = 0;
1650  return -1;
1651  }
1652  }
1655  f->valid_bits = 0;
1656  return -1;
1657 }
1661 #define DECODE_RAW(var, f,c) \
1662  if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH) \
1663  prep_huffman(f); \
1664  var = f->acc & FAST_HUFFMAN_TABLE_MASK; \
1665  var = c->fast_huffman[var]; \
1666  if (var >= 0) { \
1667  int n = c->codeword_lengths[var]; \
1668  f->acc >>= n; \
1669  f->valid_bits -= n; \
1670  if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \
1671  } else { \
1672  var = codebook_decode_scalar_raw(f,c); \
1673  }
1675 #else
1677 static int codebook_decode_scalar(vorb *f, Codebook *c)
1678 {
1679  int i;
1681  prep_huffman(f);
1682  // fast huffman table lookup
1683  i = f->acc & FAST_HUFFMAN_TABLE_MASK;
1684  i = c->fast_huffman[i];
1685  if (i >= 0) {
1686  f->acc >>= c->codeword_lengths[i];
1687  f->valid_bits -= c->codeword_lengths[i];
1688  if (f->valid_bits < 0) { f->valid_bits = 0; return -1; }
1689  return i;
1690  }
1691  return codebook_decode_scalar_raw(f,c);
1692 }
1694 #define DECODE_RAW(var,f,c) var = codebook_decode_scalar(f,c);
1696 #endif
1698 #define DECODE(var,f,c) \
1699  DECODE_RAW(var,f,c) \
1700  if (c->sparse) var = c->sorted_values[var];
1703  #define DECODE_VQ(var,f,c) DECODE_RAW(var,f,c)
1704 #else
1705  #define DECODE_VQ(var,f,c) DECODE(var,f,c)
1706 #endif
1713 // CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
1714 // where we avoid one addition
1715 #define CODEBOOK_ELEMENT(c,off) (c->multiplicands[off])
1716 #define CODEBOOK_ELEMENT_FAST(c,off) (c->multiplicands[off])
1717 #define CODEBOOK_ELEMENT_BASE(c) (0)
1720 {
1721  int z = -1;
1723  // type 0 is only legal in a scalar context
1724  if (c->lookup_type == 0)
1726  else {
1727  DECODE_VQ(z,f,c);
1728  if (c->sparse) assert(z < c->sorted_entries);
1729  if (z < 0) { // check for EOP
1730  if (!f->bytes_in_seg)
1731  if (f->last_seg)
1732  return z;
1734  }
1735  }
1736  return z;
1737 }
// Decode one vector from codebook 'c' and ADD its elements to
// output[0 .. len-1]; 'len' is clamped to c->dimensions.
// Returns FALSE if codebook_decode_start() reports a negative index,
// TRUE otherwise.
1739 static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
1740 {
1741  int i,z = codebook_decode_start(f,c);
1742  if (z < 0) return FALSE;
1743  if (len > c->dimensions) len = c->dimensions;
// NOTE(review): the '#endif' further down has no opening directive in this
// listing (its numbering skips 1744-1745), so the lookup_type == 1 branch
// is presumably conditionally compiled -- confirm against upstream.
1746  if (c->lookup_type == 1) {
1747  float last = CODEBOOK_ELEMENT_BASE(c);
1748  int div = 1;
1749  for (i=0; i < len; ++i) {
// treat z as a number in base lookup_values; digit i selects element i
1750  int off = (z / div) % c->lookup_values;
1751  float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1752  output[i] += val;
1753  if (c->sequence_p) last = val + c->minimum_value;
1754  div *= c->lookup_values;
1755  }
1756  return TRUE;
1757  }
1758 #endif
// otherwise the multiplicands hold one full vector per entry
1760  z *= c->dimensions;
1761  if (c->sequence_p) {
1762  float last = CODEBOOK_ELEMENT_BASE(c);
1763  for (i=0; i < len; ++i) {
1764  float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1765  output[i] += val;
1766  last = val + c->minimum_value;
1767  }
1768  } else {
1769  float last = CODEBOOK_ELEMENT_BASE(c);
1770  for (i=0; i < len; ++i) {
1771  output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1772  }
1773  }
1775  return TRUE;
1776 }
// Like codebook_decode(), but element i of the decoded vector is added to
// output[i*step] instead of output[i]. 'len' is clamped to c->dimensions.
// Returns FALSE if codebook_decode_start() reports a negative index,
// TRUE otherwise.
1778 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
1779 {
1780  int i,z = codebook_decode_start(f,c);
1781  float last = CODEBOOK_ELEMENT_BASE(c);
1782  if (z < 0) return FALSE;
1783  if (len > c->dimensions) len = c->dimensions;
// NOTE(review): the '#endif' further down has no opening directive in this
// listing (its numbering skips 1784-1785) -- confirm against upstream.
1786  if (c->lookup_type == 1) {
1787  int div = 1;
1788  for (i=0; i < len; ++i) {
// treat z as a number in base lookup_values; digit i selects element i
1789  int off = (z / div) % c->lookup_values;
1790  float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1791  output[i*step] += val;
1792  if (c->sequence_p) last = val;
1793  div *= c->lookup_values;
1794  }
1795  return TRUE;
1796  }
1797 #endif
// otherwise the multiplicands hold one full vector per entry
1799  z *= c->dimensions;
1800  for (i=0; i < len; ++i) {
1801  float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1802  output[i*step] += val;
1803  if (c->sequence_p) last = val;
1804  }
1806  return TRUE;
1807 }
1809 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1810 {
1811  int c_inter = *c_inter_p;
1812  int p_inter = *p_inter_p;
1813  int i,z, effective = c->dimensions;
1815  // type 0 is only legal in a scalar context
1816  if (c->lookup_type == 0) return error(f, VORBIS_invalid_stream);
1818  while (total_decode > 0) {
1819  float last = CODEBOOK_ELEMENT_BASE(c);
1820  DECODE_VQ(z,f,c);
1822  assert(!c->sparse || z < c->sorted_entries);
1823  #endif
1824  if (z < 0) {
1825  if (!f->bytes_in_seg)
1826  if (f->last_seg) return FALSE;
1827  return error(f, VORBIS_invalid_stream);
1828  }
1830  // if this will take us off the end of the buffers, stop short!
1831  // we check by computing the length of the virtual interleaved
1832  // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1833  // and the length we'll be using (effective)
1834  if (c_inter + p_inter*ch + effective > len * ch) {
1835  effective = len*ch - (p_inter*ch - c_inter);
1836  }
1839  if (c->lookup_type == 1) {
1840  int div = 1;
1841  for (i=0; i < effective; ++i) {
1842  int off = (z / div) % c->lookup_values;
1843  float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1844  if (outputs[c_inter])
1845  outputs[c_inter][p_inter] += val;
1846  if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1847  if (c->sequence_p) last = val;
1848  div *= c->lookup_values;
1849  }
1850  } else
1851  #endif
1852  {
1853  z *= c->dimensions;
1854  if (c->sequence_p) {
1855  for (i=0; i < effective; ++i) {
1856  float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1857  if (outputs[c_inter])
1858  outputs[c_inter][p_inter] += val;
1859  if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1860  last = val;
1861  }
1862  } else {
1863  for (i=0; i < effective; ++i) {
1864  float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1865  if (outputs[c_inter])
1866  outputs[c_inter][p_inter] += val;
1867  if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1868  }
1869  }
1870  }
1872  total_decode -= effective;
1873  }
1874  *c_inter_p = c_inter;
1875  *p_inter_p = p_inter;
1876  return TRUE;
1877 }
// Integer interpolation of the line through (x0,y0) and (x1,y1) at 'x'.
// The offset from y0 is computed on the magnitude of the slope, so the
// division truncates toward y0 for rising and falling lines alike.
static int predict_point(int x, int x0, int x1, int y0, int y1)
{
   const int rise  = y1 - y0;
   const int run   = x1 - x0;
   const int delta = (abs(rise) * (x - x0)) / run;
   if (rise < 0)
      return y0 - delta;
   return y0 + delta;
}
1889 // the following table is block-copied from the specification
1890 static float inverse_db_table[256] =
1891 {
1892  1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
1893  1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
1894  1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
1895  2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
1896  2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
1897  3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
1898  4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
1899  6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
1900  7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
1901  1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
1902  1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
1903  1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
1904  2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
1905  2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
1906  3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
1907  4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
1908  5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
1909  7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
1910  9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
1911  1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
1912  1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
1913  2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
1914  2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
1915  3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
1916  4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
1917  5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
1918  7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
1919  9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
1920  0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
1921  0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
1922  0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
1923  0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
1924  0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
1925  0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
1926  0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
1927  0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
1928  0.00092223983f, 0.00098217216f, 0.0010459992f, 0.0011139742f,
1929  0.0011863665f, 0.0012634633f, 0.0013455702f, 0.0014330129f,
1930  0.0015261382f, 0.0016253153f, 0.0017309374f, 0.0018434235f,
1931  0.0019632195f, 0.0020908006f, 0.0022266726f, 0.0023713743f,
1932  0.0025254795f, 0.0026895994f, 0.0028643847f, 0.0030505286f,
1933  0.0032487691f, 0.0034598925f, 0.0036847358f, 0.0039241906f,
1934  0.0041792066f, 0.0044507950f, 0.0047400328f, 0.0050480668f,
1935  0.0053761186f, 0.0057254891f, 0.0060975636f, 0.0064938176f,
1936  0.0069158225f, 0.0073652516f, 0.0078438871f, 0.0083536271f,
1937  0.0088964928f, 0.009474637f, 0.010090352f, 0.010746080f,
1938  0.011444421f, 0.012188144f, 0.012980198f, 0.013823725f,
1939  0.014722068f, 0.015678791f, 0.016697687f, 0.017782797f,
1940  0.018938423f, 0.020169149f, 0.021479854f, 0.022875735f,
1941  0.024362330f, 0.025945531f, 0.027631618f, 0.029427276f,
1942  0.031339626f, 0.033376252f, 0.035545228f, 0.037855157f,
1943  0.040315199f, 0.042935108f, 0.045725273f, 0.048696758f,
1944  0.051861348f, 0.055231591f, 0.058820850f, 0.062643361f,
1945  0.066714279f, 0.071049749f, 0.075666962f, 0.080584227f,
1946  0.085821044f, 0.091398179f, 0.097337747f, 0.10366330f,
1947  0.11039993f, 0.11757434f, 0.12521498f, 0.13335215f,
1948  0.14201813f, 0.15124727f, 0.16107617f, 0.17154380f,
1949  0.18269168f, 0.19456402f, 0.20720788f, 0.22067342f,
1950  0.23501402f, 0.25028656f, 0.26655159f, 0.28387361f,
1951  0.30232132f, 0.32196786f, 0.34289114f, 0.36517414f,
1952  0.38890521f, 0.41417847f, 0.44109412f, 0.46975890f,
1953  0.50028648f, 0.53279791f, 0.56742212f, 0.60429640f,
1954  0.64356699f, 0.68538959f, 0.72993007f, 0.77736504f,
1955  0.82788260f, 0.88168307f, 0.9389798f, 1.0f
1956 };
1959 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
1960 // note that you must produce bit-identical output to decode correctly;
1961 // this specific sequence of operations is specified in the spec (it's
1962 // drawing integer-quantized frequency-space lines that the encoder
1963 // expects to be exactly the same)
1964 // ... also, isn't the whole point of Bresenham's algorithm to NOT
1965 // have to divide in the setup? sigh.
1967 #define LINE_OP(a,b) a *= b
1968 #else
1969 #define LINE_OP(a,b) a = b
1970 #endif
1973 #define DIVTAB_NUMER 32
1974 #define DIVTAB_DENOM 64
1975 int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB
1976 #endif
// Apply an integer-quantized line from (x0,y0) towards (x1,y1) to
// output[x0 .. min(x1,n)-1]: at every x the current y indexes
// inverse_db_table and the looked-up value is combined into output[x]
// with LINE_OP. The stepping is an error-accumulating integer line walk.
1978 static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
1979 {
1980  int dy = y1 - y0;
1981  int adx = x1 - x0;
1982  int ady = abs(dy);
1983  int base;
1984  int x=x0,y=y0;
1985  int err = 0;
1986  int sy;
// NOTE(review): the '#else'/'#endif' below have no opening directive in
// this listing (its numbering skips 1987-1988); the table-based branch is
// presumably conditionally compiled -- confirm against upstream.
1989  if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) {
1990  if (dy < 0) {
1991  base = -integer_divide_table[ady][adx];
1992  sy = base-1;
1993  } else {
1994  base = integer_divide_table[ady][adx];
1995  sy = base+1;
1996  }
1997  } else {
1998  base = dy / adx;
1999  if (dy < 0)
2000  sy = base - 1;
2001  else
2002  sy = base+1;
2003  }
2004 #else
// base = whole step in y per unit x; sy = that step plus one more unit in
// the direction of travel, taken whenever the error term overflows
2005  base = dy / adx;
2006  if (dy < 0)
2007  sy = base - 1;
2008  else
2009  sy = base+1;
2010 #endif
// keep only the fractional part of the slope as the per-step error increment
2011  ady -= abs(base) * adx;
// never draw past the end of the output buffer
2012  if (x1 > n) x1 = n;
2013  if (x < x1) {
2014  LINE_OP(output[x], inverse_db_table[y]);
2015  for (++x; x < x1; ++x) {
2016  err += ady;
2017  if (err >= adx) {
2018  err -= adx;
2019  y += sy;
2020  } else
2021  y += base;
2022  LINE_OP(output[x], inverse_db_table[y]);
2023  }
2024  }
2025 }
2027 static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
2028 {
2029  int k;
2030  if (rtype == 0) {
2031  int step = n / book->dimensions;
2032  for (k=0; k < step; ++k)
2033  if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
2034  return FALSE;
2035  } else {
2036  for (k=0; k < n; ) {
2037  if (!codebook_decode(f, book, target+offset, n-k))
2038  return FALSE;
2039  k += book->dimensions;
2040  offset += book->dimensions;
2041  }
2042  }
2043  return TRUE;
2044 }
2046 static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode)
2047 {
2048  int i,j,pass;
2049  Residue *r = f->residue_config + rn;
2050  int rtype = f->residue_types[rn];
2051  int c = r->classbook;
2052  int classwords = f->codebooks[c].dimensions;
2053  int n_read = r->end - r->begin;
2054  int part_read = n_read / r->part_size;
2055  int temp_alloc_point = temp_alloc_save(f);
2057  uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
2058  #else
2059  int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications));
2060  #endif
2062  CHECK(f);
2064  for (i=0; i < ch; ++i)
2065  if (!do_not_decode[i])
2066  memset(residue_buffers[i], 0, sizeof(float) * n);
2068  if (rtype == 2 && ch != 1) {
2069  for (j=0; j < ch; ++j)
2070  if (!do_not_decode[j])
2071  break;
2072  if (j == ch)
2073  goto done;
2075  for (pass=0; pass < 8; ++pass) {
2076  int pcount = 0, class_set = 0;
2077  if (ch == 2) {
2078  while (pcount < part_read) {
2079  int z = r->begin + pcount*r->part_size;
2080  int c_inter = (z & 1), p_inter = z>>1;
2081  if (pass == 0) {
2082  Codebook *c = f->codebooks+r->classbook;
2083  int q;
2084  DECODE(q,f,c);
2085  if (q == EOP) goto done;
2087  part_classdata[0][class_set] = r->classdata[q];
2088  #else
2089  for (i=classwords-1; i >= 0; --i) {
2090  classifications[0][i+pcount] = q % r->classifications;
2091  q /= r->classifications;
2092  }
2093  #endif
2094  }
2095  for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2096  int z = r->begin + pcount*r->part_size;
2098  int c = part_classdata[0][class_set][i];
2099  #else
2100  int c = classifications[0][pcount];
2101  #endif
2102  int b = r->residue_books[c][pass];
2103  if (b >= 0) {
2104  Codebook *book = f->codebooks + b;
2106  if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2107  goto done;
2108  #else
2109  // saves 1%
2110  if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2111  goto done;
2112  #endif
2113  } else {
2114  z += r->part_size;
2115  c_inter = z & 1;
2116  p_inter = z >> 1;
2117  }
2118  }
2120  ++class_set;
2121  #endif
2122  }
2123  } else if (ch == 1) {
2124  while (pcount < part_read) {
2125  int z = r->begin + pcount*r->part_size;
2126  int c_inter = 0, p_inter = z;
2127  if (pass == 0) {
2128  Codebook *c = f->codebooks+r->classbook;
2129  int q;
2130  DECODE(q,f,c);
2131  if (q == EOP) goto done;
2133  part_classdata[0][class_set] = r->classdata[q];
2134  #else
2135  for (i=classwords-1; i >= 0; --i) {
2136  classifications[0][i+pcount] = q % r->classifications;
2137  q /= r->classifications;
2138  }
2139  #endif
2140  }
2141  for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2142  int z = r->begin + pcount*r->part_size;
2144  int c = part_classdata[0][class_set][i];
2145  #else
2146  int c = classifications[0][pcount];
2147  #endif
2148  int b = r->residue_books[c][pass];
2149  if (b >= 0) {
2150  Codebook *book = f->codebooks + b;
2151  if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2152  goto done;
2153  } else {
2154  z += r->part_size;
2155  c_inter = 0;
2156  p_inter = z;
2157  }
2158  }
2160  ++class_set;
2161  #endif
2162  }
2163  } else {
2164  while (pcount < part_read) {
2165  int z = r->begin + pcount*r->part_size;
2166  int c_inter = z % ch, p_inter = z/ch;
2167  if (pass == 0) {
2168  Codebook *c = f->codebooks+r->classbook;
2169  int q;
2170  DECODE(q,f,c);
2171  if (q == EOP) goto done;
2173  part_classdata[0][class_set] = r->classdata[q];
2174  #else
2175  for (i=classwords-1; i >= 0; --i) {
2176  classifications[0][i+pcount] = q % r->classifications;
2177  q /= r->classifications;
2178  }
2179  #endif
2180  }
2181  for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2182  int z = r->begin + pcount*r->part_size;
2184  int c = part_classdata[0][class_set][i];
2185  #else
2186  int c = classifications[0][pcount];
2187  #endif
2188  int b = r->residue_books[c][pass];
2189  if (b >= 0) {
2190  Codebook *book = f->codebooks + b;
2191  if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2192  goto done;
2193  } else {
2194  z += r->part_size;
2195  c_inter = z % ch;
2196  p_inter = z / ch;
2197  }
2198  }
2200  ++class_set;
2201  #endif
2202  }
2203  }
2204  }
2205  goto done;
2206  }
2207  CHECK(f);
2209  for (pass=0; pass < 8; ++pass) {
2210  int pcount = 0, class_set=0;
2211  while (pcount < part_read) {
2212  if (pass == 0) {
2213  for (j=0; j < ch; ++j) {
2214  if (!do_not_decode[j]) {
2215  Codebook *c = f->codebooks+r->classbook;
2216  int temp;
2217  DECODE(temp,f,c);
2218  if (temp == EOP) goto done;
2220  part_classdata[j][class_set] = r->classdata[temp];
2221  #else
2222  for (i=classwords-1; i >= 0; --i) {
2223  classifications[j][i+pcount] = temp % r->classifications;
2224  temp /= r->classifications;
2225  }
2226  #endif
2227  }
2228  }
2229  }
2230  for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2231  for (j=0; j < ch; ++j) {
2232  if (!do_not_decode[j]) {
2234  int c = part_classdata[j][class_set][i];
2235  #else
2236  int c = classifications[j][pcount];
2237  #endif
2238  int b = r->residue_books[c][pass];
2239  if (b >= 0) {
2240  float *target = residue_buffers[j];
2241  int offset = r->begin + pcount * r->part_size;
2242  int n = r->part_size;
2243  Codebook *book = f->codebooks + b;
2244  if (!residue_decode(f, book, target, offset, n, rtype))
2245  goto done;
2246  }
2247  }
2248  }
2249  }
2251  ++class_set;
2252  #endif
2253  }
2254  }
2255  done:
2256  CHECK(f);
2258 // temp_free(f,part_classdata);
2259  #else
2260  temp_free(f,classifications);
2261  #endif
2262  temp_alloc_restore(f,temp_alloc_point);
2263 }
2266 #if 0
2267 // slow way for debugging
// Reference inverse MDCT evaluated directly from the defining cosine sum.
//   buffer : on entry the first n/2 floats are the input coefficients;
//            on exit all n floats hold the (unscaled) output samples
//   n      : transform size; n/2 inputs produce n outputs
// Cost is n * n/2 cos() calls. The input is first copied to a heap scratch
// array because the output loop overwrites buffer in place.
// NOTE(review): the malloc result is used without a NULL check.
2268 void inverse_mdct_slow(float *buffer, int n)
2269 {
2270  int i,j;
2271  int n2 = n >> 1;
2272  float *x = (float *) malloc(sizeof(*x) * n2);
2273  memcpy(x, buffer, sizeof(*x) * n2);
2274  for (i=0; i < n; ++i) {
2275  float acc = 0;
2276  for (j=0; j < n2; ++j)
2277  // formula from paper:
2278  //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2279  // formula from wikipedia
2280  //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2281  // these are equivalent, except the formula from the paper inverts the multiplier!
2282  // however, what actually works is NO MULTIPLIER!?!
2283  //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
     // the only live statement of the inner loop: accumulate with no scale factor
2284  acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2285  buffer[i] = acc;
2286  }
2287  free(x);
2288 }
2289 #elif 0
2290 // same as above, but just barely able to run in real time on modern machines
// Table-driven form of the direct cosine-sum inverse MDCT.
//   buffer    : first n/2 floats are the input; all n floats are overwritten
//   n         : transform size
//   f         : not referenced in this body
//   blocktype : not referenced in this body
// mcos[i] caches cos(pi/2 * i / n) for i in [0, 4n); the product index is
// reduced into that range with "& nmask" (nmask = 4n-1).
// NOTE(review): the mask only behaves as a modulo when n is a power of two,
// and the 16384-entry table only holds 4*n entries when n <= 4096 -- confirm
// the callers' range before enabling this variant.
// NOTE(review): the malloc result is used without a NULL check.
2291 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2292 {
2293  float mcos[16384];
2294  int i,j;
2295  int n2 = n >> 1, nmask = (n << 2) -1;
2296  float *x = (float *) malloc(sizeof(*x) * n2);
2297  memcpy(x, buffer, sizeof(*x) * n2);
     // fill one full period of the cosine
2298  for (i=0; i < 4*n; ++i)
2299  mcos[i] = (float) cos(M_PI / 2 * i / n);
2301  for (i=0; i < n; ++i) {
2302  float acc = 0;
2303  for (j=0; j < n2; ++j)
2304  acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask];
2305  buffer[i] = acc;
2306  }
2307  free(x);
2308 }
2309 #elif 0
2310 // transform to use a slow dct-iv; this is STILL basically trivial,
2311 // but only requires half as many ops
// Direct O(n*n) cosine transform used by the DCT-IV based debug MDCT.
//   buffer : n input floats, overwritten in place with n outputs
//   n      : transform length
// mcos[i] caches cos(pi/4 * i / n) for i in [0, 8n); the product
// (2i+1)*(2j+1) is reduced into that range with "& nmask" (nmask = 8n-1).
// NOTE(review): the fixed stack arrays bound the usable size: x holds 2048
// floats and mcos holds 16384, so n must not exceed 2048. The mask is only a
// modulo when n is a power of two.
2312 void dct_iv_slow(float *buffer, int n)
2313 {
2314  float mcos[16384];
2315  float x[2048];
2316  int i,j;
     // n2 is computed but not used anywhere below
2317  int n2 = n >> 1, nmask = (n << 3) - 1;
2318  memcpy(x, buffer, sizeof(*x) * n);
2319  for (i=0; i < 8*n; ++i)
2320  mcos[i] = (float) cos(M_PI / 4 * i / n);
2321  for (i=0; i < n; ++i) {
2322  float acc = 0;
2323  for (j=0; j < n; ++j)
2324  acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask];
2325  buffer[i] = acc;
2326  }
2327 }
// Inverse MDCT built on dct_iv_slow: transform the n/2 inputs, then unfold
// the n/2 results into n output samples with the sign/reflection pattern
// written in the three loops below.
//   buffer    : first n/2 floats are the input; all n floats are overwritten
//   n         : transform size
//   f         : not referenced in this body
//   blocktype : not referenced in this body
// NOTE(review): temp holds 4096 floats, so n/2 must not exceed 4096; the
// limits inside dct_iv_slow apply to n/2 as well.
2329 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2330 {
2331  int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4;
2332  float temp[4096];
2334  memcpy(temp, buffer, n2 * sizeof(float));
2335  dct_iv_slow(temp, n2); // returns -c'-d, a-b'
     // first quarter: upper half of temp copied straight through
2337  for (i=0; i < n4 ; ++i) buffer[i] = temp[i+n4]; // a-b'
     // middle half: all of temp, reversed and negated
2338  for ( ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1]; // b-a', c+d'
     // last quarter: lower half of temp, negated
2339  for ( ; i < n ; ++i) buffer[i] = -temp[i - n3_4]; // c'+d
2340 }
2341 #endif
// Default the switch to 0 (off) unless the build already defined it.
2343 #ifndef LIBVORBIS_MDCT
2344 #define LIBVORBIS_MDCT 0
2345 #endif
// NOTE(review): the conditional that opens the region closed by the final
// #endif of this section is not visible in this listing (the embedded
// numbering skips a line here); presumably it tests LIBVORBIS_MDCT -- confirm
// against the upstream source.
2348 // directly call the vorbis MDCT using an interface documented
2349 // by Jeff Roberts... useful for performance comparison
// Local mirror of the external MDCT state structure. Only the field `n` is
// read by the code in this section; the remaining fields are filled and used
// by the external mdct_* routines.
2350 typedef struct
2351 {
2352  int n;
2353  int log2n;
2355  float *trig;
2356  int *bitrev;
2358  float scale;
2359 } mdct_lookup;
// Implemented outside this file; must be supplied at link time.
2361 extern void mdct_init(mdct_lookup *lookup, int n);
2362 extern void mdct_clear(mdct_lookup *l);
2363 extern void mdct_backward(mdct_lookup *init, float *in, float *out);
// Two lazily initialised lookups, one per distinct transform size seen.
// File-scope objects, so both start zeroed (n == 0 means "unused").
2365 mdct_lookup M1,M2;
// Inverse MDCT that forwards to the external mdct_backward(), transforming
// `buffer` in place (it is passed as both input and output).
//   n         : transform size; selects or initialises M1 or M2
//   f         : not referenced in this body
//   blocktype : not referenced in this body
// mdct_clear() is never called here, so the lookups live for the whole run.
2367 void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2368 {
2369  mdct_lookup *M;
2370  if (M1.n == n) M = &M1;
2371  else if (M2.n == n) M = &M2;
2372  else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }
2373  else {
     // a third distinct size was requested: trap into the debugger.
     // NOTE(review): "__asm int 3" is MSVC-style x86 inline assembly and is
     // not portable to other compilers or architectures.
2374  if (M2.n) __asm int 3;
2375  mdct_init(&M2, n);
2376  M = &M2;
2377  }
2379  mdct_backward(M, buffer, buffer);
2380 }
2381 #endif
2384 // the following were split out into separate functions while optimizing;
2385 // they could be pushed back up but eh. __forceinline showed no change;
2386 // they're probably already being inlined.
// Iteration 0 of IMDCT step 3.
// Starting at e[i_off] and walking downward, pairs every element with the
// one k_off floats away. For each adjacent (re,im) pair the "upper" slot
// receives the sum and the "lower" slot receives the difference rotated by
// the twiddle (A[0],A[1]). The twiddle pointer advances 8 floats after each
// pair. n must be a multiple of 4; n/4 groups of four pairs (8 floats) are
// processed.
static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
{
   float *upper = e + i_off;
   float *lower = upper + k_off;
   int group, pair;

   assert((n & 3) == 0);
   for (group = n >> 2; group > 0; --group) {
      for (pair = 0; pair < 4; ++pair) {
         float *p = upper - 2*pair;
         float *q = lower - 2*pair;
         // differences must be taken before the sums overwrite p[]
         float diff0 = p[ 0] - q[ 0];
         float diff1 = p[-1] - q[-1];
         p[ 0] += q[ 0];
         p[-1] += q[-1];
         q[ 0] = diff0 * A[0] - diff1 * A[1];
         q[-1] = diff1 * A[0] + diff0 * A[1];
         A += 8;
      }
      upper -= 8;
      lower -= 8;
   }
}
// Step 3 butterfly pass with a caller-chosen twiddle stride.
// Starting at e[d0] and walking downward, pairs every element with the one
// k_off floats away: the upper slot receives the sum, the lower slot the
// difference rotated by (A[0],A[1]). A advances by k1 floats after each
// (re,im) pair. lim/4 blocks of four pairs (8 floats) are processed.
static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
{
   float *upper = e + d0;
   float *lower = upper + k_off;
   int blocks, b;

   for (blocks = lim >> 2; blocks > 0; --blocks) {
      for (b = 0; b < 8; b += 2) {
         // differences must be taken before the sums overwrite upper[]
         float diff0 = upper[-b]   - lower[-b];
         float diff1 = upper[-b-1] - lower[-b-1];
         upper[-b]   += lower[-b];
         upper[-b-1] += lower[-b-1];
         lower[-b]   = (diff0)*A[0] - (diff1) * A[1];
         lower[-b-1] = (diff1)*A[0] + (diff0) * A[1];
         A += k1;
      }
      upper -= 8;
      lower -= 8;
   }
}
// Step 3 butterfly pass that reuses the same four twiddles on every
// repetition. The twiddle pairs are read once, from A[0], A[a_off],
// A[2*a_off] and A[3*a_off] (each followed by its +1 neighbour), before any
// data is touched. Each of the n repetitions then processes four (re,im)
// pairs downward from the current position -- upper slot gets the sum, the
// slot k_off floats away gets the rotated difference -- and steps both
// pointers down by k0 floats.
static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
{
   float tw[4][2];
   float *upper = e + i_off;
   float *lower = upper + k_off;
   int t, rep;

   for (t = 0; t < 4; ++t) {
      tw[t][0] = A[a_off*t + 0];
      tw[t][1] = A[a_off*t + 1];
   }

   for (rep = n; rep > 0; --rep) {
      for (t = 0; t < 4; ++t) {
         float *p = upper - 2*t;
         float *q = lower - 2*t;
         // differences must be taken before the sums overwrite p[]
         float diff0 = p[ 0] - q[ 0];
         float diff1 = p[-1] - q[-1];
         p[ 0] = p[ 0] + q[ 0];
         p[-1] = p[-1] + q[-1];
         q[ 0] = (diff0) * tw[t][0] - (diff1) * tw[t][1];
         q[-1] = (diff1) * tw[t][0] + (diff0) * tw[t][1];
      }
      upper -= k0;
      lower -= k0;
   }
}
2533 static __forceinline void iter_54(float *z)
2534 {
2535  float k00,k11,k22,k33;
2536  float y0,y1,y2,y3;
2538  k00 = z[ 0] - z[-4];
2539  y0 = z[ 0] + z[-4];
2540  y2 = z[-2] + z[-6];
2541  k22 = z[-2] - z[-6];
2543  z[-0] = y0 + y2; // z0 + z4 + z2 + z6
2544  z[-2] = y0 - y2; // z0 + z4 - z2 - z6
2546  // done with y0,y2
2548  k33 = z[-3] - z[-7];
2550  z[-4] = k00 + k33; // z0 - z4 + z3 - z7
2551  z[-6] = k00 - k33; // z0 - z4 - z3 + z7
2553  // done with k33
2555  k11 = z[-1] - z[-5];
2556  y1 = z[-1] + z[-5];
2557  y3 = z[-3] + z[-7];
2559  z[-1] = y1 + y3; // z1 + z5 + z3 + z7
2560  z[-3] = y1 - y3; // z1 + z5 - z3 - z7
2561  z[-5] = k11 - k22; // z1 - z5 + z2 - z6
2562  z[-7] = k11 + k22; // z1 - z5 - z2 + z6
2563 }
// Fused tail of step 3: processes n chunks of 16 floats, walking downward
// from e[i_off]. Within each chunk the upper 8 floats are combined with the
// lower 8 (sum kept in the upper slot, modified difference in the lower
// slot), using a single twiddle value A[base_n >> 3] where a multiply is
// needed at all; then iter_54 is applied to each half of the chunk.
static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
{
   const float w = A[0 + (base_n >> 3)];
   float *p = e + i_off;
   float *stop = p - 16 * n;

   for (; p > stop; p -= 16) {
      float d0, d1;

      // pair 0: difference stored unchanged
      d0 = p[-0] - p[-8];
      d1 = p[-1] - p[-9];
      p[-0] = p[-0] + p[-8];
      p[-1] = p[-1] + p[-9];
      p[-8] = d0;
      p[-9] = d1;

      // pair 1: difference combined and scaled by w
      d0 = p[ -2] - p[-10];
      d1 = p[ -3] - p[-11];
      p[ -2] = p[ -2] + p[-10];
      p[ -3] = p[ -3] + p[-11];
      p[-10] = (d0+d1) * w;
      p[-11] = (d1-d0) * w;

      // pair 2: first difference taken in reverse order, results swapped
      d0 = p[-12] - p[ -4];
      d1 = p[ -5] - p[-13];
      p[ -4] = p[ -4] + p[-12];
      p[ -5] = p[ -5] + p[-13];
      p[-12] = d1;
      p[-13] = d0;

      // pair 3: first difference taken in reverse order, then scaled by w
      d0 = p[-14] - p[ -6];
      d1 = p[ -7] - p[-15];
      p[ -6] = p[ -6] + p[-14];
      p[ -7] = p[ -7] + p[-15];
      p[-14] = (d0+d1) * w;
      p[-15] = (d0-d1) * w;

      iter_54(p);
      iter_54(p-8);
   }
}
// Optimized in-place inverse MDCT.
//   buffer    : on entry the first n/2 floats are read as input; on exit
//               all n floats have been written (step 8 stores into both
//               halves of buffer)
//   n         : transform size
//   f         : decoder state; supplies the per-blocktype tables f->A, f->B,
//               f->C and f->bit_reverse, and the temp allocator
//   blocktype : index into those tables
// Scratch space of n/2 floats (buf2) comes from temp_alloc and is released
// by temp_alloc_restore at the end.
// NOTE(review): the temp_alloc result is used without a NULL check; whether
// temp_alloc can fail is not visible from this function.
2609 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2610 {
2611  int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2612  int ld;
2613  // @OPTIMIZE: reduce register pressure by using fewer variables?
2614  int save_point = temp_alloc_save(f);
2615  float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));
2616  float *u=NULL,*v=NULL;
2617  // twiddle factors
2618  float *A = f->A[blocktype];
2620  // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2621  // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
2623  // kernel from paper
2626  // merged:
2627  // copy and reflect spectral data
2628  // step 0
2630  // note that it turns out that the items added together during
2631  // this step are, in fact, being added to themselves (as reflected
2632  // by step 0). inexplicable inefficiency! this became obvious
2633  // once I combined the passes.
2635  // so there's a missing 'times 2' here (for adding X to itself).
2636  // this propagates through linearly to the end, where the numbers
2637  // are 1/2 too small, and need to be compensated for.
2639  {
2640  float *d,*e, *AA, *e_stop;
     // d walks buf2 from its end towards its start, two floats per step
2641  d = &buf2[n2-2];
2642  AA = A;
2643  e = &buffer[0];
2644  e_stop = &buffer[n2];
     // first loop: read buffer forward in steps of 4 (elements e[0], e[2])
2645  while (e != e_stop) {
2646  d[1] = (e[0] * AA[0] - e[2]*AA[1]);
2647  d[0] = (e[0] * AA[1] + e[2]*AA[0]);
2648  d -= 2;
2649  AA += 2;
2650  e += 4;
2651  }
     // second loop: read buffer backward from n2-3, negated, until buf2 is full
2653  e = &buffer[n2-3];
2654  while (d >= buf2) {
2655  d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
2656  d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
2657  d -= 2;
2658  AA += 2;
2659  e -= 4;
2660  }
2661  }
2663  // now we use symbolic names for these, so that we can
2664  // possibly swap their meaning as we change which operations
2665  // are in place
2667  u = buffer;
2668  v = buf2;
2670  // step 2 (paper output is w, now u)
2671  // this could be in place, but the data ends up in the wrong
2672  // place... _somebody_'s got to swap it, so this is nominated
2673  {
     // AA walks the twiddle table backward, 8 floats per iteration
2674  float *AA = &A[n2-8];
2675  float *d0,*d1, *e0, *e1;
2677  e0 = &v[n4];
2678  e1 = &v[0];
2680  d0 = &u[n4];
2681  d1 = &u[0];
2683  while (AA >= A) {
2684  float v40_20, v41_21;
2686  v41_21 = e0[1] - e1[1];
2687  v40_20 = e0[0] - e1[0];
2688  d0[1] = e0[1] + e1[1];
2689  d0[0] = e0[0] + e1[0];
2690  d1[1] = v41_21*AA[4] - v40_20*AA[5];
2691  d1[0] = v40_20*AA[4] + v41_21*AA[5];
2693  v41_21 = e0[3] - e1[3];
2694  v40_20 = e0[2] - e1[2];
2695  d0[3] = e0[3] + e1[3];
2696  d0[2] = e0[2] + e1[2];
2697  d1[3] = v41_21*AA[0] - v40_20*AA[1];
2698  d1[2] = v40_20*AA[0] + v41_21*AA[1];
2700  AA -= 8;
2702  d0 += 4;
2703  d1 += 4;
2704  e0 += 4;
2705  e1 += 4;
2706  }
2707  }
2709  // step 3
2710  ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2712  // optimized step 3:
2714  // the original step3 loop can be nested r inside s or s inside r;
2715  // it's written originally as s inside r, but this is dumb when r
2716  // iterates many times, and s few. So I have two copies of it and
2717  // switch between them halfway.
2719  // this is iteration 0 of step 3
2720  imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
2721  imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
2723  // this is iteration 1 of step 3
2724  imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
2725  imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
2726  imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
2727  imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
     // remaining iterations: the first group uses the r-loop form ...
2729  l=2;
2730  for (; l < (ld-3)>>1; ++l) {
2731  int k0 = n >> (l+2), k0_2 = k0>>1;
2732  int lim = 1 << (l+1);
2733  int i;
2734  for (i=0; i < lim; ++i)
2735  imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
2736  }
     // ... and the rest, up to ld-6, use the s-loop form
2738  for (; l < ld-6; ++l) {
2739  int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
2740  int rlim = n >> (l+6), r;
2741  int lim = 1 << (l+1);
2742  int i_off;
2743  float *A0 = A;
2744  i_off = n2-1;
2745  for (r=rlim; r > 0; --r) {
2746  imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2747  A0 += k1*4;
2748  i_off -= 8;
2749  }
2750  }
2752  // iterations with count:
2753  // ld-6,-5,-4 all interleaved together
2754  // the big win comes from getting rid of needless flops
2755  // due to the constants on pass 5 & 4 being all 1 and 0;
2756  // combining them to be simultaneous to improve cache made little difference
2757  imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
2759  // output is u
2761  // step 4, 5, and 6
2762  // cannot be in-place because of step 5
2763  {
2764  uint16 *bitrev = f->bit_reverse[blocktype];
2765  // weirdly, I'd have thought reading sequentially and writing
2766  // erratically would have been better than vice-versa, but in
2767  // fact that's not what my testing showed. (That is, with
2768  // j = bitreverse(i), do you read i and write j, or read j and write i.)
2770  float *d0 = &v[n4-4];
2771  float *d1 = &v[n2-4];
     // each iteration consumes two bit_reverse entries and gathers four
     // floats from u at each of them, writing v sequentially (downward)
2772  while (d0 >= v) {
2773  int k4;
2775  k4 = bitrev[0];
2776  d1[3] = u[k4+0];
2777  d1[2] = u[k4+1];
2778  d0[3] = u[k4+2];
2779  d0[2] = u[k4+3];
2781  k4 = bitrev[1];
2782  d1[1] = u[k4+0];
2783  d1[0] = u[k4+1];
2784  d0[1] = u[k4+2];
2785  d0[0] = u[k4+3];
2787  d0 -= 4;
2788  d1 -= 4;
2789  bitrev += 2;
2790  }
2791  }
2792  // (paper output is u, now v)
2795  // data must be in buf2
2796  assert(v == buf2);
2798  // step 7 (paper output is v, now v)
2799  // this is now in place
2800  {
2801  float *C = f->C[blocktype];
2802  float *d, *e;
     // d walks up from the start of v and e walks down from its end;
     // they meet in the middle
2804  d = v;
2805  e = v + n2 - 4;
2807  while (d < e) {
2808  float a02,a11,b0,b1,b2,b3;
2810  a02 = d[0] - e[2];
2811  a11 = d[1] + e[3];
2813  b0 = C[1]*a02 + C[0]*a11;
2814  b1 = C[1]*a11 - C[0]*a02;
2816  b2 = d[0] + e[ 2];
2817  b3 = d[1] - e[ 3];
2819  d[0] = b2 + b0;
2820  d[1] = b3 + b1;
2821  e[2] = b2 - b0;
2822  e[3] = b1 - b3;
2824  a02 = d[2] - e[0];
2825  a11 = d[3] + e[1];
2827  b0 = C[3]*a02 + C[2]*a11;
2828  b1 = C[3]*a11 - C[2]*a02;
2830  b2 = d[2] + e[ 0];
2831  b3 = d[3] - e[ 1];
2833  d[2] = b2 + b0;
2834  d[3] = b3 + b1;
2835  e[0] = b2 - b0;
2836  e[1] = b1 - b3;
2838  C += 4;
2839  d += 4;
2840  e -= 4;
2841  }
2842  }
2844  // data must be in buf2
2847  // step 8+decode (paper output is X, now buffer)
2848  // this generates pairs of data a la 8 and pushes them directly through
2849  // the decode kernel (pushing rather than pulling) to avoid having
2850  // to make another pass later
2852  // this cannot POSSIBLY be in place, so we refer to the buffers directly
2854  {
2855  float *d0,*d1,*d2,*d3;
     // B and e walk backward through their arrays, 8 floats per iteration
2857  float *B = f->B[blocktype] + n2 - 8;
2858  float *e = buf2 + n2 - 8;
     // four output cursors: d0 and d2 move forward from the start of each
     // half of buffer, d1 and d3 move backward from the end of each half
2859  d0 = &buffer[0];
2860  d1 = &buffer[n2-4];
2861  d2 = &buffer[n2];
2862  d3 = &buffer[n-4];
2863  while (e >= v) {
2864  float p0,p1,p2,p3;
2866  p3 = e[6]*B[7] - e[7]*B[6];
2867  p2 = -e[6]*B[6] - e[7]*B[7];
2869  d0[0] = p3;
2870  d1[3] = - p3;
2871  d2[0] = p2;
2872  d3[3] = p2;
2874  p1 = e[4]*B[5] - e[5]*B[4];
2875  p0 = -e[4]*B[4] - e[5]*B[5];
2877  d0[1] = p1;
2878  d1[2] = - p1;
2879  d2[1] = p0;
2880  d3[2] = p0;
2882  p3 = e[2]*B[3] - e[3]*B[2];
2883  p2 = -e[2]*B[2] - e[3]*B[3];
2885  d0[2] = p3;
2886  d1[1] = - p3;
2887  d2[2] = p2;
2888  d3[1] = p2;
2890  p1 = e[0]*B[1] - e[1]*B[0];
2891  p0 = -e[0]*B[0] - e[1]*B[1];
2893  d0[3] = p1;
2894  d1[0] = - p1;
2895  d2[3] = p0;
2896  d3[0] = p0;
2898  B -= 8;
2899  e -= 8;
2900  d0 += 4;
2901  d2 += 4;
2902  d1 -= 4;
2903  d3 -= 4;
2904  }
2905  }
2907 // temp_free(f,buf2);
2908  temp_alloc_restore(f,save_point);
2909 }
2911 #if 0
2912 // this is the original version of the above code, if you want to optimize it from scratch
// Unoptimized, step-by-step inverse MDCT following the paper's numbered
// steps; kept (disabled) as the readable reference for the fast version.
//   buffer : first n/2 floats are the input; all n floats are overwritten
//   n      : transform size
// The twiddle tables A, B, C are recomputed on every call, and all tables
// and work arrays live on the stack (A and B hold 1<<12 floats, C holds
// 1<<11, and u, X, v, w hold 1<<13 each).
// Relies on ilog() and bit_reverse() defined elsewhere in this file.
2913 void inverse_mdct_naive(float *buffer, int n)
2914 {
2915  float s;
2916  float A[1 << 12], B[1 << 12], C[1 << 11];
2917  int i,k,k2,k4, n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2918  int n3_4 = n - n4, ld;
2919  // how can they claim this only uses N words?!
2920  // oh, because they're only used sparsely, whoops
2921  float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13];
2922  // set up twiddle factors
2924  for (k=k2=0; k < n4; ++k,k2+=2) {
2925  A[k2 ] = (float) cos(4*k*M_PI/n);
2926  A[k2+1] = (float) -sin(4*k*M_PI/n);
2927  B[k2 ] = (float) cos((k2+1)*M_PI/n/2);
2928  B[k2+1] = (float) sin((k2+1)*M_PI/n/2);
2929  }
2930  for (k=k2=0; k < n8; ++k,k2+=2) {
2931  C[k2 ] = (float) cos(2*(k2+1)*M_PI/n);
2932  C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
2933  }
2935  // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2936  // Note there are bugs in that pseudocode, presumably due to them attempting
2937  // to rename the arrays nicely rather than representing the way their actual
2938  // implementation bounces buffers back and forth. As a result, even in the
2939  // "some formulars corrected" version, a direct implementation fails. These
2940  // are noted below as "paper bug".
2942  // copy and reflect spectral data
2943  for (k=0; k < n2; ++k) u[k] = buffer[k];
2944  for ( ; k < n ; ++k) u[k] = -buffer[n - k - 1];
2945  // kernel from paper
2946  // step 1
2947  for (k=k2=k4=0; k < n4; k+=1, k2+=2, k4+=4) {
2948  v[n-k4-1] = (u[k4] - u[n-k4-1]) * A[k2] - (u[k4+2] - u[n-k4-3])*A[k2+1];
2949  v[n-k4-3] = (u[k4] - u[n-k4-1]) * A[k2+1] + (u[k4+2] - u[n-k4-3])*A[k2];
2950  }
2951  // step 2
2952  for (k=k4=0; k < n8; k+=1, k4+=4) {
2953  w[n2+3+k4] = v[n2+3+k4] + v[k4+3];
2954  w[n2+1+k4] = v[n2+1+k4] + v[k4+1];
2955  w[k4+3] = (v[n2+3+k4] - v[k4+3])*A[n2-4-k4] - (v[n2+1+k4]-v[k4+1])*A[n2-3-k4];
2956  w[k4+1] = (v[n2+1+k4] - v[k4+1])*A[n2-4-k4] + (v[n2+3+k4]-v[k4+3])*A[n2-3-k4];
2957  }
2958  // step 3
2959  ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2960  for (l=0; l < ld-3; ++l) {
2961  int k0 = n >> (l+2), k1 = 1 << (l+3);
2962  int rlim = n >> (l+4), r4, r;
2963  int s2lim = 1 << (l+2), s2;
2964  for (r=r4=0; r < rlim; r4+=4,++r) {
2965  for (s2=0; s2 < s2lim; s2+=2) {
2966  u[n-1-k0*s2-r4] = w[n-1-k0*s2-r4] + w[n-1-k0*(s2+1)-r4];
2967  u[n-3-k0*s2-r4] = w[n-3-k0*s2-r4] + w[n-3-k0*(s2+1)-r4];
2968  u[n-1-k0*(s2+1)-r4] = (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1]
2969  - (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1+1];
2970  u[n-3-k0*(s2+1)-r4] = (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1]
2971  + (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1+1];
2972  }
2973  }
     // feed this iteration's output back as the next iteration's input,
     // except after the final iteration
2974  if (l+1 < ld-3) {
2975  // paper bug: ping-ponging of u&w here is omitted
2976  memcpy(w, u, sizeof(u));
2977  }
2978  }
2980  // step 4
2981  for (i=0; i < n8; ++i) {
2982  int j = bit_reverse(i) >> (32-ld+3);
2983  assert(j < n8);
2984  if (i == j) {
2985  // paper bug: original code probably swapped in place; if copying,
2986  // need to directly copy in this case
2987  int i8 = i << 3;
2988  v[i8+1] = u[i8+1];
2989  v[i8+3] = u[i8+3];
2990  v[i8+5] = u[i8+5];
2991  v[i8+7] = u[i8+7];
2992  } else if (i < j) {
     // handle each swapped pair once (i > j is covered when the roles reverse)
2993  int i8 = i << 3, j8 = j << 3;
2994  v[j8+1] = u[i8+1], v[i8+1] = u[j8 + 1];
2995  v[j8+3] = u[i8+3], v[i8+3] = u[j8 + 3];
2996  v[j8+5] = u[i8+5], v[i8+5] = u[j8 + 5];
2997  v[j8+7] = u[i8+7], v[i8+7] = u[j8 + 7];
2998  }
2999  }
3000  // step 5
     // keep only the odd-indexed entries of v
3001  for (k=0; k < n2; ++k) {
3002  w[k] = v[k*2+1];
3003  }
3004  // step 6
3005  for (k=k2=k4=0; k < n8; ++k, k2 += 2, k4 += 4) {
3006  u[n-1-k2] = w[k4];
3007  u[n-2-k2] = w[k4+1];
3008  u[n3_4 - 1 - k2] = w[k4+2];
3009  u[n3_4 - 2 - k2] = w[k4+3];
3010  }
3011  // step 7
3012  for (k=k2=0; k < n8; ++k, k2 += 2) {
3013  v[n2 + k2 ] = ( u[n2 + k2] + u[n-2-k2] + C[k2+1]*(u[n2+k2]-u[n-2-k2]) + C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3014  v[n-2 - k2] = ( u[n2 + k2] + u[n-2-k2] - C[k2+1]*(u[n2+k2]-u[n-2-k2]) - C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3015  v[n2+1+ k2] = ( u[n2+1+k2] - u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3016  v[n-1 - k2] = (-u[n2+1+k2] + u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3017  }
3018  // step 8
3019  for (k=k2=0; k < n4; ++k,k2 += 2) {
3020  X[k] = v[k2+n2]*B[k2 ] + v[k2+1+n2]*B[k2+1];
3021  X[n2-1-k] = v[k2+n2]*B[k2+1] - v[k2+1+n2]*B[k2 ];
3022  }
3024  // decode kernel to output
3025  // determined the following value experimentally
3026  // (by first figuring out what made inverse_mdct_slow work); then matching that here
3027  // (probably vorbis encoder premultiplies by n or n/2, to save it on the decoder?)
3028  s = 0.5; // theoretically would be n4
3030  // [[[ note! the s value of 0.5 is compensated for by the B[] in the current code,
3031  // so it needs to use the "old" B values to behave correctly, or else
3032  // set s to 1.0 ]]]
     // unfold the n/2 values of X into n output samples
3033  for (i=0; i < n4 ; ++i) buffer[i] = s * X[i+n4];
3034  for ( ; i < n3_4; ++i) buffer[i] = -s * X[n3_4 - i - 1];
3035  for ( ; i < n ; ++i) buffer[i] = -s * X[i - n3_4];
3036 }
3037 #endif
3039 static float *get_window(vorb *f, int len)
3040 {
3041  len <<= 1;
3042  if (len == f->blocksize_0) return f->window[0];
3043  if (len == f->blocksize_1) return f->window[1];
3044  assert(0);
3045  return NULL;
3046 }
// Element type of the finalY array passed to do_floor.
// NOTE(review): the conditional that opens this #else/#endif pair is not
// visible in this listing (the embedded numbering skips a line just before);
// confirm which macro selects int16 versus int.
3049 typedef int16 YTYPE;
3050 #else
3051 typedef int YTYPE;
3052 #endif
// Renders the floor curve of channel i into target[0 .. n/2-1].
//   f          : decoder state (floor types and configs are read from it)
//   map        : mapping in use; selects the channel's submap and its floor
//   i          : channel index
//   n          : block size; only the first n/2 entries of target are touched
//   target     : buffer modified via draw_line() and LINE_OP()
//   finalY     : per-point Y values; each is multiplied by floor1_multiplier
//   step2_flag : per-point "used" flags, consulted only in the #else variant
// Returns the result of error(f, VORBIS_invalid_stream) when the selected
// floor has type 0, otherwise TRUE.
3053 static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
3054 {
3055  int n2 = n >> 1;
3056  int s = map->chan[i].mux, floor;
3057  floor = map->submap_floor[s];
3058  if (f->floor_types[floor] == 0) {
3059  return error(f, VORBIS_invalid_stream);
3060  } else {
3061  Floor1 *g = &f->floor_config[floor].floor1;
3062  int j,q;
     // (lx,ly) is the start of the current segment; the first point is at x = 0
3063  int lx = 0, ly = finalY[0] * g->floor1_multiplier;
     // visit the remaining points through g->sorted_order
3064  for (q=1; q < g->values; ++q) {
3065  j = g->sorted_order[q];
     // NOTE(review): the conditional that opens the following #else/#endif
     // pair is not visible in this listing; one variant tests finalY[j] >= 0,
     // the other tests step2_flag[j]. Confirm which macro selects which.
3067  if (finalY[j] >= 0)
3068  #else
3069  if (step2_flag[j])
3070  #endif
3071  {
3072  int hy = finalY[j] * g->floor1_multiplier;
3073  int hx = g->Xlist[j];
     // skip zero-width segments
3074  if (lx != hx)
3075  draw_line(target, lx,ly, hx,hy, n2);
3076  CHECK(f);
3077  lx = hx, ly = hy;
3078  }
3079  }
     // extend the last Y value flat to the end of the half-block
3080  if (lx < n2) {
3081  // optimization of: draw_line(target, lx,ly, n,ly, n2);
3082  for (j=lx; j < n2; ++j)
3083  LINE_OP(target[j], inverse_db_table[ly]);
3084  CHECK(f);
3085  }
3086  }
3087  return TRUE;
3088 }
3090 // The meaning of "left" and "right"
3091 //
3092 // For a given frame:
3093 // we compute samples from 0..n
3094 // window_center is n/2
3095 // we'll window and mix the samples from left_start to left_end with data from the previous frame
3096 // all of the samples from left_end to right_start can be output without mixing; however,
3097 // this interval is 0-length except when transitioning between short and long frames
3098 // all of the samples from right_start to right_end need to be mixed with the next frame,
3099 // which we don't have, so those get saved in a buffer
3100 // frame N's right_end-right_start, the number of samples to mix with the next frame,
3101 // has to be the same as frame N+1's left_end-left_start (which they are by
3102 // construction)
// Begins decoding the next audio packet: reads the packet-type bit and the
// mode number (plus the previous/next window flags when the mode's blockflag
// is set), then computes the window boundaries for this frame.
//   f             : decoder state
//   p_left_start  : out, start of the region mixed with the previous frame
//   p_left_end    : out, end of that region
//   p_right_start : out, start of the region to be mixed with the next frame
//   p_right_end   : out, end of that region
//   mode          : out, index of the selected mode
// Returns TRUE on success. Returns FALSE when f->eof is set, when
// maybe_start_packet() fails, or when the mode number reads as EOP or is not
// below f->mode_count. When the packet-type bit is nonzero: in push mode the
// result of error(f, VORBIS_bad_packet_type) is returned; otherwise the
// packet is drained and the next packet is tried.
3104 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
3105 {
3106  Mode *m;
3107  int i, n, prev, next, window_center;
3110  retry:
3111  if (f->eof) return FALSE;
3112  if (!maybe_start_packet(f))
3113  return FALSE;
3114  // check packet type
3115  if (get_bits(f,1) != 0) {
3116  if (IS_PUSH_MODE(f))
3117  return error(f,VORBIS_bad_packet_type);
     // consume the rest of this packet, then start over with the next one
3118  while (EOP != get8_packet(f));
3119  goto retry;
3120  }
     // NOTE(review): the statement originally guarded by the following `if`
     // is not visible in this listing (the embedded numbering skips from
     // 3122 to 3125). As printed, the `if` governs the assignment to i on the
     // next line -- confirm against the upstream source.
3122  if (f->alloc.alloc_buffer)
3125  i = get_bits(f, ilog(f->mode_count-1));
3126  if (i == EOP) return FALSE;
3127  if (i >= f->mode_count) return FALSE;
3128  *mode = i;
3129  m = f->mode_config + i;
3130  if (m->blockflag) {
     // blockflag set: use blocksize_1 and read the two neighbour flags
3131  n = f->blocksize_1;
3132  prev = get_bits(f,1);
3133  next = get_bits(f,1);
3134  } else {
3135  prev = next = 0;
3136  n = f->blocksize_0;
3137  }
3141  window_center = n >> 1;
     // blockflag set and prev == 0: the left overlap is blocksize_0/2 wide,
     // centred on n/4
3142  if (m->blockflag && !prev) {
3143  *p_left_start = (n - f->blocksize_0) >> 2;
3144  *p_left_end = (n + f->blocksize_0) >> 2;
3145  } else {
3146  *p_left_start = 0;
3147  *p_left_end = window_center;
3148  }
     // blockflag set and next == 0: the right overlap is blocksize_0/2 wide,
     // centred on 3n/4
3149  if (m->blockflag && !next) {
3150  *p_right_start = (n*3 - f->blocksize_0) >> 2;
3151  *p_right_end = (n*3 + f->blocksize_0) >> 2;
3152  } else {
3153  *p_right_start = window_center;
3154  *p_right_end = n;
3155  }
3157  return TRUE;
3158 }
// Decode the body of one audio packet: read the floor-1 curve of every
// channel, decode the residue per submap, undo channel coupling, apply the
// floors, run the inverse MDCT, and then update the running sample position.
//
//  f            decoder state
//  len          out: set to right_end, or to a shorter value when the known
//               position on the last page shows the final frame is truncated
//  m            mode for this packet (supplies blockflag and mapping index)
//  left_start, right_start, right_end
//               window boundaries supplied by the caller
//  left_end     not read by the code visible here
//  p_left       out: receives the adjusted left_start when deferred discard
//               samples are consumed
//
// Returns TRUE on success, or the value of error() when a type-0 floor is hit.
//
// NOTE(review): this listing appears to have lost some lines (the #if that
// matches each #else/#endif below, and the statements that should follow the
// bare `if (f->alloc.alloc_buffer)` lines). Confirm against the original
// file before trusting the control flow exactly as shown.
3160 static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
3161 {
3162  Mapping *map;
3163  int i,j,k,n,n2;
3164  int zero_channel[256];
3165  int really_zero_channel[256];
 // n is the block size selected by this mode's blockflag; n2 is half of it
3169  n = f->blocksize[m->blockflag];
3170  map = &f->mapping[m->mapping];
3172 // FLOORS
3173  n2 = n >> 1;
3175  CHECK(f);
3177  for (i=0; i < f->channels; ++i) {
 // the channel's submap (mux) selects which floor configuration to use
3178  int s = map->chan[i].mux, floor;
3179  zero_channel[i] = FALSE;
3180  floor = map->submap_floor[s];
3181  if (f->floor_types[floor] == 0) {
 // type-0 floors are rejected here
3182  return error(f, VORBIS_invalid_stream);
3183  } else {
3184  Floor1 *g = &f->floor_config[floor].floor1;
 // a single bit says whether this channel carries floor data in this
 // packet; when it is 0 the channel is flagged in zero_channel[] below
3185  if (get_bits(f, 1)) {
3186  short *finalY;
3187  uint8 step2_flag[256];
3188  static int range_list[4] = { 256, 128, 86, 64 };
3189  int range = range_list[g->floor1_multiplier-1];
3190  int offset = 2;
3191  finalY = f->finalY[i];
 // the first two Y values are stored directly, ilog(range)-1 bits each
3192  finalY[0] = get_bits(f, ilog(range)-1);
3193  finalY[1] = get_bits(f, ilog(range)-1);
 // remaining Y values are codebook-coded, one partition at a time
3194  for (j=0; j < g->partitions; ++j) {
3195  int pclass = g->partition_class_list[j];
3196  int cdim = g->class_dimensions[pclass];
3197  int cbits = g->class_subclasses[pclass];
3198  int csub = (1 << cbits)-1;
3199  int cval = 0;
3200  if (cbits) {
3201  Codebook *c = f->codebooks + g->class_masterbooks[pclass];
3202  DECODE(cval,f,c);
3203  }
 // each cbits-wide slice of cval picks the subclass book for one value
3204  for (k=0; k < cdim; ++k) {
3205  int book = g->subclass_books[pclass][cval & csub];
3206  cval = cval >> cbits;
3207  if (book >= 0) {
3208  int temp;
3209  Codebook *c = f->codebooks + book;
3210  DECODE(temp,f,c);
3211  finalY[offset++] = temp;
3212  } else
 // a negative book index means no book: the value is 0
3213  finalY[offset++] = 0;
3214  }
3215  }
3216  if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec
3217  step2_flag[0] = step2_flag[1] = 1;
 // turn each decoded value into an absolute Y: predict from the two
 // neighbor points, then apply the decoded value as an offset
3218  for (j=2; j < g->values; ++j) {
3219  int low, high, pred, highroom, lowroom, room, val;
3220  low = g->neighbors[j][0];
3221  high = g->neighbors[j][1];
3222  //neighbors(g->Xlist, j, &low, &high);
3223  pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]);
3224  val = finalY[j];
3225  highroom = range - pred;
3226  lowroom = pred;
 // room is twice the smaller of the distances from pred to 0 and to range
3227  if (highroom < lowroom)
3228  room = highroom * 2;
3229  else
3230  room = lowroom * 2;
3231  if (val) {
3232  step2_flag[low] = step2_flag[high] = 1;
3233  step2_flag[j] = 1;
 // val >= room: the offset can only go toward the side with more room;
 // otherwise odd values go below pred and even values above it
3234  if (val >= room)
3235  if (highroom > lowroom)
3236  finalY[j] = val - lowroom + pred;
3237  else
3238  finalY[j] = pred - val + highroom - 1;
3239  else
3240  if (val & 1)
3241  finalY[j] = pred - ((val+1)>>1);
3242  else
3243  finalY[j] = pred + (val>>1);
3244  } else {
 // val == 0: the point sits on the predicted line and is not flagged
3245  step2_flag[j] = 0;
3246  finalY[j] = pred;
3247  }
3248  }
 // NOTE(review): the #if line that pairs with the #else below is not
 // present in this listing
3251  do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag);
3252 #else
3253  // defer final floor computation until _after_ residue
 // points that were not flagged above are marked with -1
3254  for (j=0; j < g->values; ++j) {
3255  if (!step2_flag[j])
3256  finalY[j] = -1;
3257  }
3258 #endif
3259  } else {
 // reached either by the 0 flag bit or by the goto above
3260  error:
3261  zero_channel[i] = TRUE;
3262  }
3263  // So we just defer everything else to later
3265  // at this point we've decoded the floor into buffer
3266  }
3267  }
3268  CHECK(f);
3269  // at this point we've decoded all floors
 // NOTE(review): the statement that belonged to this `if` is missing from
 // the listing; as printed, the `if` would govern the memcpy below
3271  if (f->alloc.alloc_buffer)
3274  // re-enable coupled channels if necessary
 // really_zero_channel keeps the pre-coupling flags; zero_channel is then
 // cleared for both channels of any pair in which either one is non-zero
3275  memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels);
3276  for (i=0; i < map->coupling_steps; ++i)
3277  if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) {
3278  zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE;
3279  }
3281  CHECK(f);
 // RESIDUE: for each submap, gather the channels assigned to it and decode
3283  for (i=0; i < map->submaps; ++i) {
3284  float *residue_buffers[STB_VORBIS_MAX_CHANNELS];
3285  int r;
3286  uint8 do_not_decode[256];
3287  int ch = 0;
3288  for (j=0; j < f->channels; ++j) {
3289  if (map->chan[j].mux == i) {
3290  if (zero_channel[j]) {
3291  do_not_decode[ch] = TRUE;
3292  residue_buffers[ch] = NULL;
3293  } else {
3294  do_not_decode[ch] = FALSE;
3295  residue_buffers[ch] = f->channel_buffers[j];
3296  }
3297  ++ch;
3298  }
3299  }
3300  r = map->submap_residue[i];
3301  decode_residue(f, residue_buffers, ch, n2, r, do_not_decode);
3302  }
 // NOTE(review): the statement that belonged to this `if` is missing from
 // the listing
3304  if (f->alloc.alloc_buffer)
3306  CHECK(f);
 // INVERSE COUPLING: walk the coupling steps from last to first, rewriting
 // each magnitude/angle pair in place according to the signs of m[j], a[j]
3309  for (i = map->coupling_steps-1; i >= 0; --i) {
3310  int n2 = n >> 1;
3311  float *m = f->channel_buffers[map->chan[i].magnitude];
3312  float *a = f->channel_buffers[map->chan[i].angle ];
3313  for (j=0; j < n2; ++j) {
3314  float a2,m2;
3315  if (m[j] > 0)
3316  if (a[j] > 0)
3317  m2 = m[j], a2 = m[j] - a[j];
3318  else
3319  a2 = m[j], m2 = m[j] + a[j];
3320  else
3321  if (a[j] > 0)
3322  m2 = m[j], a2 = m[j] + a[j];
3323  else
3324  a2 = m[j], m2 = m[j] - a[j];
3325  m[j] = m2;
3326  a[j] = a2;
3327  }
3328  }
3329  CHECK(f);
3331  // finish decoding the floors
 // NOTE(review): the #if line that pairs with the #else below is not
 // present in this listing. Both variants zero the first n2 samples of
 // channels that had no floor; they differ in how the floor is applied.
3333  for (i=0; i < f->channels; ++i) {
3334  if (really_zero_channel[i]) {
3335  memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3336  } else {
3337  do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL);
3338  }
3339  }
3340 #else
3341  for (i=0; i < f->channels; ++i) {
3342  if (really_zero_channel[i]) {
3343  memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3344  } else {
3345  for (j=0; j < n2; ++j)
3346  f->channel_buffers[i][j] *= f->floor_buffers[i][j];
3347  }
3348  }
3349 #endif
3352  CHECK(f);
 // INVERSE MDCT, one call per channel
3353  for (i=0; i < f->channels; ++i)
3354  inverse_mdct(f->channel_buffers[i], n, f, m->blockflag);
3355  CHECK(f);
3357  // this shouldn't be necessary, unless we exited on an error
3358  // and want to flush to get to the next packet
3359  flush_packet(f);
 // POSITION BOOKKEEPING
3361  if (f->first_decode) {
3362  // assume we start so first non-discarded sample is sample 0
3363  // this isn't to spec, but spec would require us to read ahead
3364  // and decode the size of all current frames--could be done,
3365  // but presumably it's not a commonly used feature
3366  f->current_loc = -n2; // start of first frame is positioned for discard
3367  // we might have to discard samples "from" the next frame too,
3368  // if we're lapping a large block then a small at the start?
3369  f->discard_samples_deferred = n - right_end;
3370  f->current_loc_valid = TRUE;
3371  f->first_decode = FALSE;
3372  } else if (f->discard_samples_deferred) {
 // consume deferred discards by moving left_start forward, at most up to
 // right_start; the caller sees the new value through *p_left
3373  if (f->discard_samples_deferred >= right_start - left_start) {
3374  f->discard_samples_deferred -= (right_start - left_start);
3375  left_start = right_start;
3376  *p_left = left_start;
3377  } else {
3378  left_start += f->discard_samples_deferred;
3379  *p_left = left_start;
3380  f->discard_samples_deferred = 0;
3381  }
3382  } else if (f->previous_length == 0 && f->current_loc_valid) {
3383  // we're recovering from a seek... that means we're going to discard
3384  // the samples from this packet even though we know our position from
3385  // the last page header, so we need to update the position based on
3386  // the discarded samples here
3387  // but wait, the code below is going to add this in itself even
3388  // on a discard, so we don't need to do it here...
3389  }
3391  // check if we have ogg information about the sample # for this packet
3392  if (f->last_seg_which == f->end_seg_with_known_loc) {
3393  // if we have a valid current loc, and this is final:
3394  if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) {
3395  uint32 current_end = f->known_loc_for_packet - (n-right_end);
3396  // then let's infer the size of the (probably) short final frame
3397  if (current_end < f->current_loc + (right_end-left_start)) {
3398  if (current_end < f->current_loc) {
3399  // negative truncation, that's impossible!
3400  *len = 0;
3401  } else {
3402  *len = current_end - f->current_loc;
3403  }
 // *len is an index into the frame buffer, so offset it by left_start
3404  *len += left_start;
3405  if (*len > right_end) *len = right_end; // this should never happen
3406  f->current_loc += *len;
3407  return TRUE;
3408  }
3409  }
3410  // otherwise, just set our sample loc
3411  // guess that the ogg granule pos refers to the _middle_ of the
3412  // last frame?
3413  // set f->current_loc to the position of left_start
3414  f->current_loc = f->known_loc_for_packet - (n2-left_start);
3415  f->current_loc_valid = TRUE;
3416  }
3417  if (f->current_loc_valid)
3418  f->current_loc += (right_start - left_start);
 // NOTE(review): the statement that belonged to this `if` is missing from
 // the listing; as printed, the `if` would govern the assignment below
3420  if (f->alloc.alloc_buffer)
3422  *len = right_end; // ignore samples after the window goes to 0
3423  CHECK(f);
3425  return TRUE;
3426 }
3428 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
3429 {
3430  int mode, left_end, right_end;
3431  if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
3432  return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left);
3433 }
3435 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
3436 {
3437  int prev,i,j;
3438  // we use right&left (the start of the right- and left-window sin()-regions)
3439  // to determine how much to return, rather than inferring from the rules
3440  // (same result, clearer code); 'left' indicates where our sin() window
3441  // starts, therefore where the previous window's right edge starts, and
3442  // therefore where to start mixing from the previous buffer. 'right'
3443  // indicates where our sin() ending-window starts, therefore that's where
3444  // we start saving, and where our returned-data ends.
3446  // mixin from previous window
3447  if (f->previous_length) {
3448  int i,j, n = f->previous_length;
3449  float *w = get_window(f, n);
3450  for (i=0; i < f->channels; ++i) {
3451  for (j=0; j < n; ++j)
3452  f->channel_buffers[i][left+j] =
3453  f->channel_buffers[i][left+j]*w[ j] +
3454  f->previous_window[i][ j]*w[n-1-j];
3455  }
3456  }
3458  prev = f->previous_length;
3460  // last half of this data becomes previous window
3461  f->previous_length = len - right;
3463  // @OPTIMIZE: could avoid this copy by double-buffering the
3464  // output (flipping previous_window with channel_buffers), but
3465  // then previous_window would have to be 2x as large, and
3466  // channel_buffers couldn't be temp mem (although they're NOT
3467  // currently temp mem, they could be (unless we want to level
3468  // performance by spreading out the computation))
3469  for (i=0; i < f->channels; ++i)
3470  for (j=0; right+j < len; ++j)
3471  f->previous_window[i][j] = f->channel_buffers[i][right+j];
3473  if (!prev)
3474  // there was no previous packet, so this data isn't valid...
3475  // this isn't entirely true, only the would-have-overlapped data
3476  // isn't valid, but this seems to be what the spec requires
3477  return 0;
3479  // truncate a short frame
3480  if (len < right) right = len;
3482  f->samples_output += right-left;
3484  return right - left;
3485 }
3488 {
 // NOTE(review): the signature line for this body is absent from this
 // listing; confirm the function name and parameter against the original.
 // Decodes one packet and, if that succeeds, passes it to
 // vorbis_finish_frame; the sample count returned there is discarded.
3489  int len, right, left;
3490  if (vorbis_decode_packet(f, &len, &left, &right))
3491  vorbis_finish_frame(f, len, left, right);
3492 }
3495 static int is_whole_packet_present(stb_vorbis *f, int end_page)
3496 {
3497  // make sure that we have the packet available before continuing...
3498  // this requires a full ogg parse, but we know we can fetch from f->stream
3500  // instead of coding this out explicitly, we could save the current read state,
3501  // read the next packet with get8() until end-of-packet, check f->eof, then
3502  // reset the state? but that would be slower, esp. since we'd have over 256 bytes
3503  // of state to restore (primarily the page segment table)
3505  int s = f->next_seg, first = TRUE;
3506  uint8 *p = f->stream;
3508  if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
3509  for (; s < f->segment_count; ++s) {
3510  p += f->segments[s];
3511  if (f->segments[s] < 255) // stop at first short segment
3512  break;
3513  }
3514  // either this continues, or it ends it...
3515  if (end_page)
3516  if (s < f->segment_count-1) return error(f, VORBIS_invalid_stream);
3517  if (s == f->segment_count)
3518  s = -1; // set 'crosses page' flag
3519  if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3520  first = FALSE;
3521  }
3522  for (; s == -1;) {
3523  uint8 *q;
3524  int n;
3526  // check that we have the page header ready
3527  if (p + 26 >= f->stream_end) return error(f, VORBIS_need_more_data);
3528  // validate the page
3529  if (memcmp(p, ogg_page_header, 4)) return error(f, VORBIS_invalid_stream);
3530  if (p[4] != 0) return error(f, VORBIS_invalid_stream);
3531  if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
3532  if (f->previous_length)
3533  if ((p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3534  // if no previous length, we're resynching, so we can come in on a continued-packet,
3535  // which we'll just drop
3536  } else {
3537  if (!(p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3538  }
3539  n = p[26]; // segment counts
3540  q = p+27; // q points to segment table
3541  p = q + n; // advance past header
3542  // make sure we've read the segment table
3543  if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3544  for (s=0; s < n; ++s) {
3545  p += q[s];
3546  if (q[s] < 255)
3547  break;
3548  }
3549  if (end_page)
3550  if (s < n-1) return error(f, VORBIS_invalid_stream);
3551  if (s == n)
3552  s = -1; // set 'crosses page' flag
3553  if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3554  first = FALSE;
3555  }
3556  return TRUE;
3557 }
// Read the stream's header packets and build all decoder setup tables.
//
// Steps, in order:
//   1. first packet: validate it and read channel count, sample rate and
//      the two block sizes
//   2. second packet: skipped segment by segment
//   3. third packet: codebooks, time-domain placeholders, floors, residues,
//      mappings and modes
//   4. allocate the per-channel work buffers, initialise both block sizes
//      and compute how much temp memory decoding will need
//
// Returns TRUE on success. On failure it returns FALSE or the value of
// error() with a VORBIS_* code describing the problem.
//
// NOTE(review): this listing appears to have lost a number of lines
// (several #if/#ifdef/#ifndef lines whose #else/#endif are still present,
// and a few statements). Confirm against the original file before relying
// on the control flow exactly as shown.
3560 static int start_decoder(vorb *f)
3561 {
3562  uint8 header[6], x,y;
3563  int len,i,j,k, max_submaps = 0;
3564  int longest_floorlist=0;
3566  // first page, first packet
3568  if (!start_page(f)) return FALSE;
3569  // validate page flag
 // NOTE(review): the page-flag checks announced by the comment above are
 // not present in this listing
3573  // check for expected packet length
3574  if (f->segment_count != 1) return error(f, VORBIS_invalid_first_page);
3575  if (f->segments[0] != 30) return error(f, VORBIS_invalid_first_page);
3576  // read packet
3577  // check packet header
3578  if (get8(f) != VORBIS_packet_id) return error(f, VORBIS_invalid_first_page);
3579  if (!getn(f, header, 6)) return error(f, VORBIS_unexpected_eof);
3580  if (!vorbis_validate(header)) return error(f, VORBIS_invalid_first_page);
3581  // vorbis_version
3582  if (get32(f) != 0) return error(f, VORBIS_invalid_first_page);
3583  f->channels = get8(f); if (!f->channels) return error(f, VORBIS_invalid_first_page);
3585  f->sample_rate = get32(f); if (!f->sample_rate) return error(f, VORBIS_invalid_first_page);
3586  get32(f); // bitrate_maximum
3587  get32(f); // bitrate_nominal
3588  get32(f); // bitrate_minimum
 // one byte holds both block-size exponents: low nibble for blocksize_0,
 // high nibble for blocksize_1; each must be 6..13 and log0 <= log1
3589  x = get8(f);
3590  {
3591  int log0,log1;
3592  log0 = x & 15;
3593  log1 = x >> 4;
3594  f->blocksize_0 = 1 << log0;
3595  f->blocksize_1 = 1 << log1;
3596  if (log0 < 6 || log0 > 13) return error(f, VORBIS_invalid_setup);
3597  if (log1 < 6 || log1 > 13) return error(f, VORBIS_invalid_setup);
3598  if (log0 > log1) return error(f, VORBIS_invalid_setup);
3599  }
3601  // framing_flag
3602  x = get8(f);
3603  if (!(x & 1)) return error(f, VORBIS_invalid_first_page);
3605  // second packet!
3606  if (!start_page(f)) return FALSE;
3608  if (!start_packet(f)) return FALSE;
 // skip the packet one segment at a time until next_segment() returns 0
3609  do {
3610  len = next_segment(f);
3611  skip(f, len);
3612  f->bytes_in_seg = 0;
3613  } while (len);
3615  // third packet!
3616  if (!start_packet(f)) return FALSE;
 // NOTE(review): the #if/#ifndef that pairs with the #endif below, and the
 // statement governed by the inner `if`, are not present in this listing
3619  if (IS_PUSH_MODE(f)) {
3620  if (!is_whole_packet_present(f, TRUE)) {
3621  // convert error in ogg header to write type
3622  if (f->error == VORBIS_invalid_stream)
3624  return FALSE;
3625  }
3626  }
3627  #endif
3629  crc32_init(); // always init it, to avoid multithread race conditions
 // the setup packet starts with a 6-byte signature checked by vorbis_validate
3632  for (i=0; i < 6; ++i) header[i] = get8_packet(f);
3633  if (!vorbis_validate(header)) return error(f, VORBIS_invalid_setup);
3635  // codebooks
3637  f->codebook_count = get_bits(f,8) + 1;
3638  f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count);
3639  if (f->codebooks == NULL) return error(f, VORBIS_outofmem);
3640  memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count);
3641  for (i=0; i < f->codebook_count; ++i) {
3642  uint32 *values;
3643  int ordered, sorted_count;
3644  int total=0;
3645  uint8 *lengths;
3646  Codebook *c = f->codebooks+i;
3647  CHECK(f);
 // each codebook begins with the three bytes 0x42 0x43 0x56
3648  x = get_bits(f, 8); if (x != 0x42) return error(f, VORBIS_invalid_setup);
3649  x = get_bits(f, 8); if (x != 0x43) return error(f, VORBIS_invalid_setup);
3650  x = get_bits(f, 8); if (x != 0x56) return error(f, VORBIS_invalid_setup);
 // dimensions is 16 bits and entries is 24 bits, both low byte first
3651  x = get_bits(f, 8);
3652  c->dimensions = (get_bits(f, 8)<<8) + x;
3653  x = get_bits(f, 8);
3654  y = get_bits(f, 8);
3655  c->entries = (get_bits(f, 8)<<16) + (y<<8) + x;
3656  ordered = get_bits(f,1);
 // the sparse flag is only read for codebooks that are not 'ordered'
3657  c->sparse = ordered ? 0 : get_bits(f,1);
3659  if (c->dimensions == 0 && c->entries != 0) return error(f, VORBIS_invalid_setup);
 // sparse codebooks read their lengths into a temp allocation; otherwise
 // the lengths go straight into c->codeword_lengths
3661  if (c->sparse)
3662  lengths = (uint8 *) setup_temp_malloc(f, c->entries);
3663  else
3664  lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3666  if (!lengths) return error(f, VORBIS_outofmem);
3668  if (ordered) {
 // ordered: runs of n consecutive entries share one length, and the
 // length grows by one after each run
3669  int current_entry = 0;
3670  int current_length = get_bits(f,5) + 1;
3671  while (current_entry < c->entries) {
3672  int limit = c->entries - current_entry;
3673  int n = get_bits(f, ilog(limit));
3674  if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
3675  memset(lengths + current_entry, current_length, n);
3676  current_entry += n;
3677  ++current_length;
3678  }
3679  } else {
 // unordered: one 5-bit length per entry; in sparse mode a leading bit
 // says whether the entry is used at all
3680  for (j=0; j < c->entries; ++j) {
3681  int present = c->sparse ? get_bits(f,1) : 1;
3682  if (present) {
3683  lengths[j] = get_bits(f, 5) + 1;
3684  ++total;
3685  if (lengths[j] == 32)
3686  return error(f, VORBIS_invalid_setup);
3687  } else {
3688  lengths[j] = NO_CODE;
3689  }
3690  }
3691  }
 // a sparse codebook with at least a quarter of its entries in use is
 // converted to the non-sparse representation
3693  if (c->sparse && total >= c->entries >> 2) {
3694  // convert sparse items to non-sparse!
 // NOTE(review): the statement governed by this `if` is missing from the
 // listing; as printed, the `if` would govern the allocation below
3695  if (c->entries > (int) f->setup_temp_memory_required)
3698  c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3699  if (c->codeword_lengths == NULL) return error(f, VORBIS_outofmem);
3700  memcpy(c->codeword_lengths, lengths, c->entries);
3701  setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs!
3702  lengths = c->codeword_lengths;
3703  c->sparse = 0;
3704  }
3706  // compute the size of the sorted tables
3707  if (c->sparse) {
3708  sorted_count = total;
3709  } else {
3710  sorted_count = 0;
 // NOTE(review): the #if/#ifndef that pairs with the #endif below is not
 // present in this listing
3712  for (j=0; j < c->entries; ++j)
3713  if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
3714  ++sorted_count;
3715  #endif
3716  }
3718  c->sorted_entries = sorted_count;
3719  values = NULL;
3721  CHECK(f);
3722  if (!c->sparse) {
3723  c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries);
3724  if (!c->codewords) return error(f, VORBIS_outofmem);
3725  } else {
3726  unsigned int size;
3727  if (c->sorted_entries) {
 // NOTE(review): the allocation that this NULL check refers to is missing
 // from the listing
3729  if (!c->codeword_lengths) return error(f, VORBIS_outofmem);
3730  c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries);
3731  if (!c->codewords) return error(f, VORBIS_outofmem);
3732  values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries);
3733  if (!values) return error(f, VORBIS_outofmem);
3734  }
 // record the largest temp footprint seen: lengths + codewords + values
3735  size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries;
3736  if (size > f->setup_temp_memory_required)
3737  f->setup_temp_memory_required = size;
3738  }
3740  if (!compute_codewords(c, lengths, c->entries, values)) {
3741  if (c->sparse) setup_temp_free(f, values, 0);
3742  return error(f, VORBIS_invalid_setup);
3743  }
3745  if (c->sorted_entries) {
3746  // allocate an extra slot for sentinels
3747  c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1));
3748  if (c->sorted_codewords == NULL) return error(f, VORBIS_outofmem);
3749  // allocate an extra slot at the front so that c->sorted_values[-1] is defined
3750  // so that we can catch that case without an extra if
3751  c->sorted_values = ( int *) setup_malloc(f, sizeof(*c->sorted_values ) * (c->sorted_entries+1));
3752  if (c->sorted_values == NULL) return error(f, VORBIS_outofmem);
3753  ++c->sorted_values;
3754  c->sorted_values[-1] = -1;
3755  compute_sorted_huffman(c, lengths, values);
3756  }
 // sparse codebooks release their temp tables, freeing in the reverse of
 // the order in which they were allocated
3758  if (c->sparse) {
3759  setup_temp_free(f, values, sizeof(*values)*c->sorted_entries);
3760  setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries);
3761  setup_temp_free(f, lengths, c->entries);
3762  c->codewords = NULL;
3763  }
3767  CHECK(f);
 // vector lookup: types 0, 1 and 2 are accepted; type 0 has no table
3768  c->lookup_type = get_bits(f, 4);
3769  if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup);
3770  if (c->lookup_type > 0) {
3771  uint16 *mults;
3772  c->minimum_value = float32_unpack(get_bits(f, 32));
3773  c->delta_value = float32_unpack(get_bits(f, 32));
3774  c->value_bits = get_bits(f, 4)+1;
3775  c->sequence_p = get_bits(f,1);
3776  if (c->lookup_type == 1) {
 // NOTE(review): the statement that sets c->lookup_values for lookup
 // type 1 is missing from the listing
3778  } else {
3779  c->lookup_values = c->entries * c->dimensions;
3780  }
3781  if (c->lookup_values == 0) return error(f, VORBIS_invalid_setup);
 // read the raw multiplicands into a temp array, value_bits bits each
3782  mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values);
3783  if (mults == NULL) return error(f, VORBIS_outofmem);
3784  for (j=0; j < (int) c->lookup_values; ++j) {
3785  int q = get_bits(f, c->value_bits);
3786  if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); }
3787  mults[j] = q;
3788  }
 // NOTE(review): the #if/#ifndef lines that pair with the two #endif lines
 // in this section are not present in this listing
3791  if (c->lookup_type == 1) {
3792  int len, sparse = c->sparse;
3793  float last=0;
3794  // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
3795  if (sparse) {
3796  if (c->sorted_entries == 0) goto skip;
3797  c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions);
3798  } else
3799  c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries * c->dimensions);
3800  if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3801  len = sparse ? c->sorted_entries : c->entries;
3802  for (j=0; j < len; ++j) {
 // z is the entry number; dimension k uses digit k of z written in base
 // lookup_values as its index into mults[]
3803  unsigned int z = sparse ? c->sorted_values[j] : j;
3804  unsigned int div=1;
3805  for (k=0; k < c->dimensions; ++k) {
3806  int off = (z / div) % c->lookup_values;
3807  float val = mults[off];
3808  val = mults[off]*c->delta_value + c->minimum_value + last;
3809  c->multiplicands[j*c->dimensions + k] = val;
3810  if (c->sequence_p)
3811  last = val;
3812  if (k+1 < c->dimensions) {
 // reject the codebook if the next multiply would overflow div
3813  if (div > UINT_MAX / (unsigned int) c->lookup_values) {
3814  setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
3815  return error(f, VORBIS_invalid_setup);
3816  }
3817  div *= c->lookup_values;
3818  }
3819  }
3820  }
 // the table is now fully expanded, so it is handled as type 2 from here on
3821  c->lookup_type = 2;
3822  }
3823  else
3824 #endif
3825  {
 // store one scaled value per raw multiplicand; with sequence_p set, each
 // value is added onto the previous one
3826  float last=0;
3827  CHECK(f);
3828  c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values);
3829  if (c->multiplicands == NULL) { setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3830  for (j=0; j < (int) c->lookup_values; ++j) {
3831  float val = mults[j] * c->delta_value + c->minimum_value + last;
3832  c->multiplicands[j] = val;
3833  if (c->sequence_p)
3834  last = val;
3835  }
3836  }
3838  skip:;
3839 #endif
3840  setup_temp_free(f, mults, sizeof(mults[0])*c->lookup_values);
3842  CHECK(f);
3843  }
3844  CHECK(f);
3845  }
3847  // time domain transfers (not used)
 // every 16-bit entry must be zero
3849  x = get_bits(f, 6) + 1;
3850  for (i=0; i < x; ++i) {
3851  uint32 z = get_bits(f, 16);
3852  if (z != 0) return error(f, VORBIS_invalid_setup);
3853  }
3855  // Floors
3856  f->floor_count = get_bits(f, 6)+1;
3857  f->floor_config = (Floor *) setup_malloc(f, f->floor_count * sizeof(*f->floor_config));
3858  if (f->floor_config == NULL) return error(f, VORBIS_outofmem);
3859  for (i=0; i < f->floor_count; ++i) {
3860  f->floor_types[i] = get_bits(f, 16);
3861  if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
3862  if (f->floor_types[i] == 0) {
 // floor 0: the fields are read and stored, nothing more is done here
3863  Floor0 *g = &f->floor_config[i].floor0;
3864  g->order = get_bits(f,8);
3865  g->rate = get_bits(f,16);
3866  g->bark_map_size = get_bits(f,16);
3867  g->amplitude_bits = get_bits(f,6);
3868  g->amplitude_offset = get_bits(f,8);
3869  g->number_of_books = get_bits(f,4) + 1;
3870  for (j=0; j < g->number_of_books; ++j)
3871  g->book_list[j] = get_bits(f,8);
3873  } else {
 // floor 1
3874  Point p[31*8+2];
3875  Floor1 *g = &f->floor_config[i].floor1;
3876  int max_class = -1;
3877  g->partitions = get_bits(f, 5);
 // read the class of each partition and track the highest class number used
3878  for (j=0; j < g->partitions; ++j) {
3879  g->partition_class_list[j] = get_bits(f, 4);
3880  if (g->partition_class_list[j] > max_class)
3881  max_class = g->partition_class_list[j];
3882  }
 // per class: dimension count, subclass bits, optional master book, and
 // one book index per subclass (stored minus one, so -1 means no book)
3883  for (j=0; j <= max_class; ++j) {
3884  g->class_dimensions[j] = get_bits(f, 3)+1;
3885  g->class_subclasses[j] = get_bits(f, 2);
3886  if (g->class_subclasses[j]) {
3887  g->class_masterbooks[j] = get_bits(f, 8);
3888  if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3889  }
3890  for (k=0; k < 1 << g->class_subclasses[j]; ++k) {
3891  g->subclass_books[j][k] = get_bits(f,8)-1;
3892  if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3893  }
3894  }
3895  g->floor1_multiplier = get_bits(f,2)+1;
3896  g->rangebits = get_bits(f,4);
 // the first two X positions are fixed at 0 and 1<<rangebits; the rest are
 // read in partition order, class_dimensions values per partition
3897  g->Xlist[0] = 0;
3898  g->Xlist[1] = 1 << g->rangebits;
3899  g->values = 2;
3900  for (j=0; j < g->partitions; ++j) {
3901  int c = g->partition_class_list[j];
3902  for (k=0; k < g->class_dimensions[c]; ++k) {
3903  g->Xlist[g->values] = get_bits(f, g->rangebits);
3904  ++g->values;
3905  }
3906  }
3907  // precompute the sorting
 // p[] pairs each X with its original index; after qsort the index order
 // is kept in g->sorted_order
3908  for (j=0; j < g->values; ++j) {
3909  p[j].x = g->Xlist[j];
3910  p[j].y = j;
3911  }
3912  qsort(p, g->values, sizeof(p[0]), point_compare);
3913  for (j=0; j < g->values; ++j)
3914  g->sorted_order[j] = (uint8) p[j].y;
3915  // precompute the neighbors
3916  for (j=2; j < g->values; ++j) {
3917  int low = 0,hi = 0;
3918  neighbors(g->Xlist, j, &low,&hi);
3919  g->neighbors[j][0] = low;
3920  g->neighbors[j][1] = hi;
3921  }
 // the largest value count across all floors sizes the finalY buffers below
3923  if (g->values > longest_floorlist)
3924  longest_floorlist = g->values;
3925  }
3926  }
3928  // Residue
3929  f->residue_count = get_bits(f, 6)+1;
3930  f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(f->residue_config[0]));
3931  if (f->residue_config == NULL) return error(f, VORBIS_outofmem);
3932  memset(f->residue_config, 0, f->residue_count * sizeof(f->residue_config[0]));
3933  for (i=0; i < f->residue_count; ++i) {
3934  uint8 residue_cascade[64];
3935  Residue *r = f->residue_config+i;
3936  f->residue_types[i] = get_bits(f, 16);
3937  if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
3938  r->begin = get_bits(f, 24);
3939  r->end = get_bits(f, 24);
3940  if (r->end < r->begin) return error(f, VORBIS_invalid_setup);
3941  r->part_size = get_bits(f,24)+1;
3942  r->classifications = get_bits(f,6)+1;
3943  r->classbook = get_bits(f,8);
3944  if (r->classbook >= f->codebook_count) return error(f, VORBIS_invalid_setup);
 // per classification, build an 8-bit cascade mask: 3 low bits are always
 // read, and 5 high bits only when the flag bit is set
3945  for (j=0; j < r->classifications; ++j) {
3946  uint8 high_bits=0;
3947  uint8 low_bits=get_bits(f,3);
3948  if (get_bits(f,1))
3949  high_bits = get_bits(f,5);
3950  residue_cascade[j] = high_bits*8 + low_bits;
3951  }
3952  r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications);
3953  if (r->residue_books == NULL) return error(f, VORBIS_outofmem);
 // bit k of the cascade mask says whether a book is stored for slot k;
 // slots without a book are stored as -1
3954  for (j=0; j < r->classifications; ++j) {
3955  for (k=0; k < 8; ++k) {
3956  if (residue_cascade[j] & (1 << k)) {
3957  r->residue_books[j][k] = get_bits(f, 8);
3958  if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3959  } else {
3960  r->residue_books[j][k] = -1;
3961  }
3962  }
3963  }
3964  // precompute the classifications[] array to avoid inner-loop mod/divide
3965  // call it 'classdata' since we already have r->classifications
3966  r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
3967  if (!r->classdata) return error(f, VORBIS_outofmem);
3968  memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
 // classdata[j] holds the digits of j in base r->classifications, most
 // significant first, one digit per classbook dimension
3969  for (j=0; j < f->codebooks[r->classbook].entries; ++j) {
3970  int classwords = f->codebooks[r->classbook].dimensions;
3971  int temp = j;
3972  r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords);
3973  if (r->classdata[j] == NULL) return error(f, VORBIS_outofmem);
3974  for (k=classwords-1; k >= 0; --k) {
3975  r->classdata[j][k] = temp % r->classifications;
3976  temp /= r->classifications;
3977  }
3978  }
3979  }
 // Mappings
3981  f->mapping_count = get_bits(f,6)+1;
3982  f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping));
3983  if (f->mapping == NULL) return error(f, VORBIS_outofmem);
3984  memset(f->mapping, 0, f->mapping_count * sizeof(*f->mapping));
3985  for (i=0; i < f->mapping_count; ++i) {
3986  Mapping *m = f->mapping + i;
3987  int mapping_type = get_bits(f,16);
3988  if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
 // m->chan has one entry per channel
3989  m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan));
3990  if (m->chan == NULL) return error(f, VORBIS_outofmem);
3991  if (get_bits(f,1))
3992  m->submaps = get_bits(f,4)+1;
3993  else
3994  m->submaps = 1;
3995  if (m->submaps > max_submaps)
3996  max_submaps = m->submaps;
3997  if (get_bits(f,1)) {
 // NOTE(review): coupling_steps can be as large as 256 and is used to
 // index m->chan[], which holds f->channels entries; no bound check on
 // coupling_steps is visible here. Confirm against the original.
3998  m->coupling_steps = get_bits(f,8)+1;
3999  for (k=0; k < m->coupling_steps; ++k) {
4000  m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
4001  m->chan[k].angle = get_bits(f, ilog(f->channels-1));
4002  if (m->chan[k].magnitude >= f->channels) return error(f, VORBIS_invalid_setup);
4003  if (m->chan[k].angle >= f->channels) return error(f, VORBIS_invalid_setup);
4004  if (m->chan[k].magnitude == m->chan[k].angle) return error(f, VORBIS_invalid_setup);
4005  }
4006  } else
4007  m->coupling_steps = 0;
4009  // reserved field
4010  if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
 // the per-channel submap index is only stored when there is more than
 // one submap
4011  if (m->submaps > 1) {
4012  for (j=0; j < f->channels; ++j) {
4013  m->chan[j].mux = get_bits(f, 4);
4014  if (m->chan[j].mux >= m->submaps) return error(f, VORBIS_invalid_setup);
4015  }
4016  } else
4017  // @SPECIFICATION: this case is missing from the spec
4018  for (j=0; j < f->channels; ++j)
4019  m->chan[j].mux = 0;
4021  for (j=0; j < m->submaps; ++j) {
4022  get_bits(f,8); // discard
4023  m->submap_floor[j] = get_bits(f,8);
4024  m->submap_residue[j] = get_bits(f,8);
4025  if (m->submap_floor[j] >= f->floor_count) return error(f, VORBIS_invalid_setup);
4026  if (m->submap_residue[j] >= f->residue_count) return error(f, VORBIS_invalid_setup);
4027  }
4028  }
4030  // Modes
4031  f->mode_count = get_bits(f, 6)+1;
4032  for (i=0; i < f->mode_count; ++i) {
4033  Mode *m = f->mode_config+i;
4034  m->blockflag = get_bits(f,1);
4035  m->windowtype = get_bits(f,16);
4036  m->transformtype = get_bits(f,16);
4037  m->mapping = get_bits(f,8);
4038  if (m->windowtype != 0) return error(f, VORBIS_invalid_setup);
4039  if (m->transformtype != 0) return error(f, VORBIS_invalid_setup);
4040  if (m->mapping >= f->mapping_count) return error(f, VORBIS_invalid_setup);
4041  }
4043  flush_packet(f);
4045  f->previous_length = 0;
 // per-channel work buffers: a full long block for decoding, half a long
 // block for the saved overlap, and longest_floorlist entries for finalY
4047  for (i=0; i < f->channels; ++i) {
4048  f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1);
4049  f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4050  f->finalY[i] = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist);
4051  if (f->channel_buffers[i] == NULL || f->previous_window[i] == NULL || f->finalY[i] == NULL) return error(f, VORBIS_outofmem);
 // NOTE(review): the #if/#ifdef that pairs with the #endif below is not
 // present in this listing
4053  f->floor_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4054  if (f->floor_buffers[i] == NULL) return error(f, VORBIS_outofmem);
4055  #endif
4056  }
4058  if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE;
4059  if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE;
4060  f->blocksize[0] = f->blocksize_0;
4061  f->blocksize[1] = f->blocksize_1;
 // NOTE(review): the #if/#ifdef that pairs with the #endif below is not
 // present in this listing. The table is filled only while entry [1][1]
 // is still 0.
4064  if (integer_divide_table[1][1]==0)
4065  for (i=0; i < DIVTAB_NUMER; ++i)
4066  for (j=1; j < DIVTAB_DENOM; ++j)
4067  integer_divide_table[i][j] = i / j;
4068 #endif
4070  // compute how much temporary memory is needed
4072  // 1.
4073  {
 // the requirement is the larger of the IMDCT scratch size and the
 // residue classification scratch size
4074  uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1);
4075  uint32 classify_mem;
4076  int i,max_part_read=0;
4077  for (i=0; i < f->residue_count; ++i) {
4078  Residue *r = f->residue_config + i;
4079  int n_read = r->end - r->begin;
4080  int part_read = n_read / r->part_size;
4081  if (part_read > max_part_read)
4082  max_part_read = part_read;
4083  }
 // NOTE(review): the #if/#ifdef that pairs with the #else below is not
 // present in this listing
4085  classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *));
4086  #else
4087  classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *));
4088  #endif
4090  f->temp_memory_required = classify_mem;
4091  if (imdct_mem > f->temp_memory_required)
4092  f->temp_memory_required = imdct_mem;
4093  }
4095  f->first_decode = TRUE;
4097  if (f->alloc.alloc_buffer) {
4099  // check if there's enough temp memory so we don't error later
4100  if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset)
4101  return error(f, VORBIS_outofmem);
4102  }
4106  return TRUE;
4107 }
// Release everything the decoder allocated with the setup allocator:
// residue tables, codebook tables, floor/residue/mapping configuration, the
// per-channel buffers and the per-blocksize tables. When stdio support is
// compiled in and close_on_free is set, the file handle is also closed.
// The decoder struct itself is not freed here.
// NOTE(review): some lines appear to be missing from this listing (for
// example the #if that pairs with the first #endif below).
4109 static void vorbis_deinit(stb_vorbis *p)
4110 {
4111  int i,j;
4112  if (p->residue_config) {
4113  for (i=0; i < p->residue_count; ++i) {
4114  Residue *r = p->residue_config+i;
4115  if (r->classdata) {
 // classdata holds one allocation per entry of the residue's classbook
4116  for (j=0; j < p->codebooks[r->classbook].entries; ++j)
4117  setup_free(p, r->classdata[j]);
4118  setup_free(p, r->classdata);
4119  }
4120  setup_free(p, r->residue_books);
4121  }
4122  }
4124  if (p->codebooks) {
4125  CHECK(p);
4126  for (i=0; i < p->codebook_count; ++i) {
4127  Codebook *c = p->codebooks + i;
4129  setup_free(p, c->multiplicands);
4130  setup_free(p, c->codewords);
4132  // c->sorted_values[-1] is the first entry in the array
 // so step back one element to recover the pointer that was allocated
4133  setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL);
4134  }
4135  setup_free(p, p->codebooks);
4136  }
4137  setup_free(p, p->floor_config);
4138  setup_free(p, p->residue_config);
4139  if (p->mapping) {
4140  for (i=0; i < p->mapping_count; ++i)
4141  setup_free(p, p->mapping[i].chan);
4142  setup_free(p, p->mapping);
4143  }
4144  CHECK(p);
 // the loop is bounded by both the channel count and the array capacity
4145  for (i=0; i < p->channels && i < STB_VORBIS_MAX_CHANNELS; ++i) {
4146  setup_free(p, p->channel_buffers[i]);
4147  setup_free(p, p->previous_window[i]);
4149  setup_free(p, p->floor_buffers[i]);
4150  #endif
4151  setup_free(p, p->finalY[i]);
4152  }
 // one set of tables per block size
4153  for (i=0; i < 2; ++i) {
4154  setup_free(p, p->A[i]);
4155  setup_free(p, p->B[i]);
4156  setup_free(p, p->C[i]);
4157  setup_free(p, p->window[i]);
4158  setup_free(p, p->bit_reverse[i]);
4159  }
4160  #ifndef STB_VORBIS_NO_STDIO
4161  if (p->close_on_free) fclose(p->f);
4162  #endif
4163 }
// Body of a teardown routine (its signature line is not visible in this
// listing). A NULL handle is ignored; otherwise everything hanging off the
// decoder is released by vorbis_deinit() and then the handle itself is
// passed to setup_free().
4166 {
4167  if (p == NULL) return;
4168  vorbis_deinit(p);
4169  setup_free(p,p);
4170 }
// Puts the decoder state `p` into a known starting state: the whole struct
// is zeroed, the caller's allocator description `z` is copied in when one is
// supplied, and a handful of fields are then set explicitly.
// page_crc_tests starts at -1; the push-data decode entry in this file only
// enters its page search when that counter is >= 0.
4172 static void vorbis_init(stb_vorbis *p, const stb_vorbis_alloc *z)
4173 {
4174  memset(p, 0, sizeof(*p)); // NULL out all malloc'd pointers to start
4175  if (z) {
4176  p->alloc = *z;
      // NOTE(review): further statements of this branch may be missing from
      // this listing (the embedded numbering skips here)
4179  }
4180  p->eof = 0;
4181  p->error = VORBIS__no_error;
4182  p->stream = NULL;
4183  p->codebooks = NULL;
4184  p->page_crc_tests = -1;
4185  #ifndef STB_VORBIS_NO_STDIO
4186  p->close_on_free = FALSE;
4187  p->f = NULL;
4188  #endif
4189 }
// Body of a position query (its signature line is not visible in this
// listing): yields f->current_loc while current_loc_valid is set, and -1
// when the position is not currently known.
4192 {
4193  if (f->current_loc_valid)
4194  return f->current_loc;
4195  else
4196  return -1;
4197 }
// Body of an info query (its signature line is not visible in this
// listing): builds a stb_vorbis_info by value and returns it. The lines
// visible here fill in channels, sample_rate and max_frame_size, the last
// being half of blocksize_1.
// NOTE(review): the embedded numbering skips between sample_rate and
// max_frame_size, so further fields are presumably filled in by lines that
// are not visible -- confirm against the full file.
4200 {
4201  stb_vorbis_info d;
4202  d.channels = f->channels;
4203  d.sample_rate = f->sample_rate;
4207  d.max_frame_size = f->blocksize_1 >> 1;
4208  return d;
4209 }
// Body of an error query (its signature line is not visible in this
// listing): returns the error code currently stored in f->error and resets
// the stored code to VORBIS__no_error, so each error is reported once.
4212 {
4213  int e = f->error;
4214  f->error = VORBIS__no_error;
4215  return e;
4216 }
// Body of the decoder-handle allocator (its signature line is not visible in
// this listing): requests sizeof(stb_vorbis) bytes from setup_malloc() using
// the allocator state in `f` and returns whatever setup_malloc() returned.
// Callers in this file check the result for NULL. The memory is returned
// uninitialised by this routine; callers copy a fully set-up struct into it.
4219 {
4220  stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p));
4221  return p;
4222 }
// Body of a push-data reset routine (its signature line is not visible in
// this listing): discards the decoder's notion of where it is in the stream.
// Setting page_crc_tests to 0 (rather than -1) makes the next push-data
// decode call start by searching for a page, since that call enters the
// search whenever page_crc_tests >= 0.
4227 {
4228  f->previous_length = 0;
4229  f->page_crc_tests = 0;
4230  f->discard_samples_deferred = 0;
4231  f->current_loc_valid = FALSE;
4232  f->first_decode = FALSE;
4233  f->samples_output = 0;
4234  f->channel_buffer_start = 0;
4235  f->channel_buffer_end = 0;
4236 }
// Push-data resynchronisation: looks through `data` for Ogg page headers and
// keeps a running CRC for each candidate page in f->scan[], across calls if
// a page does not fit in one chunk. Returns the number of bytes of `data`
// that were consumed. When a candidate's CRC matches its stored goal, the
// decoder leaves page-scan mode (page_crc_tests = -1) and is set up to start
// a new page at the point where that scan ended.
// NOTE(review): the opening lines of two conditionals appear to be missing
// from this listing (after the comment numbered 4244-4245 and before the
// `break` numbered 4286), so the braces shown here do not balance.
4238 static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len)
4239 {
4240  int i,n;
      // scans carried over from a previous call restart at offset 0 of this chunk
4241  for (i=0; i < f->page_crc_tests; ++i)
4242  f->scan[i].bytes_done = 0;
4244  // if we have room for more scans, search for them first, because
4245  // they may cause us to stop early if their header is incomplete
4247  if (data_len < 4) return 0;
4248  data_len -= 3; // need to look for 4-byte sequence, so don't miss
4249  // one that straddles a boundary
4250  for (i=0; i < data_len; ++i) {
      // cheap first-byte test before the 4-byte compare
4251  if (data[i] == 0x4f) {
4252  if (0==memcmp(data+i, ogg_page_header, 4)) {
4253  int j,len;
4254  uint32 crc;
4255  // make sure we have the whole page header
4256  if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
4257  // only read up to this page start, so hopefully we'll
4258  // have the whole page header start next time
4259  data_len = i;
4260  break;
4261  }
4262  // ok, we have it all; compute the length of the page
      // byte 26 is the number of lacing values; they start at byte 27
4263  len = 27 + data[i+26];
4264  for (j=0; j < data[i+26]; ++j)
4265  len += data[i+27+j];
4266  // scan everything up to the embedded crc (which we must 0)
4267  crc = 0;
4268  for (j=0; j < 22; ++j)
4269  crc = crc32_update(crc, data[i+j]);
4270  // now process 4 0-bytes
4271  for ( ; j < 26; ++j)
4272  crc = crc32_update(crc, 0);
4273  // len is the total number of bytes we need to scan
4274  n = f->page_crc_tests++;
4275  f->scan[n].bytes_left = len-j;
4276  f->scan[n].crc_so_far = crc;
      // the expected CRC is stored little-endian in header bytes 22..25
4277  f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + (data[i+25]<<24);
4278  // if the last frame on a page is continued to the next, then
4279  // we can't recover the sample_loc immediately
4280  if (data[i+27+data[i+26]-1] == 255)
4281  f->scan[n].sample_loc = ~0;
4282  else
4283  f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + (data[i+ 9]<<24);
4284  f->scan[n].bytes_done = i+j;
4286  break;
4287  // keep going if we still have room for more
4288  }
4289  }
4290  }
4291  }
      // advance every active scan over the bytes available in this chunk
4293  for (i=0; i < f->page_crc_tests;) {
4294  uint32 crc;
4295  int j;
4296  int n = f->scan[i].bytes_done;
4297  int m = f->scan[i].bytes_left;
4298  if (m > data_len - n) m = data_len - n;
4299  // m is the bytes to scan in the current chunk
4300  crc = f->scan[i].crc_so_far;
4301  for (j=0; j < m; ++j)
4302  crc = crc32_update(crc, data[n+j]);
4303  f->scan[i].bytes_left -= m;
4304  f->scan[i].crc_so_far = crc;
4305  if (f->scan[i].bytes_left == 0) {
4306  // does it match?
4307  if (f->scan[i].crc_so_far == f->scan[i].goal_crc) {
4308  // Houston, we have page
4309  data_len = n+m; // consumption amount is wherever that scan ended
4310  f->page_crc_tests = -1; // drop out of page scan mode
4311  f->previous_length = 0; // decode-but-don't-output one frame
4312  f->next_seg = -1; // start a new page
4313  f->current_loc = f->scan[i].sample_loc; // set the current sample location
4314  // to the amount we'd have decoded had we decoded this page
4315  f->current_loc_valid = f->current_loc != ~0U;
4316  return data_len;
4317  }
4318  // delete entry
      // (the last entry is moved into this slot, so i is not advanced)
4319  f->scan[i] = f->scan[--f->page_crc_tests];
4320  } else {
4321  ++i;
4322  }
4323  }
4325  return data_len;
4326 }
4328 // return value: number of bytes we used
// Push-data frame decode (the line carrying the function name is not visible
// in this listing). `samples` and `output` are written without a NULL check;
// `channels` is optional. *samples is set to 0 whenever no audio is produced:
// while searching for a page, when the whole packet is not yet in `data`
// (return 0), after a packet was discarded for resynchronisation, and on a
// hard decode error (return 1, with the error kept in f->error).
4330  stb_vorbis *f, // the file we're decoding
4331  const uint8 *data, int data_len, // the memory available for decoding
4332  int *channels, // place to write number of float * buffers
4333  float ***output, // place to write float ** array of float * buffers
4334  int *samples // place to write number of output samples
4335  )
4336 {
4337  int i;
4338  int len,right,left;
4340  if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
      // a non-negative counter means we are still hunting for a page boundary
4342  if (f->page_crc_tests >= 0) {
4343  *samples = 0;
4344  return vorbis_search_for_page_pushdata(f, (uint8 *) data, data_len);
4345  }
4347  f->stream = (uint8 *) data;
4348  f->stream_end = (uint8 *) data + data_len;
4349  f->error = VORBIS__no_error;
4351  // check that we have the entire packet in memory
4352  if (!is_whole_packet_present(f, FALSE)) {
4353  *samples = 0;
4354  return 0;
4355  }
4357  if (!vorbis_decode_packet(f, &len, &left, &right)) {
4358  // save the actual error we encountered
4359  enum STBVorbisError error = f->error;
4360  if (error == VORBIS_bad_packet_type) {
4361  // flush and resynch
4362  f->error = VORBIS__no_error;
4363  while (get8_packet(f) != EOP)
4364  if (f->eof) break;
4365  *samples = 0;
4366  return (int) (f->stream - data);
4367  }
4368  if (error == VORBIS_continued_packet_flag_invalid) {
4369  if (f->previous_length == 0) {
4370  // we may be resynching, in which case it's ok to hit one
4371  // of these; just discard the packet
4372  f->error = VORBIS__no_error;
4373  while (get8_packet(f) != EOP)
4374  if (f->eof) break;
4375  *samples = 0;
4376  return (int) (f->stream - data);
4377  }
4378  }
4379  // if we get an error while parsing, what to do?
4380  // well, it DEFINITELY won't work to continue from where we are!
      // NOTE(review): a statement appears to be missing from this listing
      // here (the embedded numbering skips 4381)
4382  // restore the error that actually made us bail
4383  f->error = error;
4384  *samples = 0;
4385  return 1;
4386  }
4388  // success!
4389  len = vorbis_finish_frame(f, len, left, right);
      // each output pointer starts `left` samples into that channel's buffer
4390  for (i=0; i < f->channels; ++i)
4391  f->outputs[i] = f->channel_buffers[i] + left;
4393  if (channels) *channels = f->channels;
4394  *samples = len;
4395  *output = f->outputs;
4396  return (int) (f->stream - data);
4397 }
// Push-data open (the line carrying the function name is not visible in this
// listing). Runs start_decoder() on a stack-local decoder over `data`; on
// success the state is copied into a block from vorbis_alloc() and that
// handle is returned with *data_used set to the number of bytes consumed and
// *error set to 0. If start_decoder() fails, NULL is returned and *error is
// VORBIS_need_more_data when the input ran out, otherwise the decoder's
// error code.
// NOTE(review): `error` and `data_used` are dereferenced without a NULL
// check, and the vorbis_alloc() failure path returns NULL without writing
// *error.
4400  const unsigned char *data, int data_len, // the memory available for decoding
4401  int *data_used, // only defined if result is not NULL
4402  int *error, const stb_vorbis_alloc *alloc)
4403 {
4404  stb_vorbis *f, p;
4405  vorbis_init(&p, alloc);
      // NOTE(review): the left-hand side of the next statement is missing
      // from this listing
4406 = (uint8 *) data;
4407  p.stream_end = (uint8 *) data + data_len;
4408  p.push_mode = TRUE;
4409  if (!start_decoder(&p)) {
4410  if (p.eof)
4411  *error = VORBIS_need_more_data;
4412  else
4413  *error = p.error;
4414  return NULL;
4415  }
4416  f = vorbis_alloc(&p);
4417  if (f) {
4418  *f = p;
4419  *data_used = (int) (f->stream - data);
4420  *error = 0;
4421  return f;
4422  } else {
4423  vorbis_deinit(&p);
4424  return NULL;
4425  }
4426 }
// Body of the stream-offset query (its signature line is not visible in this
// listing). Push-mode decoders report 0. Memory-backed decoders report the
// distance of the read pointer from stream_start; file-backed decoders
// report ftell() relative to f_start.
// NOTE(review): the directive that opens the first conditional is not
// visible in this listing. When STB_VORBIS_NO_STDIO is defined, control can
// reach the end of the function without a return if USE_MEMORY(f) is false.
4430 {
4432  if (f->push_mode) return 0;
4433  #endif
4434  if (USE_MEMORY(f)) return (unsigned int) (f->stream - f->stream_start);
4435  #ifndef STB_VORBIS_NO_STDIO
4436  return (unsigned int) (ftell(f->f) - f->f_start);
4437  #endif
4438 }
4441 //
4443 //
// Body of the pull-mode page finder (its signature line is not visible in
// this listing). Reads forward byte by byte looking for the 4-byte capture
// pattern, then validates the candidate by recomputing the page CRC over the
// header (with the CRC field zeroed), the lacing table and the payload.
// On success: *end (if non-NULL) receives the offset just past the page,
// *last (if non-NULL) receives 1 or 0 according to bit 0x04 of header byte 5,
// the stream is repositioned to the start of the page, and 1 is returned.
// Returns 0 when the input runs out first.
4446 {
4447  for(;;) {
4448  int n;
4449  if (f->eof) return 0;
4450  n = get8(f);
4451  if (n == 0x4f) { // page header candidate
      // retry_loc is the offset just after the candidate's first byte
4452  unsigned int retry_loc = stb_vorbis_get_file_offset(f);
4453  int i;
4454  // check if we're off the end of a file_section stream
4455  if (retry_loc - 25 > f->stream_len)
4456  return 0;
4457  // check the rest of the header
4458  for (i=1; i < 4; ++i)
4459  if (get8(f) != ogg_page_header[i])
4460  break;
4461  if (f->eof) return 0;
4462  if (i == 4) {
4463  uint8 header[27];
      // note: this `i` shadows the outer int `i` for the rest of the block
4464  uint32 i, crc, goal, len;
4465  for (i=0; i < 4; ++i)
4466  header[i] = ogg_page_header[i];
4467  for (; i < 27; ++i)
4468  header[i] = get8(f);
4469  if (f->eof) return 0;
4470  if (header[4] != 0) goto invalid;
      // remember the stored CRC, then zero the field before recomputing
4471  goal = header[22] + (header[23] << 8) + (header[24]<<16) + (header[25]<<24);
4472  for (i=22; i < 26; ++i)
4473  header[i] = 0;
4474  crc = 0;
4475  for (i=0; i < 27; ++i)
4476  crc = crc32_update(crc, header[i]);
      // lacing values feed the CRC and are summed to get the payload length
4477  len = 0;
4478  for (i=0; i < header[26]; ++i) {
4479  int s = get8(f);
4480  crc = crc32_update(crc, s);
4481  len += s;
4482  }
4483  if (len && f->eof) return 0;
4484  for (i=0; i < len; ++i)
4485  crc = crc32_update(crc, get8(f));
4486  // finished parsing probable page
4487  if (crc == goal) {
4488  // we could now check that it's either got the last
4489  // page flag set, OR it's followed by the capture
4490  // pattern, but I guess TECHNICALLY you could have
4491  // a file with garbage between each ogg page and recover
4492  // from it automatically? So even though that paranoia
4493  // might decrease the chance of an invalid decode by
4494  // another 2^32, not worth it since it would hose those
4495  // invalid-but-useful files?
4496  if (end)
4497  *end = stb_vorbis_get_file_offset(f);
4498  if (last) {
4499  if (header[5] & 0x04)
4500  *last = 1;
4501  else
4502  *last = 0;
4503  }
      // retry_loc-1 is the offset of the capture pattern's first byte
4504  set_file_offset(f, retry_loc-1);
4505  return 1;
4506  }
4507  }
4508  invalid:
4509  // not a valid page, so rewind and look for next one
4510  set_file_offset(f, retry_loc);
4511  }
4512  }
4513 }
// sentinel for total_samples meaning "length could not be determined"; the
// stream-length query in this file reports it to callers as 0
4516 #define SAMPLE_unknown 0xffffffff
4518 // seeking is implemented with a binary search, which narrows down the range to
4519 // 64K, before using a linear search (because finding the synchronization
4520 // pattern can be expensive, and the chance we'd find the end page again is
4521 // relatively high for small ranges)
4522 //
4523 // two initial interpolation-style probes are used at the start of the search
4524 // to try to bound either side of the binary search sensibly, while still
4525 // working in O(log n) time if they fail.
// Body of the seek helper that describes the page at the current stream
// position (its signature line is not visible in this listing). Reads the
// 27-byte page header and the lacing table, fills in z->page_end and
// z->last_decoded_sample, then moves the stream back to z->page_start.
// Returns 1 on success and 0 if the bytes read do not begin with "OggS";
// in that case the stream position is not restored.
4528 {
4529  uint8 header[27], lacing[255];
4530  int i,len;
4532  // record where the page starts
      // NOTE(review): the statement that assigns z->page_start appears to be
      // missing from this listing (the embedded numbering skips 4533)
4535  // parse the header
4536  getn(f, header, 27);
4537  if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S')
4538  return 0;
      // header[26] is the number of lacing values that follow the header
4539  getn(f, lacing, header[26]);
4541  // determine the length of the payload
4542  len = 0;
4543  for (i=0; i < header[26]; ++i)
4544  len += lacing[i];
4546  // this implies where the page ends
4547  z->page_end = z->page_start + 27 + header[26] + len;
4549  // read the last-decoded sample out of the data
      // (only header bytes 6..9 are used, assembled little-endian)
4550  z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24);
4552  // restore file state to where we were
4553  set_file_offset(f, z->page_start);
4554  return 1;
4555 }
4557 // rarely used function to seek back to the preceeding page while finding the
4558 // start of a packet
4559 static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset)
4560 {
4561  unsigned int previous_safe, end;
4563  // now we want to seek back 64K from the limit
4564  if (limit_offset >= 65536 && limit_offset-65536 >= f->first_audio_page_offset)
4565  previous_safe = limit_offset - 65536;
4566  else
4567  previous_safe = f->first_audio_page_offset;
4569  set_file_offset(f, previous_safe);
4571  while (vorbis_find_page(f, &end, NULL)) {
4572  if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset)
4573  return 1;
4574  set_file_offset(f, end);
4575  }
4577  return 0;
4578 }
4580 // implements the search logic for finding a page and starting decoding. if
4581 // the function succeeds, current_loc_valid will be true and current_loc will
4582 // be less than or equal to the provided sample number (the closer the
4583 // better).
// Returns 1 on success. Failures report VORBIS_seek_without_length,
// VORBIS_seek_invalid or VORBIS_seek_failed through error().
// NOTE(review): three statements appear to be missing from this listing
// (the embedded numbering skips 4618, 4720 and 4725).
4584 static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number)
4585 {
4586  ProbedPage left, right, mid;
4587  int i, start_seg_with_known_loc, end_pos, page_start;
4588  uint32 delta, stream_length, padding;
4589  double offset = 0, bytes_per_sample = 0;
4590  int probe = 0;
4592  // find the last page and validate the target sample
4593  stream_length = stb_vorbis_stream_length_in_samples(f);
4594  if (stream_length == 0) return error(f, VORBIS_seek_without_length);
4595  if (sample_number > stream_length) return error(f, VORBIS_seek_invalid);
4597  // this is the maximum difference between the window-center (which is the
4598  // actual granule position value), and the right-start (which the spec
4599  // indicates should be the granule position (give or take one)).
4600  padding = ((f->blocksize_1 - f->blocksize_0) >> 2);
      // aim `padding` samples early, clamping at 0 to avoid unsigned wrap
4601  if (sample_number < padding)
4602  sample_number = 0;
4603  else
4604  sample_number -= padding;
4606  left = f->p_first;
4607  while (left.last_decoded_sample == ~0U) {
4608  // (untested) the first page does not have a 'last_decoded_sample'
4609  set_file_offset(f, left.page_end);
4610  if (!get_seek_page_info(f, &left)) goto error;
4611  }
4613  right = f->p_last;
4614  assert(right.last_decoded_sample != ~0U);
4616  // starting from the start is handled differently
4617  if (sample_number <= left.last_decoded_sample) {
4619  return 1;
4620  }
      // narrow [left, right] until the two pages are adjacent
4622  while (left.page_end != right.page_start) {
4623  assert(left.page_end < right.page_start);
4624  // search range in bytes
4625  delta = right.page_start - left.page_end;
4626  if (delta <= 65536) {
4627  // there's only 64K left to search - handle it linearly
4628  set_file_offset(f, left.page_end);
4629  } else {
4630  if (probe < 2) {
4631  if (probe == 0) {
4632  // first probe (interpolate)
4633  double data_bytes = right.page_end - left.page_start;
4634  bytes_per_sample = data_bytes / right.last_decoded_sample;
4635  offset = left.page_start + bytes_per_sample * (sample_number - left.last_decoded_sample);
4636  } else {
4637  // second probe (try to bound the other side)
4638  double error = ((double) sample_number - mid.last_decoded_sample) * bytes_per_sample;
      // force a step of at least 8000 bytes in the direction of the miss
4639  if (error >= 0 && error < 8000) error = 8000;
4640  if (error < 0 && error > -8000) error = -8000;
4641  offset += error * 2;
4642  }
4644  // ensure the offset is valid
4645  if (offset < left.page_end)
4646  offset = left.page_end;
4647  if (offset > right.page_start - 65536)
4648  offset = right.page_start - 65536;
4650  set_file_offset(f, (unsigned int) offset);
4651  } else {
4652  // binary search for large ranges (offset by 32K to ensure
4653  // we don't hit the right page)
4654  set_file_offset(f, left.page_end + (delta / 2) - 32768);
4655  }
4657  if (!vorbis_find_page(f, NULL, NULL)) goto error;
4658  }
      // skip forward over pages that carry no sample position
4660  for (;;) {
4661  if (!get_seek_page_info(f, &mid)) goto error;
4662  if (mid.last_decoded_sample != ~0U) break;
4663  // (untested) no frames end on this page
4664  set_file_offset(f, mid.page_end);
4665  assert(mid.page_start < right.page_start);
4666  }
4668  // if we've just found the last page again then we're in a tricky file,
4669  // and we're close enough.
4670  if (mid.page_start == right.page_start)
4671  break;
4673  if (sample_number < mid.last_decoded_sample)
4674  right = mid;
4675  else
4676  left = mid;
4678  ++probe;
4679  }
4681  // seek back to start of the last packet
4682  page_start = left.page_start;
4683  set_file_offset(f, page_start);
4684  if (!start_page(f)) return error(f, VORBIS_seek_failed);
4685  end_pos = f->end_seg_with_known_loc;
4686  assert(end_pos >= 0);
4688  for (;;) {
      // walk back over segments of length 255; stop just after the nearest
      // shorter one
4689  for (i = end_pos; i > 0; --i)
4690  if (f->segments[i-1] != 255)
4691  break;
4693  start_seg_with_known_loc = i;
4695  if (start_seg_with_known_loc > 0 || !(f->page_flag & PAGEFLAG_continued_packet))
4696  break;
4698  // (untested) the final packet begins on an earlier page
4699  if (!go_to_page_before(f, page_start))
4700  goto error;
4702  page_start = stb_vorbis_get_file_offset(f);
4703  if (!start_page(f)) goto error;
4704  end_pos = f->segment_count - 1;
4705  }
4707  // prepare to start decoding
4708  f->current_loc_valid = FALSE;
4709  f->last_seg = FALSE;
4710  f->valid_bits = 0;
4711  f->packet_bytes = 0;
4712  f->bytes_in_seg = 0;
4713  f->previous_length = 0;
4714  f->next_seg = start_seg_with_known_loc;
      // skip the payload of every segment before the chosen one
4716  for (i = 0; i < start_seg_with_known_loc; i++)
4717  skip(f, f->segments[i]);
4719  // start decoding (optimizable - this frame is generally discarded)
4721  return 1;
4723 error:
4724  // try to restore the file to a valid state
4726  return error(f, VORBIS_seek_failed);
4727 }
4729 // the same as vorbis_decode_initial, but without advancing
4730 static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
4731 {
4732  int bits_read, bytes_read;
4734  if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode))
4735  return 0;
4737  // either 1 or 2 bytes were read, figure out which so we can rewind
4738  bits_read = 1 + ilog(f->mode_count-1);
4739  if (f->mode_config[*mode].blockflag)
4740  bits_read += 2;
4741  bytes_read = (bits_read + 7) / 8;
4743  f->bytes_in_seg += bytes_read;
4744  f->packet_bytes -= bytes_read;
4745  skip(f, -bytes_read);
4746  if (f->next_seg == -1)
4747  f->next_seg = f->segment_count - 1;
4748  else
4749  f->next_seg--;
4750  f->valid_bits = 0;
4752  return 1;
4753 }
// Positions a pull-mode decoder so that the next decoded frame contains (or
// starts at) `sample_number`. A coarse page-level seek is followed by a
// linear walk over packets, peeking at each packet's header to learn how
// many samples it would produce. Returns 1 on success and 0 on failure;
// push-mode decoders are rejected with VORBIS_invalid_api_mixing.
4755 int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number)
4756 {
4757  uint32 max_frame_samples;
4759  if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4761  // fast page-level search
4762  if (!seek_to_sample_coarse(f, sample_number))
4763  return 0;
4765  assert(f->current_loc_valid);
4766  assert(f->current_loc <= sample_number);
4768  // linear search for the relevant packet
4769  max_frame_samples = (f->blocksize_1*3 - f->blocksize_0) >> 2;
4770  while (f->current_loc < sample_number) {
4771  int left_start, left_end, right_start, right_end, mode, frame_samples;
4772  if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
4773  return error(f, VORBIS_seek_failed);
4774  // calculate the number of samples returned by the next frame
4775  frame_samples = right_start - left_start;
4776  if (f->current_loc + frame_samples > sample_number) {
4777  return 1; // the next frame will contain the sample
4778  } else if (f->current_loc + frame_samples + max_frame_samples > sample_number) {
4779  // there's a chance the frame after this could contain the sample
      // NOTE(review): the statement for this branch appears to be missing
      // from this listing (the embedded numbering skips 4780)
4781  } else {
4782  // this frame is too early to be relevant
      // account for its samples and discard the packet without decoding it
4783  f->current_loc += frame_samples;
4784  f->previous_length = 0;
4785  maybe_start_packet(f);
4786  flush_packet(f);
4787  }
4788  }
4789  // the next frame will start with the sample
4790  assert(f->current_loc == sample_number);
4791  return 1;
4792 }
4794 int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number)
4795 {
4796  if (!stb_vorbis_seek_frame(f, sample_number))
4797  return 0;
4799  if (sample_number != f->current_loc) {
4800  int n;
4801  uint32 frame_start = f->current_loc;
4802  stb_vorbis_get_frame_float(f, &n, NULL);
4803  assert(sample_number > frame_start);
4804  assert(f->channel_buffer_start + (int) (sample_number-frame_start) <= f->channel_buffer_end);
4805  f->channel_buffer_start += (sample_number - frame_start);
4806  }
4808  return 1;
4809 }
// Body of a rewind routine (its signature line is not visible in this
// listing). Push-mode decoders are rejected with VORBIS_invalid_api_mixing.
// Otherwise the overlap state is cleared, first_decode is set again and
// next_seg is reset to -1.
// NOTE(review): statements appear to be missing from this listing (the
// embedded numbering skips 4814 and 4818); presumably one of them
// repositions the stream -- confirm against the full file.
4812 {
4813  if (IS_PUSH_MODE(f)) { error(f, VORBIS_invalid_api_mixing); return; }
4815  f->previous_length = 0;
4816  f->first_decode = TRUE;
4817  f->next_seg = -1;
4819 }
// Body of the stream-length query (its signature line is not visible in this
// listing). The answer is computed once and cached in f->total_samples: the
// final page is located by scanning forward from up to 64K before the end of
// the stream, and the low 32 bits of the 64-bit position stored at byte 6 of
// its header become the length (saturated to 0xfffffffe when the high word
// is non-zero). The read position is restored before returning. Returns 0
// when the length is unknown; push-mode decoders are rejected with
// VORBIS_invalid_api_mixing.
4822 {
4823  unsigned int restore_offset, previous_safe;
4824  unsigned int end, last_page_loc;
4826  if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
      // total_samples == 0 means "not computed yet"
4827  if (!f->total_samples) {
4828  unsigned int last;
4829  uint32 lo,hi;
4830  char header[6];
4832  // first, store the current decode position so we can restore it
4833  restore_offset = stb_vorbis_get_file_offset(f);
4835  // now we want to seek back 64K from the end (the last page must
4836  // be at most a little less than 64K, but let's allow a little slop)
4837  if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset)
4838  previous_safe = f->stream_len - 65536;
4839  else
4840  previous_safe = f->first_audio_page_offset;
4842  set_file_offset(f, previous_safe);
4843  // previous_safe is now our candidate 'earliest known place that seeking
4844  // to will lead to the final page'
4846  if (!vorbis_find_page(f, &end, &last)) {
4847  // if we can't find a page, we're hosed!
      // (0xffffffff is the same value as SAMPLE_unknown)
4849  f->total_samples = 0xffffffff;
4850  goto done;
4851  }
4853  // check if there are more pages
4854  last_page_loc = stb_vorbis_get_file_offset(f);
4856  // stop when the last_page flag is set, not when we reach eof;
4857  // this allows us to stop short of a 'file_section' end without
4858  // explicitly checking the length of the section
4859  while (!last) {
4860  set_file_offset(f, end);
4861  if (!vorbis_find_page(f, &end, &last)) {
4862  // the last page we found didn't have the 'last page' flag
4863  // set. whoops!
4864  break;
4865  }
4866  previous_safe = last_page_loc+1;
4867  last_page_loc = stb_vorbis_get_file_offset(f);
4868  }
4870  set_file_offset(f, last_page_loc);
4872  // parse the header
      // (the first 6 header bytes are read and not examined)
4873  getn(f, (unsigned char *)header, 6);
4874  // extract the absolute granule position
4875  lo = get32(f);
4876  hi = get32(f);
4877  if (lo == 0xffffffff && hi == 0xffffffff) {
      // NOTE(review): statements appear to be missing from this listing here
      // (the embedded numbering skips 4878-4879)
4880  goto done;
4881  }
4882  if (hi)
4883  lo = 0xfffffffe; // saturate
4884  f->total_samples = lo;
      // remember the final page for later use
4886  f->p_last.page_start = last_page_loc;
4887  f->p_last.page_end = end;
4888  f->p_last.last_decoded_sample = lo;
4890  done:
4891  set_file_offset(f, restore_offset);
4892  }
4893  return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples;
4894 }
// Body of a duration query (its signature line is not visible in this
// listing): the stream length in samples divided by the sample rate, as a
// float. The result is 0 when the sample length is reported as 0.
4897 {
4898  return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate;
4899 }
// Decodes the next frame of a pull-mode decoder. Returns the length reported
// by vorbis_finish_frame() for the frame, or 0 when the packet could not be
// decoded. `channels` and `output` are both optional: when non-NULL they
// receive the channel count and the decoder's array of per-channel float
// pointers. The frame is also recorded in channel_buffer_start/_end.
// Push-mode decoders are rejected with VORBIS_invalid_api_mixing.
4903 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
4904 {
4905  int len, right,left,i;
4906  if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4908  if (!vorbis_decode_packet(f, &len, &left, &right)) {
      // NOTE(review): a statement appears to be missing from this listing
      // here (the embedded numbering skips 4909)
4910  return 0;
4911  }
4913  len = vorbis_finish_frame(f, len, left, right);
      // each output pointer starts `left` samples into that channel's buffer
4914  for (i=0; i < f->channels; ++i)
4915  f->outputs[i] = f->channel_buffers[i] + left;
4917  f->channel_buffer_start = left;
4918  f->channel_buffer_end = left+len;
4920  if (channels) *channels = f->channels;
4921  if (output) *output = f->outputs;
4922  return len;
4923 }
4925 #ifndef STB_VORBIS_NO_STDIO
// Opens a decoder on `length` bytes of `file`, starting at the file's
// current position. Setup runs on a stack-local decoder; on success the
// state is copied into a block from vorbis_alloc() and that handle is
// returned. On failure NULL is returned, *error (when `error` is non-NULL)
// receives the decoder's error code, and vorbis_deinit() runs on the local
// state, which closes `file` if close_on_free is set. *error is not written
// on success.
4927 stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc, unsigned int length)
4928 {
4929  stb_vorbis *f, p;
4930  vorbis_init(&p, alloc);
4931  p.f = file;
      // offsets in this decoder are measured from where the file was on entry
4932  p.f_start = (uint32) ftell(file);
4933  p.stream_len = length;
4934  p.close_on_free = close_on_free;
4935  if (start_decoder(&p)) {
4936  f = vorbis_alloc(&p);
4937  if (f) {
4938  *f = p;
      // NOTE(review): a statement appears to be missing from this listing
      // here (the embedded numbering skips 4939)
4940  return f;
4941  }
4942  }
4943  if (error) *error = p.error;
4944  vorbis_deinit(&p);
4945  return NULL;
4946 }
4948 stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc)
4949 {
4950  unsigned int len, start;
4951  start = (unsigned int) ftell(file);
4952  fseek(file, 0, SEEK_END);
4953  len = (unsigned int) (ftell(file) - start);
4954  fseek(file, start, SEEK_SET);
4955  return stb_vorbis_open_file_section(file, close_on_free, error, alloc, len);
4956 }
4958 stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc)
4959 {
4960  FILE *f = fopen(filename, "rb");
4961  if (f)
4962  return stb_vorbis_open_file(f, TRUE, error, alloc);
4963  if (error) *error = VORBIS_file_open_failure;
4964  return NULL;
4965 }
4966 #endif // STB_VORBIS_NO_STDIO
// Opens a pull-mode decoder on `len` bytes at `data`. A NULL `data` yields
// NULL without touching *error. Setup runs on a stack-local decoder; on
// success the state is copied into a block from vorbis_alloc() and that
// handle is returned. On failure NULL is returned, *error (when `error` is
// non-NULL) receives the decoder's error code, and vorbis_deinit() runs on
// the local state. *error is not written on success.
4968 stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, int *error, const stb_vorbis_alloc *alloc)
4969 {
4970  stb_vorbis *f, p;
4971  if (data == NULL) return NULL;
4972  vorbis_init(&p, alloc);
      // NOTE(review): the next statement and the one numbered 4975 are
      // truncated in this listing
4973 = (uint8 *) data;
4974  p.stream_end = (uint8 *) data + len;
4975  p.stream_start = (uint8 *);
4976  p.stream_len = len;
4977  p.push_mode = FALSE;
4978  if (start_decoder(&p)) {
4979  f = vorbis_alloc(&p);
4980  if (f) {
4981  *f = p;
      // NOTE(review): a statement appears to be missing from this listing
      // here (the embedded numbering skips 4982)
4983  return f;
4984  }
4985  }
4986  if (error) *error = p.error;
4987  vorbis_deinit(&p);
4988  return NULL;
4989 }
// Bit flags naming the output destinations a source channel contributes to.
// The mixdown routines in this file test channel_position entries against
// these bits.
4992 #define PLAYBACK_MONO 1
4993 #define PLAYBACK_LEFT 2
4994 #define PLAYBACK_RIGHT 4
// channel_position[n][j] is the destination mask for channel j of an
// n-channel stream (n = 0..6). Entries not listed in a row are
// zero-initialised.
// NOTE(review): the definitions of L, C and R are not visible in this
// listing; presumably they are combinations of the PLAYBACK_* bits above --
// confirm against the full file.
5000 static int8 channel_position[7][6] =
5001 {
5002  { 0 },
5003  { C },
5004  { L, R },
5005  { L, C, R },
5006  { L, R, L, R },
5007  { L, C, R, L, R },
5008  { L, C, R, L, R, C },
5009 };
// Two alternative definitions of FAST_SCALED_FLOAT_TO_INT(temp,x,s), which
// converts float x scaled by 2^s to an int. The first reinterprets the bits
// of a float through a union; the second (after #else) is a plain multiply
// and cast. FASTDEF declares the scratch union only for the first form, and
// check_endianness() expands to nothing in both.
// NOTE(review): the directive that selects between the two forms is not
// visible in this listing.
5013  typedef union {
5014  float f;
5015  int i;
5016  } float_conv;
      // compile-time check: the array size is 0 (ill-formed) unless float and
      // int are both 4 bytes
5017  typedef char stb_vorbis_float_size_test[sizeof(float)==4 && sizeof(int) == 4];
5018  #define FASTDEF(x) float_conv x
5019  // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
5020  #define MAGIC(SHIFT) (1.5f * (1 << (23-SHIFT)) + 0.5f/(1 << SHIFT))
5021  #define ADDEND(SHIFT) (((150-SHIFT) << 23) + (1 << 22))
5022  #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) (temp.f = (x) + MAGIC(s), temp.i - ADDEND(s))
5023  #define check_endianness()
5024 #else
5025  #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((int) ((x) * (1 << (s))))
5026  #define check_endianness()
5027  #define FASTDEF(x)
5028 #endif
5030 static void copy_samples(short *dest, float *src, int len)
5031 {
5032  int i;
5033  check_endianness();
5034  for (i=0; i < len; ++i) {
5035  FASTDEF(temp);
5036  int v = FAST_SCALED_FLOAT_TO_INT(temp, src[i],15);
5037  if ((unsigned int) (v + 32768) > 65535)
5038  v = v < 0 ? -32768 : 32767;
5039  dest[i] = v;
5040  }
5041 }
5043 static void compute_samples(int mask, short *output, int num_c, float **data, int d_offset, int len)
5044 {
5045  #define BUFFER_SIZE 32
5046  float buffer[BUFFER_SIZE];
5047  int i,j,o,n = BUFFER_SIZE;
5048  check_endianness();
5049  for (o = 0; o < len; o += BUFFER_SIZE) {
5050  memset(buffer, 0, sizeof(buffer));
5051  if (o + n > len) n = len - o;
5052  for (j=0; j < num_c; ++j) {
5053  if (channel_position[num_c][j] & mask) {
5054  for (i=0; i < n; ++i)
5055  buffer[i] += data[j][d_offset+o+i];
5056  }
5057  }
5058  for (i=0; i < n; ++i) {
5059  FASTDEF(temp);
5060  int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5061  if ((unsigned int) (v + 32768) > 65535)
5062  v = v < 0 ? -32768 : 32767;
5063  output[o+i] = v;
5064  }
5065  }
5066 }
5068 static void compute_stereo_samples(short *output, int num_c, float **data, int d_offset, int len)
5069 {
5070  #define BUFFER_SIZE 32
5071  float buffer[BUFFER_SIZE];
5072  int i,j,o,n = BUFFER_SIZE >> 1;
5073  // o is the offset in the source data
5074  check_endianness();
5075  for (o = 0; o < len; o += BUFFER_SIZE >> 1) {
5076  // o2 is the offset in the output data
5077  int o2 = o << 1;
5078  memset(buffer, 0, sizeof(buffer));
5079  if (o + n > len) n = len - o;
5080  for (j=0; j < num_c; ++j) {
5081  int m = channel_position[num_c][j] & (PLAYBACK_LEFT | PLAYBACK_RIGHT);
5082  if (m == (PLAYBACK_LEFT | PLAYBACK_RIGHT)) {
5083  for (i=0; i < n; ++i) {
5084  buffer[i*2+0] += data[j][d_offset+o+i];
5085  buffer[i*2+1] += data[j][d_offset+o+i];
5086  }
5087  } else if (m == PLAYBACK_LEFT) {
5088  for (i=0; i < n; ++i) {
5089  buffer[i*2+0] += data[j][d_offset+o+i];
5090  }
5091  } else if (m == PLAYBACK_RIGHT) {
5092  for (i=0; i < n; ++i) {
5093  buffer[i*2+1] += data[j][d_offset+o+i];
5094  }
5095  }
5096  }
5097  for (i=0; i < (n<<1); ++i) {
5098  FASTDEF(temp);
5099  int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5100  if ((unsigned int) (v + 32768) > 65535)
5101  v = v < 0 ? -32768 : 32767;
5102  output[o2+i] = v;
5103  }
5104  }
5105 }
5107 static void convert_samples_short(int buf_c, short **buffer, int b_offset, int data_c, float **data, int d_offset, int samples)
5108 {
5109  int i;
5110  if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5111  static int channel_selector[3][2] = { {0}, {PLAYBACK_MONO}, {PLAYBACK_LEFT, PLAYBACK_RIGHT} };
5112  for (i=0; i < buf_c; ++i)
5113  compute_samples(channel_selector[buf_c][i], buffer[i]+b_offset, data_c, data, d_offset, samples);
5114  } else {
5115  int limit = buf_c < data_c ? buf_c : data_c;
5116  for (i=0; i < limit; ++i)
5117  copy_samples(buffer[i]+b_offset, data[i]+d_offset, samples);
5118  for ( ; i < buf_c; ++i)
5119  memset(buffer[i]+b_offset, 0, sizeof(short) * samples);
5120  }
5121 }
5123 int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples)
5124 {
5125  float **output = NULL;
5126  int len = stb_vorbis_get_frame_float(f, NULL, &output);
5127  if (len > num_samples) len = num_samples;
5128  if (len)
5129  convert_samples_short(num_c, buffer, 0, f->channels, output, 0, len);
5130  return len;
5131 }
5133 static void convert_channels_short_interleaved(int buf_c, short *buffer, int data_c, float **data, int d_offset, int len)
5134 {
5135  int i;
5136  check_endianness();
5137  if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5138  assert(buf_c == 2);
5139  for (i=0; i < buf_c; ++i)
5140  compute_stereo_samples(buffer, data_c, data, d_offset, len);
5141  } else {
5142  int limit = buf_c < data_c ? buf_c : data_c;
5143  int j;
5144  for (j=0; j < len; ++j) {
5145  for (i=0; i < limit; ++i) {
5146  FASTDEF(temp);
5147  float f = data[i][d_offset+j];
5148  int v = FAST_SCALED_FLOAT_TO_INT(temp, f,15);//data[i][d_offset+j],15);
5149  if ((unsigned int) (v + 32768) > 65535)
5150  v = v < 0 ? -32768 : 32767;
5151  *buffer++ = v;
5152  }
5153  for ( ; i < buf_c; ++i)
5154  *buffer++ = 0;
5155  }
5156  }
5157 }
5159 int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts)
5160 {
5161  float **output;
5162  int len;
5163  if (num_c == 1) return stb_vorbis_get_frame_short(f,num_c,&buffer, num_shorts);
5164  len = stb_vorbis_get_frame_float(f, NULL, &output);
5165  if (len) {
5166  if (len*num_c > num_shorts) len = num_shorts / num_c;
5167  convert_channels_short_interleaved(num_c, buffer, f->channels, output, 0, len);
5168  }
5169  return len;
5170 }
// Fills `buffer` with up to num_shorts/channels frames of interleaved 16-bit
// output, not necessarily aligned to frame boundaries: samples already
// decoded and still buffered are consumed first, and further frames are
// decoded as needed. Returns the number of frames produced, which is less
// than requested when decoding stops early.
// NOTE(review): the statement controlled by `if (k)` (numbered 5183) is not
// visible in this listing; presumably it converts the k buffered frames into
// `buffer` -- confirm against the full file.
5172 int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts)
5173 {
5174  float **outputs;
5175  int len = num_shorts / channels;
5176  int n=0;
5177  int z = f->channels;
5178  if (z > channels) z = channels;
5179  while (n < len) {
      // k = frames still buffered from the last decode, capped at what is
      // still wanted
5180  int k = f->channel_buffer_end - f->channel_buffer_start;
5181  if (n+k >= len) k = len - n;
5182  if (k)
5184  buffer += k*channels;
5185  n += k;
5186  f->channel_buffer_start += k;
5187  if (n == len) break;
      // buffered data exhausted: decode another frame, stop if none is produced
5188  if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5189  }
5190  return n;
5191 }
5193 int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int len)
5194 {
5195  float **outputs;
5196  int n=0;
5197  int z = f->channels;
5198  if (z > channels) z = channels;
5199  while (n < len) {
5200  int k = f->channel_buffer_end - f->channel_buffer_start;
5201  if (n+k >= len) k = len - n;
5202  if (k)
5203  convert_samples_short(channels, buffer, n, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5204  n += k;
5205  f->channel_buffer_start += k;
5206  if (n == len) break;
5207  if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5208  }
5209  return n;
5210 }
5212 #ifndef STB_VORBIS_NO_STDIO
5213 int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output)
5214 {
5215  int data_len, offset, total, limit, error;
5216  short *data;
5217  stb_vorbis *v = stb_vorbis_open_filename(filename, &error, NULL);
5218  if (v == NULL) return -1;
5219  limit = v->channels * 4096;
5220  *channels = v->channels;
5221  if (sample_rate)
5222  *sample_rate = v->sample_rate;
5223  offset = data_len = 0;
5224  total = limit;
5225  data = (short *) malloc(total * sizeof(*data));
5226  if (data == NULL) {
5227  stb_vorbis_close(v);
5228  return -2;
5229  }
5230  for (;;) {
5231  int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5232  if (n == 0) break;
5233  data_len += n;
5234  offset += n * v->channels;
5235  if (offset + limit > total) {
5236  short *data2;
5237  total *= 2;
5238  data2 = (short *) realloc(data, total * sizeof(*data));
5239  if (data2 == NULL) {
5240  free(data);
5241  stb_vorbis_close(v);
5242  return -2;
5243  }
5244  data = data2;
5245  }
5246  }
5247  *output = data;
5248  stb_vorbis_close(v);
5249  return data_len;
5250 }
5251 #endif // NO_STDIO
5253 int stb_vorbis_decode_memory(const uint8 *mem, int len, int *channels, int *sample_rate, short **output)
5254 {
5255  int data_len, offset, total, limit, error;
5256  short *data;
5257  stb_vorbis *v = stb_vorbis_open_memory(mem, len, &error, NULL);
5258  if (v == NULL) return -1;
5259  limit = v->channels * 4096;
5260  *channels = v->channels;
5261  if (sample_rate)
5262  *sample_rate = v->sample_rate;
5263  offset = data_len = 0;
5264  total = limit;
5265  data = (short *) malloc(total * sizeof(*data));
5266  if (data == NULL) {
5267  stb_vorbis_close(v);
5268  return -2;
5269  }
5270  for (;;) {
5271  int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5272  if (n == 0) break;
5273  data_len += n;
5274  offset += n * v->channels;
5275  if (offset + limit > total) {
5276  short *data2;
5277  total *= 2;
5278  data2 = (short *) realloc(data, total * sizeof(*data));
5279  if (data2 == NULL) {
5280  free(data);
5281  stb_vorbis_close(v);
5282  return -2;
5283  }
5284  data = data2;
5285  }
5286  }
5287  *output = data;
5288  stb_vorbis_close(v);
5289  return data_len;
5290 }
5293 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
5294 {
5295  float **outputs;
5296  int len = num_floats / channels;
5297  int n=0;
5298  int z = f->channels;
5299  if (z > channels) z = channels;
5300  while (n < len) {
5301  int i,j;
5302  int k = f->channel_buffer_end - f->channel_buffer_start;
5303  if (n+k >= len) k = len - n;
5304  for (j=0; j < k; ++j) {
5305  for (i=0; i < z; ++i)
5306  *buffer++ = f->channel_buffers[i][f->channel_buffer_start+j];
5307  for ( ; i < channels; ++i)
5308  *buffer++ = 0;
5309  }
5310  n += k;
5311  f->channel_buffer_start += k;
5312  if (n == len)
5313  break;
5314  if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5315  break;
5316  }
5317  return n;
5318 }
5320 int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
5321 {
5322  float **outputs;
5323  int n=0;
5324  int z = f->channels;
5325  if (z > channels) z = channels;
5326  while (n < num_samples) {
5327  int i;
5328  int k = f->channel_buffer_end - f->channel_buffer_start;
5329  if (n+k >= num_samples) k = num_samples - n;
5330  if (k) {
5331  for (i=0; i < z; ++i)
5332  memcpy(buffer[i]+n, f->channel_buffers[i]+f->channel_buffer_start, sizeof(float)*k);
5333  for ( ; i < channels; ++i)
5334  memset(buffer[i]+n, 0, sizeof(float) * k);
5335  }
5336  n += k;
5337  f->channel_buffer_start += k;
5338  if (n == num_samples)
5339  break;
5340  if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5341  break;
5342  }
5343  return n;
5344 }
5347 /* Version history
5348  1.09 - 2016/04/04 - back out 'avoid discarding last frame' fix from previous version
5349  1.08 - 2016/04/02 - fixed multiple warnings; fix setup memory leaks;
5350  avoid discarding last frame of audio data
5351  1.07 - 2015/01/16 - fixed some warnings, fix mingw, const-correct API
5352  some more crash fixes when out of memory or with corrupt files
5353  1.06 - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
5354  some crash fixes when out of memory or with corrupt files
5355  1.05 - 2015/04/19 - don't define __forceinline if it's redundant
5356  1.04 - 2014/08/27 - fix missing const-correct case in API
5357  1.03 - 2014/08/07 - Warning fixes
5358  1.02 - 2014/07/09 - Declare qsort compare function _cdecl on windows
5359  1.01 - 2014/06/18 - fix stb_vorbis_get_samples_float
5360  1.0 - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in multichannel
5361  (API change) report sample rate for decode-full-file funcs
5362  0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
5363  0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
5364  0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
5365  0.99993 - remove assert that fired on legal files with empty tables
5366  0.99992 - rewind-to-start
5367  0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
5368  0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
5369  0.9998 - add a full-decode function with a memory source
5370  0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
5371  0.9996 - query length of vorbis stream in samples/seconds
5372  0.9995 - bugfix to another optimization that only happened in certain files
5373  0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
5374  0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
5375  0.9992 - performance improvement of IMDCT; now performs close to reference implementation
5376  0.9991 - performance improvement of IMDCT
5377  0.999 - (should have been 0.9990) performance improvement of IMDCT
5378  0.998 - no-CRT support from Casey Muratori
5379  0.997 - bugfixes for bugs found by Terje Mathisen
5380  0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
5381  0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
5382  0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
5383  0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
5384  0.992 - fixes for MinGW warning
5385  0.991 - turn fast-float-conversion on by default
5386  0.990 - fix push-mode seek recovery if you seek into the headers
5387  0.98b - fix to bad release of 0.98
5388  0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
5389  0.97 - builds under c++ (typecasting, don't use 'class' keyword)
5390  0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
5391  0.95 - clamping code for 16-bit functions
5392  0.94 - not publicly released
5393  0.93 - fixed all-zero-floor case (was decoding garbage)
5394  0.92 - fixed a memory leak
5395  0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
5396  0.90 - first public release
5397 */