001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019
020/*
021 * This package is based on the work done by Keiron Liddle, Aftex Software
022 * <keiron@aftexsw.com> to whom the Ant project is very grateful for his
023 * great code.
024 */
025package org.apache.commons.compress.compressors.bzip2;
026
027import java.io.IOException;
028import java.io.InputStream;
029import java.nio.ByteOrder;
030import java.util.Arrays;
031
032import org.apache.commons.compress.compressors.CompressorInputStream;
033import org.apache.commons.compress.utils.BitInputStream;
034import org.apache.commons.compress.utils.CloseShieldFilterInputStream;
035
036/**
037 * An input stream that decompresses from the BZip2 format to be read as any other stream.
038 *
039 * @NotThreadSafe
040 */
041public class BZip2CompressorInputStream extends CompressorInputStream implements
042                                                                          BZip2Constants {
043
044    /**
045     * Index of the last char in the block, so the block size == last + 1.
046     */
047    private int last;
048
049    /**
050     * Index in zptr[] of original string after sorting.
051     */
052    private int origPtr;
053
054    /**
055     * always: in the range 0 .. 9. The current block size is 100000 * this
056     * number.
057     */
058    private int blockSize100k;
059
060    private boolean blockRandomised;
061
062    private final CRC crc = new CRC();
063
064    private int nInUse;
065
066    private BitInputStream bin;
067    private final boolean decompressConcatenated;
068
069    private static final int EOF = 0;
070    private static final int START_BLOCK_STATE = 1;
071    private static final int RAND_PART_A_STATE = 2;
072    private static final int RAND_PART_B_STATE = 3;
073    private static final int RAND_PART_C_STATE = 4;
074    private static final int NO_RAND_PART_A_STATE = 5;
075    private static final int NO_RAND_PART_B_STATE = 6;
076    private static final int NO_RAND_PART_C_STATE = 7;
077
078    private int currentState = START_BLOCK_STATE;
079
080    private int storedBlockCRC, storedCombinedCRC;
081    private int computedBlockCRC, computedCombinedCRC;
082
083    // Variables used by setup* methods exclusively
084
085    private int su_count;
086    private int su_ch2;
087    private int su_chPrev;
088    private int su_i2;
089    private int su_j2;
090    private int su_rNToGo;
091    private int su_rTPos;
092    private int su_tPos;
093    private char su_z;
094
095    /**
096     * All memory intensive stuff. This field is initialized by initBlock().
097     */
098    private BZip2CompressorInputStream.Data data;
099
100    /**
101     * Constructs a new BZip2CompressorInputStream which decompresses bytes
102     * read from the specified stream. This doesn't suppprt decompressing
103     * concatenated .bz2 files.
104     *
105     * @param in the InputStream from which this object should be created
106     * @throws IOException
107     *             if the stream content is malformed or an I/O error occurs.
108     * @throws NullPointerException
109     *             if {@code in == null}
110     */
111    public BZip2CompressorInputStream(final InputStream in) throws IOException {
112        this(in, false);
113    }
114
115    /**
116     * Constructs a new BZip2CompressorInputStream which decompresses bytes
117     * read from the specified stream.
118     *
119     * @param in the InputStream from which this object should be created
120     * @param decompressConcatenated
121     *                     if true, decompress until the end of the input;
122     *                     if false, stop after the first .bz2 stream and
123     *                     leave the input position to point to the next
124     *                     byte after the .bz2 stream
125     *
126     * @throws IOException
127     *             if {@code in == null}, the stream content is malformed, or an I/O error occurs.
128     */
129    public BZip2CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException {
130        this.bin = new BitInputStream(in == System.in ? new CloseShieldFilterInputStream(in) : in,
131            ByteOrder.BIG_ENDIAN);
132        this.decompressConcatenated = decompressConcatenated;
133
134        init(true);
135        initBlock();
136    }
137
138    @Override
139    public int read() throws IOException {
140        if (this.bin != null) {
141            final int r = read0();
142            count(r < 0 ? -1 : 1);
143            return r;
144        }
145        throw new IOException("stream closed");
146    }
147
148    /*
149     * (non-Javadoc)
150     *
151     * @see java.io.InputStream#read(byte[], int, int)
152     */
153    @Override
154    public int read(final byte[] dest, final int offs, final int len)
155        throws IOException {
156        if (offs < 0) {
157            throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
158        }
159        if (len < 0) {
160            throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
161        }
162        if (offs + len > dest.length) {
163            throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
164                                                + len + ") > dest.length(" + dest.length + ").");
165        }
166        if (this.bin == null) {
167            throw new IOException("stream closed");
168        }
169        if (len == 0) {
170            return 0;
171        }
172
173        final int hi = offs + len;
174        int destOffs = offs;
175        int b;
176        while (destOffs < hi && ((b = read0()) >= 0)) {
177            dest[destOffs++] = (byte) b;
178            count(1);
179        }
180
181        return (destOffs == offs) ? -1 : (destOffs - offs);
182    }
183
184    private void makeMaps() {
185        final boolean[] inUse = this.data.inUse;
186        final byte[] seqToUnseq = this.data.seqToUnseq;
187
188        int nInUseShadow = 0;
189
190        for (int i = 0; i < 256; i++) {
191            if (inUse[i]) {
192                seqToUnseq[nInUseShadow++] = (byte) i;
193            }
194        }
195
196        this.nInUse = nInUseShadow;
197    }
198
199    private int read0() throws IOException {
200        switch (currentState) {
201        case EOF:
202            return -1;
203
204        case START_BLOCK_STATE:
205            return setupBlock();
206
207        case RAND_PART_A_STATE:
208            throw new IllegalStateException();
209
210        case RAND_PART_B_STATE:
211            return setupRandPartB();
212
213        case RAND_PART_C_STATE:
214            return setupRandPartC();
215
216        case NO_RAND_PART_A_STATE:
217            throw new IllegalStateException();
218
219        case NO_RAND_PART_B_STATE:
220            return setupNoRandPartB();
221
222        case NO_RAND_PART_C_STATE:
223            return setupNoRandPartC();
224
225        default:
226            throw new IllegalStateException();
227        }
228    }
229
230    private int readNextByte(BitInputStream in) throws IOException {
231        long b = in.readBits(8);
232        return (int) b;
233    }
234
235    private boolean init(final boolean isFirstStream) throws IOException {
236        if (null == bin) {
237            throw new IOException("No InputStream");
238        }
239
240        if (!isFirstStream) {
241            bin.clearBitCache();
242        }
243
244        final int magic0 = readNextByte(this.bin);
245        if (magic0 == -1 && !isFirstStream) {
246            return false;
247        }
248        final int magic1 = readNextByte(this.bin);
249        final int magic2 = readNextByte(this.bin);
250
251        if (magic0 != 'B' || magic1 != 'Z' || magic2 != 'h') {
252            throw new IOException(isFirstStream
253                    ? "Stream is not in the BZip2 format"
254                    : "Garbage after a valid BZip2 stream");
255        }
256
257        final int blockSize = readNextByte(this.bin);
258        if ((blockSize < '1') || (blockSize > '9')) {
259            throw new IOException("BZip2 block size is invalid");
260        }
261
262        this.blockSize100k = blockSize - '0';
263
264        this.computedCombinedCRC = 0;
265
266        return true;
267    }
268
269    private void initBlock() throws IOException {
270        BitInputStream bin = this.bin;
271        char magic0;
272        char magic1;
273        char magic2;
274        char magic3;
275        char magic4;
276        char magic5;
277
278        while (true) {
279            // Get the block magic bytes.
280            magic0 = bsGetUByte(bin);
281            magic1 = bsGetUByte(bin);
282            magic2 = bsGetUByte(bin);
283            magic3 = bsGetUByte(bin);
284            magic4 = bsGetUByte(bin);
285            magic5 = bsGetUByte(bin);
286
287            // If isn't end of stream magic, break out of the loop.
288            if (magic0 != 0x17 || magic1 != 0x72 || magic2 != 0x45
289                    || magic3 != 0x38 || magic4 != 0x50 || magic5 != 0x90) {
290                break;
291            }
292
293            // End of stream was reached. Check the combined CRC and
294            // advance to the next .bz2 stream if decoding concatenated
295            // streams.
296            if (complete()) {
297                return;
298            }
299        }
300
301        if (magic0 != 0x31 || // '1'
302            magic1 != 0x41 || // ')'
303            magic2 != 0x59 || // 'Y'
304            magic3 != 0x26 || // '&'
305            magic4 != 0x53 || // 'S'
306            magic5 != 0x59 // 'Y'
307            ) {
308            this.currentState = EOF;
309            throw new IOException("bad block header");
310        }
311        this.storedBlockCRC = bsGetInt(bin);
312        this.blockRandomised = bsR(bin, 1) == 1;
313
314        /**
315         * Allocate data here instead in constructor, so we do not allocate
316         * it if the input file is empty.
317         */
318        if (this.data == null) {
319            this.data = new Data(this.blockSize100k);
320        }
321
322        // currBlockNo++;
323        getAndMoveToFrontDecode();
324
325        this.crc.initialiseCRC();
326        this.currentState = START_BLOCK_STATE;
327    }
328
329    private void endBlock() throws IOException {
330        this.computedBlockCRC = this.crc.getFinalCRC();
331
332        // A bad CRC is considered a fatal error.
333        if (this.storedBlockCRC != this.computedBlockCRC) {
334            // make next blocks readable without error
335            // (repair feature, not yet documented, not tested)
336            this.computedCombinedCRC = (this.storedCombinedCRC << 1)
337                | (this.storedCombinedCRC >>> 31);
338            this.computedCombinedCRC ^= this.storedBlockCRC;
339
340            throw new IOException("BZip2 CRC error");
341        }
342
343        this.computedCombinedCRC = (this.computedCombinedCRC << 1)
344            | (this.computedCombinedCRC >>> 31);
345        this.computedCombinedCRC ^= this.computedBlockCRC;
346    }
347
348    private boolean complete() throws IOException {
349        this.storedCombinedCRC = bsGetInt(bin);
350        this.currentState = EOF;
351        this.data = null;
352
353        if (this.storedCombinedCRC != this.computedCombinedCRC) {
354            throw new IOException("BZip2 CRC error");
355        }
356
357        // Look for the next .bz2 stream if decompressing
358        // concatenated files.
359        return !decompressConcatenated || !init(false);
360    }
361
362    @Override
363    public void close() throws IOException {
364        final BitInputStream inShadow = this.bin;
365        if (inShadow != null) {
366            try {
367                inShadow.close();
368            } finally {
369                this.data = null;
370                this.bin = null;
371            }
372        }
373    }
374
375    /**
376     * read bits from the input stream
377     * @param n the number of bits to read, must not exceed 32?
378     * @return the requested bits combined into an int
379     * @throws IOException
380     */
381    private static int bsR(BitInputStream bin, final int n) throws IOException {
382        long thech = bin.readBits(n);
383        if (thech < 0) {
384            throw new IOException("unexpected end of stream");
385        }
386        return (int) thech;
387    }
388
389    private static boolean bsGetBit(BitInputStream bin) throws IOException {
390        return bsR(bin, 1) != 0;
391    }
392
393    private static char bsGetUByte(BitInputStream bin) throws IOException {
394        return (char) bsR(bin, 8);
395    }
396
397    private static int bsGetInt(BitInputStream bin) throws IOException {
398        return bsR(bin, 32);
399    }
400
401    private static void checkBounds(final int checkVal, final int limitExclusive, String name)
402        throws IOException {
403        if (checkVal < 0) {
404            throw new IOException("Corrupted input, " + name + " value negative");
405        }
406        if (checkVal >= limitExclusive) {
407            throw new IOException("Corrupted input, " + name + " value too big");
408        }
409    }
410
411    /**
412     * Called by createHuffmanDecodingTables() exclusively.
413     */
414    private static void hbCreateDecodeTables(final int[] limit,
415                                             final int[] base, final int[] perm, final char[] length,
416                                             final int minLen, final int maxLen, final int alphaSize)
417        throws IOException {
418        for (int i = minLen, pp = 0; i <= maxLen; i++) {
419            for (int j = 0; j < alphaSize; j++) {
420                if (length[j] == i) {
421                    perm[pp++] = j;
422                }
423            }
424        }
425
426        for (int i = MAX_CODE_LEN; --i > 0;) {
427            base[i] = 0;
428            limit[i] = 0;
429        }
430
431        for (int i = 0; i < alphaSize; i++) {
432            final int l = length[i];
433            checkBounds(l, MAX_ALPHA_SIZE, "length");
434            base[l + 1]++;
435        }
436
437        for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) {
438            b += base[i];
439            base[i] = b;
440        }
441
442        for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) {
443            final int nb = base[i + 1];
444            vec += nb - b;
445            b = nb;
446            limit[i] = vec - 1;
447            vec <<= 1;
448        }
449
450        for (int i = minLen + 1; i <= maxLen; i++) {
451            base[i] = ((limit[i - 1] + 1) << 1) - base[i];
452        }
453    }
454
455    private void recvDecodingTables() throws IOException {
456        final BitInputStream bin = this.bin;
457        final Data dataShadow = this.data;
458        final boolean[] inUse = dataShadow.inUse;
459        final byte[] pos = dataShadow.recvDecodingTables_pos;
460        final byte[] selector = dataShadow.selector;
461        final byte[] selectorMtf = dataShadow.selectorMtf;
462
463        int inUse16 = 0;
464
465        /* Receive the mapping table */
466        for (int i = 0; i < 16; i++) {
467            if (bsGetBit(bin)) {
468                inUse16 |= 1 << i;
469            }
470        }
471
472        Arrays.fill(inUse, false);
473        for (int i = 0; i < 16; i++) {
474            if ((inUse16 & (1 << i)) != 0) {
475                final int i16 = i << 4;
476                for (int j = 0; j < 16; j++) {
477                    if (bsGetBit(bin)) {
478                        inUse[i16 + j] = true;
479                    }
480                }
481            }
482        }
483
484        makeMaps();
485        final int alphaSize = this.nInUse + 2;
486        /* Now the selectors */
487        final int nGroups = bsR(bin, 3);
488        final int nSelectors = bsR(bin, 15);
489        checkBounds(alphaSize, MAX_ALPHA_SIZE + 1, "alphaSize");
490        checkBounds(nGroups, N_GROUPS + 1, "nGroups");
491        checkBounds(nSelectors, MAX_SELECTORS + 1, "nSelectors");
492
493        for (int i = 0; i < nSelectors; i++) {
494            int j = 0;
495            while (bsGetBit(bin)) {
496                j++;
497            }
498            selectorMtf[i] = (byte) j;
499        }
500
501        /* Undo the MTF values for the selectors. */
502        for (int v = nGroups; --v >= 0;) {
503            pos[v] = (byte) v;
504        }
505
506        for (int i = 0; i < nSelectors; i++) {
507            int v = selectorMtf[i] & 0xff;
508            checkBounds(v, N_GROUPS, "selectorMtf");
509            final byte tmp = pos[v];
510            while (v > 0) {
511                // nearly all times v is zero, 4 in most other cases
512                pos[v] = pos[v - 1];
513                v--;
514            }
515            pos[0] = tmp;
516            selector[i] = tmp;
517        }
518
519        final char[][] len = dataShadow.temp_charArray2d;
520
521        /* Now the coding tables */
522        for (int t = 0; t < nGroups; t++) {
523            int curr = bsR(bin, 5);
524            final char[] len_t = len[t];
525            for (int i = 0; i < alphaSize; i++) {
526                while (bsGetBit(bin)) {
527                    curr += bsGetBit(bin) ? -1 : 1;
528                }
529                len_t[i] = (char) curr;
530            }
531        }
532
533        // finally create the Huffman tables
534        createHuffmanDecodingTables(alphaSize, nGroups);
535    }
536
537    /**
538     * Called by recvDecodingTables() exclusively.
539     */
540    private void createHuffmanDecodingTables(final int alphaSize,
541                                             final int nGroups) throws IOException {
542        final Data dataShadow = this.data;
543        final char[][] len = dataShadow.temp_charArray2d;
544        final int[] minLens = dataShadow.minLens;
545        final int[][] limit = dataShadow.limit;
546        final int[][] base = dataShadow.base;
547        final int[][] perm = dataShadow.perm;
548
549        for (int t = 0; t < nGroups; t++) {
550            int minLen = 32;
551            int maxLen = 0;
552            final char[] len_t = len[t];
553            for (int i = alphaSize; --i >= 0;) {
554                final char lent = len_t[i];
555                if (lent > maxLen) {
556                    maxLen = lent;
557                }
558                if (lent < minLen) {
559                    minLen = lent;
560                }
561            }
562            hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen,
563                                 maxLen, alphaSize);
564            minLens[t] = minLen;
565        }
566    }
567
568    private void getAndMoveToFrontDecode() throws IOException {
569        final BitInputStream bin = this.bin;
570        this.origPtr = bsR(bin, 24);
571        recvDecodingTables();
572
573        final Data dataShadow = this.data;
574        final byte[] ll8 = dataShadow.ll8;
575        final int[] unzftab = dataShadow.unzftab;
576        final byte[] selector = dataShadow.selector;
577        final byte[] seqToUnseq = dataShadow.seqToUnseq;
578        final char[] yy = dataShadow.getAndMoveToFrontDecode_yy;
579        final int[] minLens = dataShadow.minLens;
580        final int[][] limit = dataShadow.limit;
581        final int[][] base = dataShadow.base;
582        final int[][] perm = dataShadow.perm;
583        final int limitLast = this.blockSize100k * 100000;
584
585        /*
586         * Setting up the unzftab entries here is not strictly necessary, but it
587         * does save having to do it later in a separate pass, and so saves a
588         * block's worth of cache misses.
589         */
590        for (int i = 256; --i >= 0;) {
591            yy[i] = (char) i;
592            unzftab[i] = 0;
593        }
594
595        int groupNo = 0;
596        int groupPos = G_SIZE - 1;
597        final int eob = this.nInUse + 1;
598        int nextSym = getAndMoveToFrontDecode0();
599        int lastShadow = -1;
600        int zt = selector[groupNo] & 0xff;
601        checkBounds(zt, N_GROUPS, "zt");
602        int[] base_zt = base[zt];
603        int[] limit_zt = limit[zt];
604        int[] perm_zt = perm[zt];
605        int minLens_zt = minLens[zt];
606
607        while (nextSym != eob) {
608            if ((nextSym == RUNA) || (nextSym == RUNB)) {
609                int s = -1;
610
611                for (int n = 1; true; n <<= 1) {
612                    if (nextSym == RUNA) {
613                        s += n;
614                    } else if (nextSym == RUNB) {
615                        s += n << 1;
616                    } else {
617                        break;
618                    }
619
620                    if (groupPos == 0) {
621                        groupPos = G_SIZE - 1;
622                        checkBounds(++groupNo, MAX_SELECTORS, "groupNo");
623                        zt = selector[groupNo] & 0xff;
624                        checkBounds(zt, N_GROUPS, "zt");
625                        base_zt = base[zt];
626                        limit_zt = limit[zt];
627                        perm_zt = perm[zt];
628                        minLens_zt = minLens[zt];
629                    } else {
630                        groupPos--;
631                    }
632
633                    int zn = minLens_zt;
634                    checkBounds(zn, MAX_ALPHA_SIZE, "zn");
635                    int zvec = bsR(bin, zn);
636                    while(zvec > limit_zt[zn]) {
637                        checkBounds(++zn, MAX_ALPHA_SIZE, "zn");
638                        zvec = (zvec << 1) | bsR(bin, 1);
639                    }
640                    final int tmp = zvec - base_zt[zn];
641                    checkBounds(tmp, MAX_ALPHA_SIZE, "zvec");
642                    nextSym = perm_zt[tmp];
643                }
644
645                final int yy0 = yy[0];
646                checkBounds(yy0, 256, "yy");
647                final byte ch = seqToUnseq[yy0];
648                unzftab[ch & 0xff] += s + 1;
649
650                while (s-- >= 0) {
651                    ll8[++lastShadow] = ch;
652                }
653
654                if (lastShadow >= limitLast) {
655                    throw new IOException("block overrun");
656                }
657            } else {
658                if (++lastShadow >= limitLast) {
659                    throw new IOException("block overrun");
660                }
661                checkBounds(nextSym, 256 + 1, "nextSym");
662
663                final char tmp = yy[nextSym - 1];
664                checkBounds(tmp, 256, "yy");
665                unzftab[seqToUnseq[tmp] & 0xff]++;
666                ll8[lastShadow] = seqToUnseq[tmp];
667
668                /*
669                 * This loop is hammered during decompression, hence avoid
670                 * native method call overhead of System.arraycopy for very
671                 * small ranges to copy.
672                 */
673                if (nextSym <= 16) {
674                    for (int j = nextSym - 1; j > 0;) {
675                        yy[j] = yy[--j];
676                    }
677                } else {
678                    System.arraycopy(yy, 0, yy, 1, nextSym - 1);
679                }
680
681                yy[0] = tmp;
682
683                if (groupPos == 0) {
684                    groupPos = G_SIZE - 1;
685                    checkBounds(++groupNo, MAX_SELECTORS, "groupNo");
686                    zt = selector[groupNo] & 0xff;
687                    checkBounds(zt, N_GROUPS, "zt");
688                    base_zt = base[zt];
689                    limit_zt = limit[zt];
690                    perm_zt = perm[zt];
691                    minLens_zt = minLens[zt];
692                } else {
693                    groupPos--;
694                }
695
696                int zn = minLens_zt;
697                checkBounds(zn, MAX_ALPHA_SIZE, "zn");
698                int zvec = bsR(bin, zn);
699                while(zvec > limit_zt[zn]) {
700                    checkBounds(++zn, MAX_ALPHA_SIZE, "zn");
701                    zvec = (zvec << 1) | bsR(bin, 1);
702                }
703                final int idx = zvec - base_zt[zn];
704                checkBounds(idx, MAX_ALPHA_SIZE, "zvec");
705                nextSym = perm_zt[idx];
706            }
707        }
708
709        this.last = lastShadow;
710    }
711
712    private int getAndMoveToFrontDecode0() throws IOException {
713        final Data dataShadow = this.data;
714        final int zt = dataShadow.selector[0] & 0xff;
715        checkBounds(zt, N_GROUPS, "zt");
716        final int[] limit_zt = dataShadow.limit[zt];
717        int zn = dataShadow.minLens[zt];
718        checkBounds(zn, MAX_ALPHA_SIZE, "zn");
719        int zvec = bsR(bin, zn);
720        while (zvec > limit_zt[zn]) {
721            checkBounds(++zn, MAX_ALPHA_SIZE, "zn");
722            zvec = (zvec << 1) | bsR(bin, 1);
723        }
724        final int tmp = zvec - dataShadow.base[zt][zn];
725        checkBounds(tmp, MAX_ALPHA_SIZE, "zvec");
726
727        return dataShadow.perm[zt][tmp];
728    }
729
730    private int setupBlock() throws IOException {
731        if (currentState == EOF || this.data == null) {
732            return -1;
733        }
734
735        final int[] cftab = this.data.cftab;
736        final int ttLen = this.last + 1;
737        final int[] tt = this.data.initTT(ttLen);
738        final byte[] ll8 = this.data.ll8;
739        cftab[0] = 0;
740        System.arraycopy(this.data.unzftab, 0, cftab, 1, 256);
741
742        for (int i = 1, c = cftab[0]; i <= 256; i++) {
743            c += cftab[i];
744            cftab[i] = c;
745        }
746
747        for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) {
748            final int tmp = cftab[ll8[i] & 0xff]++;
749            checkBounds(tmp, ttLen, "tt index");
750            tt[tmp] = i;
751        }
752
753        if ((this.origPtr < 0) || (this.origPtr >= tt.length)) {
754            throw new IOException("stream corrupted");
755        }
756
757        this.su_tPos = tt[this.origPtr];
758        this.su_count = 0;
759        this.su_i2 = 0;
760        this.su_ch2 = 256; /* not a char and not EOF */
761
762        if (this.blockRandomised) {
763            this.su_rNToGo = 0;
764            this.su_rTPos = 0;
765            return setupRandPartA();
766        }
767        return setupNoRandPartA();
768    }
769
770    private int setupRandPartA() throws IOException {
771        if (this.su_i2 <= this.last) {
772            this.su_chPrev = this.su_ch2;
773            int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
774            checkBounds(this.su_tPos, this.data.tt.length, "su_tPos");
775            this.su_tPos = this.data.tt[this.su_tPos];
776            if (this.su_rNToGo == 0) {
777                this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
778                if (++this.su_rTPos == 512) {
779                    this.su_rTPos = 0;
780                }
781            } else {
782                this.su_rNToGo--;
783            }
784            this.su_ch2 = su_ch2Shadow ^= (this.su_rNToGo == 1) ? 1 : 0;
785            this.su_i2++;
786            this.currentState = RAND_PART_B_STATE;
787            this.crc.updateCRC(su_ch2Shadow);
788            return su_ch2Shadow;
789        }
790        endBlock();
791        initBlock();
792        return setupBlock();
793    }
794
795    private int setupNoRandPartA() throws IOException {
796        if (this.su_i2 <= this.last) {
797            this.su_chPrev = this.su_ch2;
798            final int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
799            this.su_ch2 = su_ch2Shadow;
800            checkBounds(this.su_tPos, this.data.tt.length, "su_tPos");
801            this.su_tPos = this.data.tt[this.su_tPos];
802            this.su_i2++;
803            this.currentState = NO_RAND_PART_B_STATE;
804            this.crc.updateCRC(su_ch2Shadow);
805            return su_ch2Shadow;
806        }
807        this.currentState = NO_RAND_PART_A_STATE;
808        endBlock();
809        initBlock();
810        return setupBlock();
811    }
812
813    private int setupRandPartB() throws IOException {
814        if (this.su_ch2 != this.su_chPrev) {
815            this.currentState = RAND_PART_A_STATE;
816            this.su_count = 1;
817            return setupRandPartA();
818        } else if (++this.su_count >= 4) {
819            this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
820            checkBounds(this.su_tPos, this.data.tt.length, "su_tPos");
821            this.su_tPos = this.data.tt[this.su_tPos];
822            if (this.su_rNToGo == 0) {
823                this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
824                if (++this.su_rTPos == 512) {
825                    this.su_rTPos = 0;
826                }
827            } else {
828                this.su_rNToGo--;
829            }
830            this.su_j2 = 0;
831            this.currentState = RAND_PART_C_STATE;
832            if (this.su_rNToGo == 1) {
833                this.su_z ^= 1;
834            }
835            return setupRandPartC();
836        } else {
837            this.currentState = RAND_PART_A_STATE;
838            return setupRandPartA();
839        }
840    }
841
842    private int setupRandPartC() throws IOException {
843        if (this.su_j2 < this.su_z) {
844            this.crc.updateCRC(this.su_ch2);
845            this.su_j2++;
846            return this.su_ch2;
847        }
848        this.currentState = RAND_PART_A_STATE;
849        this.su_i2++;
850        this.su_count = 0;
851        return setupRandPartA();
852    }
853
854    private int setupNoRandPartB() throws IOException {
855        if (this.su_ch2 != this.su_chPrev) {
856            this.su_count = 1;
857            return setupNoRandPartA();
858        } else if (++this.su_count >= 4) {
859            checkBounds(this.su_tPos, this.data.ll8.length, "su_tPos");
860            this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
861            this.su_tPos = this.data.tt[this.su_tPos];
862            this.su_j2 = 0;
863            return setupNoRandPartC();
864        } else {
865            return setupNoRandPartA();
866        }
867    }
868
869    private int setupNoRandPartC() throws IOException {
870        if (this.su_j2 < this.su_z) {
871            final int su_ch2Shadow = this.su_ch2;
872            this.crc.updateCRC(su_ch2Shadow);
873            this.su_j2++;
874            this.currentState = NO_RAND_PART_C_STATE;
875            return su_ch2Shadow;
876        }
877        this.su_i2++;
878        this.su_count = 0;
879        return setupNoRandPartA();
880    }
881
882    private static final class Data {
883
884        // (with blockSize 900k)
885        final boolean[] inUse = new boolean[256]; // 256 byte
886
887        final byte[] seqToUnseq = new byte[256]; // 256 byte
888        final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
889        final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
890
891        /**
892         * Freq table collected to save a pass over the data during
893         * decompression.
894         */
895        final int[] unzftab = new int[256]; // 1024 byte
896
897        final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
898        final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
899        final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
900        final int[] minLens = new int[N_GROUPS]; // 24 byte
901
902        final int[] cftab = new int[257]; // 1028 byte
903        final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte
904        final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096
905        // byte
906        final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte
907        // ---------------
908        // 60798 byte
909
910        int[] tt; // 3600000 byte
911        byte[] ll8; // 900000 byte
912
913        // ---------------
914        // 4560782 byte
915        // ===============
916
917        Data(final int blockSize100k) {
918            this.ll8 = new byte[blockSize100k * BZip2Constants.BASEBLOCKSIZE];
919        }
920
921        /**
922         * Initializes the {@link #tt} array.
923         *
924         * This method is called when the required length of the array is known.
925         * I don't initialize it at construction time to avoid unneccessary
926         * memory allocation when compressing small files.
927         */
928        int[] initTT(final int length) {
929            int[] ttShadow = this.tt;
930
931            // tt.length should always be >= length, but theoretically
932            // it can happen, if the compressor mixed small and large
933            // blocks. Normally only the last block will be smaller
934            // than others.
935            if ((ttShadow == null) || (ttShadow.length < length)) {
936                this.tt = ttShadow = new int[length];
937            }
938
939            return ttShadow;
940        }
941
942    }
943
944    /**
945     * Checks if the signature matches what is expected for a bzip2 file.
946     *
947     * @param signature
948     *            the bytes to check
949     * @param length
950     *            the number of bytes to check
951     * @return true, if this stream is a bzip2 compressed stream, false otherwise
952     *
953     * @since 1.1
954     */
955    public static boolean matches(final byte[] signature, final int length) {
956        return length >= 3 && signature[0] == 'B' &&
957                signature[1] == 'Z' && signature[2] == 'h';
958    }
959}